Skip to content

Response file parsing for GCC and Clang #1781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions src/compiler/gcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::compiler::args::*;
use crate::compiler::c::{
ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments,
};
use crate::compiler::response_file::SplitResponseFileArgs;
use crate::compiler::{clang, Cacheable, ColorMode, CompileCommand, CompilerArguments};
use crate::mock_command::{CommandCreatorSync, RunCommand};
use crate::util::{run_input_output, OsStrExt};
Expand Down Expand Up @@ -894,11 +895,11 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
debug!("failed to read @-file `{}`: {}", file.display(), e);
return Some(arg);
}
if contents.contains('"') || contents.contains('\'') {
return Some(arg);
}
let new_args = contents.split_whitespace().collect::<Vec<_>>();
self.stack.extend(new_args.iter().rev().map(|s| s.into()));
// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
let resp_file_args = SplitResponseFileArgs::from(&contents).collect::<Vec<_>>();
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
self.stack.extend(rev_args);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ mod diab;
mod gcc;
mod msvc;
mod nvcc;
mod response_file;
mod rust;
mod tasking_vx;
#[macro_use]
Expand Down
127 changes: 2 additions & 125 deletions src/compiler/msvc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::compiler::args::*;
use crate::compiler::c::{
ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments,
};
use crate::compiler::response_file::SplitResponseFileArgs;
use crate::compiler::{
clang, gcc, write_temp_file, Cacheable, ColorMode, CompileCommand, CompilerArguments,
};
Expand Down Expand Up @@ -1224,8 +1225,7 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
trace!("Expanded response file {:?} to {:?}", file_path, content);

// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
// Special implementation to account for MSVC response file format.
let resp_file_args = SplitMsvcResponseFileArgs::from(&content).collect::<Vec<_>>();
let resp_file_args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
self.stack.extend(rev_args);
Expand All @@ -1250,129 +1250,6 @@ where
result.map_err(|err| io::Error::new(io::ErrorKind::Other, err.into_owned()))
}

/// An iterator over the arguments in a Windows command line.
///
/// This produces results identical to `CommandLineToArgvW` except in the
/// following cases:
///
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
/// current executable file. Here, the iterator will simply be empty.
/// 2. CommandLineToArgvW interprets the first argument differently than the
/// rest. Here, all arguments are treated in identical fashion.
///
/// Parsing rules:
///
/// - Arguments are delimited by whitespace (either a space or tab).
/// - A string surrounded by double quotes is interpreted as a single argument.
/// - Backslashes are interpreted literally unless followed by a double quote.
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
/// enter the "in quote" state.
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
/// we do *not* enter the "in quote" state, and the double quote is
/// interpreted literally.
///
/// References:
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
#[derive(Clone, Debug)]
struct SplitMsvcResponseFileArgs<'a> {
/// String slice of the file content that is being parsed.
/// Slice is mutated as this iterator is executed.
file_content: &'a str,
}

impl<'a, T> From<&'a T> for SplitMsvcResponseFileArgs<'a>
where
T: AsRef<str> + 'static,
{
fn from(file_content: &'a T) -> Self {
Self {
file_content: file_content.as_ref(),
}
}
}

impl<'a> SplitMsvcResponseFileArgs<'a> {
/// Appends backslashes to `target` by decrementing `count`.
/// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`.
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
while *count >= step {
target.push('\\');
*count -= step;
}
}
}

impl<'a> Iterator for SplitMsvcResponseFileArgs<'a> {
type Item = String;

fn next(&mut self) -> Option<String> {
let mut in_quotes = false;
let mut backslash_count: usize = 0;

// Strip any leading whitespace before relevant characters
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
self.file_content = self.file_content.trim_start_matches(is_whitespace);

if self.file_content.is_empty() {
return None;
}

// The argument string to return, built by analyzing the current slice in the iterator.
let mut arg = String::new();
// All characters still in the string slice. Will be mutated by consuming
// values until the current arg is built.
let mut chars = self.file_content.chars();
// Build the argument by evaluating each character in the string slice.
for c in &mut chars {
match c {
// In order to handle the escape character based on the char(s) which come after it,
// they are counted instead of appended literally, until a non-backslash character is encountered.
'\\' => backslash_count += 1,
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
'"' => {
// Only append half the number of backslashes encountered, because this is an escaped string.
// This will reduce `backslash_count` to either 0 or 1.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
match backslash_count == 0 {
// If there are no remaining encountered backslashes,
// then we have found either the start or end of a quoted argument.
true => in_quotes = !in_quotes,
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
false => {
backslash_count = 0;
arg.push('"');
}
}
}
// If whitespace is encountered, only preserve it if we are currently in quotes.
// Otherwise it marks the end of the current argument.
' ' | '\t' | '\n' | '\r' => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// If not in a quoted string, then this is the end of the argument.
if !in_quotes {
break;
}
// Otherwise, the whitespace must be preserved in the argument.
arg.push(c);
}
// All other characters treated as is
_ => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
arg.push(c);
}
}
}

// Flush any backslashes at the end of the string.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// Save the current remaining characters for the next step in the iterator.
self.file_content = chars.as_str();

Some(arg)
}
}

#[cfg(test)]
mod test {
use std::str::FromStr;
Expand Down
165 changes: 165 additions & 0 deletions src/compiler/response_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/// An iterator over the arguments in a response file.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would prefer the move to be done into a single commit
and then, modifications into a second commit

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I'll rewrite the commit into two commits in that order.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Alexei-Barnes ping ? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the slow action here - I've been pulled into a work project that's eating my free time. I should get back to this soon.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you still going to work on this? thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, sorry for the mega wait - I'm working on this again now.

Copy link
Contributor Author

@Alexei-Barnes Alexei-Barnes Aug 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, this is split into two separate commits now. The first commit just moves the code, with minimal changes necessary to do so. The second change modifies it, adds tests, and uses it in the GCC module. The final code is unchanged from when you last looked at it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've now also merged main into this branch, to keep it up to date. No merge conflicts.

///
/// This produces results identical to `CommandLineToArgvW` except in the
/// following cases:
///
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
/// current executable file. Here, the iterator will simply be empty.
/// 2. CommandLineToArgvW interprets the first argument differently than the
/// rest. Here, all arguments are treated in identical fashion.
///
/// Parsing rules:
///
/// - Arguments are delimited by whitespace (either a space or tab).
/// - A string surrounded by double quotes is interpreted as a single argument.
/// - Backslashes are interpreted literally unless followed by a double quote.
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
/// enter the "in quote" state.
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
/// we do *not* enter the "in quote" state, and the double quote is
/// interpreted literally.
///
/// References:
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
#[derive(Clone, Debug)]
pub struct SplitResponseFileArgs<'a> {
/// String slice of the file content that is being parsed.
/// Slice is mutated as this iterator is executed.
file_content: &'a str,
}

impl<'a, T> From<&'a T> for SplitResponseFileArgs<'a>
where
T: AsRef<str> + 'static,
{
fn from(file_content: &'a T) -> Self {
Self {
file_content: file_content.as_ref(),
}
}
}

impl<'a> SplitResponseFileArgs<'a> {
/// Appends backslashes to `target` by decrementing `count`.
/// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`.
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
while *count >= step {
target.push('\\');
*count -= step;
}
}
}

impl<'a> Iterator for SplitResponseFileArgs<'a> {
type Item = String;

fn next(&mut self) -> Option<String> {
let mut in_quotes = false;
let mut backslash_count: usize = 0;

// Strip any leading whitespace before relevant characters
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
self.file_content = self.file_content.trim_start_matches(is_whitespace);

if self.file_content.is_empty() {
return None;
}

// The argument string to return, built by analyzing the current slice in the iterator.
let mut arg = String::new();
// All characters still in the string slice. Will be mutated by consuming
// values until the current arg is built.
let mut chars = self.file_content.chars();
// Build the argument by evaluating each character in the string slice.
for c in &mut chars {
match c {
// In order to handle the escape character based on the char(s) which come after it,
// they are counted instead of appended literally, until a non-backslash character is encountered.
'\\' => backslash_count += 1,
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
'"' => {
// Only append half the number of backslashes encountered, because this is an escaped string.
// This will reduce `backslash_count` to either 0 or 1.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
match backslash_count == 0 {
// If there are no remaining encountered backslashes,
// then we have found either the start or end of a quoted argument.
true => in_quotes = !in_quotes,
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
false => {
backslash_count = 0;
arg.push('"');
}
}
}
// If whitespace is encountered, only preserve it if we are currently in quotes.
// Otherwise it marks the end of the current argument.
' ' | '\t' | '\n' | '\r' => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// If not in a quoted string, then this is the end of the argument.
if !in_quotes {
break;
}
// Otherwise, the whitespace must be preserved in the argument.
arg.push(c);
}
// All other characters treated as is
_ => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
arg.push(c);
}
}
}

// Flush any backslashes at the end of the string.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// Save the current remaining characters for the next step in the iterator.
self.file_content = chars.as_str();

Some(arg)
}
}

#[cfg(test)]
mod test {
use super::*;

#[test]
fn parse_simple_args() {
let content = "-A1 -A2 -A3 -I ../includes";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-A1");
assert_eq!(args[1], "-A2");
assert_eq!(args[2], "-A3");
assert_eq!(args[3], "-I");
assert_eq!(args[4], "../includes");
}

#[test]
fn parse_quoted_path_arg() {
let content = "-I \"../included headers\"";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-I");
assert_eq!(args[1], "../included headers");
}

#[test]
fn parse_escaped_quoted_path_arg() {
let content = "-I \"../included \\\"headers\\\"\"";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-I");
assert_eq!(args[1], "../included \"headers\"");
}

#[test]
fn parse_various_whitespace_characters() {
let content = "-A1 -A2\n-A3\n\r-A4\r-A5\n ";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-A1");
assert_eq!(args[1], "-A2");
assert_eq!(args[2], "-A3");
assert_eq!(args[3], "-A4");
assert_eq!(args[4], "-A5");
}
}