diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs index 6991a50ba..3522c6a90 100644 --- a/src/compiler/gcc.rs +++ b/src/compiler/gcc.rs @@ -16,6 +16,7 @@ use crate::compiler::args::*; use crate::compiler::c::{ ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments, }; +use crate::compiler::response_file::SplitResponseFileArgs; use crate::compiler::{clang, Cacheable, ColorMode, CompileCommand, CompilerArguments}; use crate::mock_command::{CommandCreatorSync, RunCommand}; use crate::util::{run_input_output, OsStrExt}; @@ -894,11 +895,11 @@ impl<'a> Iterator for ExpandIncludeFile<'a> { debug!("failed to read @-file `{}`: {}", file.display(), e); return Some(arg); } - if contents.contains('"') || contents.contains('\'') { - return Some(arg); - } - let new_args = contents.split_whitespace().collect::>(); - self.stack.extend(new_args.iter().rev().map(|s| s.into())); + // Parse the response file contents, taking into account quote-wrapped strings and new-line separators. + let resp_file_args = SplitResponseFileArgs::from(&contents).collect::>(); + // Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order. + let rev_args = resp_file_args.iter().rev().map(|s| s.into()); + self.stack.extend(rev_args); } } } diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index 2d9f0e668..230a009f3 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -23,6 +23,7 @@ mod diab; mod gcc; mod msvc; mod nvcc; +mod response_file; mod rust; mod tasking_vx; #[macro_use] diff --git a/src/compiler/msvc.rs b/src/compiler/msvc.rs index 8c0a24c5b..bfc1ce99f 100644 --- a/src/compiler/msvc.rs +++ b/src/compiler/msvc.rs @@ -16,6 +16,7 @@ use crate::compiler::args::*; use crate::compiler::c::{ ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments, }; +use crate::compiler::response_file::SplitResponseFileArgs; use crate::compiler::{ clang, gcc, write_temp_file, Cacheable, ColorMode, CompileCommand, CompilerArguments, }; @@ -1224,8 +1225,7 @@ impl<'a> Iterator for ExpandIncludeFile<'a> { trace!("Expanded response file {:?} to {:?}", file_path, content); // Parse the response file contents, taking into account quote-wrapped strings and new-line separators. - // Special implementation to account for MSVC response file format. - let resp_file_args = SplitMsvcResponseFileArgs::from(&content).collect::>(); + let resp_file_args = SplitResponseFileArgs::from(&content).collect::>(); // Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order. let rev_args = resp_file_args.iter().rev().map(|s| s.into()); self.stack.extend(rev_args); @@ -1250,129 +1250,6 @@ where result.map_err(|err| io::Error::new(io::ErrorKind::Other, err.into_owned())) } -/// An iterator over the arguments in a Windows command line. -/// -/// This produces results identical to `CommandLineToArgvW` except in the -/// following cases: -/// -/// 1. When passed an empty string, CommandLineToArgvW returns the path to the -/// current executable file. Here, the iterator will simply be empty. -/// 2. CommandLineToArgvW interprets the first argument differently than the -/// rest. Here, all arguments are treated in identical fashion. -/// -/// Parsing rules: -/// -/// - Arguments are delimited by whitespace (either a space or tab). -/// - A string surrounded by double quotes is interpreted as a single argument. -/// - Backslashes are interpreted literally unless followed by a double quote. -/// - 2n backslashes followed by a double quote reduce to n backslashes and we -/// enter the "in quote" state. -/// - 2n+1 backslashes followed by a double quote reduces to n backslashes, -/// we do *not* enter the "in quote" state, and the double quote is -/// interpreted literally. -/// -/// References: -/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx -/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx -#[derive(Clone, Debug)] -struct SplitMsvcResponseFileArgs<'a> { - /// String slice of the file content that is being parsed. - /// Slice is mutated as this iterator is executed. - file_content: &'a str, -} - -impl<'a, T> From<&'a T> for SplitMsvcResponseFileArgs<'a> -where - T: AsRef + 'static, -{ - fn from(file_content: &'a T) -> Self { - Self { - file_content: file_content.as_ref(), - } - } -} - -impl<'a> SplitMsvcResponseFileArgs<'a> { - /// Appends backslashes to `target` by decrementing `count`. - /// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`. - fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) { - while *count >= step { - target.push('\\'); - *count -= step; - } - } -} - -impl<'a> Iterator for SplitMsvcResponseFileArgs<'a> { - type Item = String; - - fn next(&mut self) -> Option { - let mut in_quotes = false; - let mut backslash_count: usize = 0; - - // Strip any leading whitespace before relevant characters - let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r'); - self.file_content = self.file_content.trim_start_matches(is_whitespace); - - if self.file_content.is_empty() { - return None; - } - - // The argument string to return, built by analyzing the current slice in the iterator. - let mut arg = String::new(); - // All characters still in the string slice. Will be mutated by consuming - // values until the current arg is built. - let mut chars = self.file_content.chars(); - // Build the argument by evaluating each character in the string slice. - for c in &mut chars { - match c { - // In order to handle the escape character based on the char(s) which come after it, - // they are counted instead of appended literally, until a non-backslash character is encountered. - '\\' => backslash_count += 1, - // Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped). - '"' => { - // Only append half the number of backslashes encountered, because this is an escaped string. - // This will reduce `backslash_count` to either 0 or 1. - Self::append_backslashes_to(&mut arg, &mut backslash_count, 2); - match backslash_count == 0 { - // If there are no remaining encountered backslashes, - // then we have found either the start or end of a quoted argument. - true => in_quotes = !in_quotes, - // The quote character is escaped, so it is treated as a literal and appended to the arg string. - false => { - backslash_count = 0; - arg.push('"'); - } - } - } - // If whitespace is encountered, only preserve it if we are currently in quotes. - // Otherwise it marks the end of the current argument. - ' ' | '\t' | '\n' | '\r' => { - Self::append_backslashes_to(&mut arg, &mut backslash_count, 1); - // If not in a quoted string, then this is the end of the argument. - if !in_quotes { - break; - } - // Otherwise, the whitespace must be preserved in the argument. - arg.push(c); - } - // All other characters treated as is - _ => { - Self::append_backslashes_to(&mut arg, &mut backslash_count, 1); - arg.push(c); - } - } - } - - // Flush any backslashes at the end of the string. - Self::append_backslashes_to(&mut arg, &mut backslash_count, 1); - // Save the current remaining characters for the next step in the iterator. - self.file_content = chars.as_str(); - - Some(arg) - } -} - #[cfg(test)] mod test { use std::str::FromStr; diff --git a/src/compiler/response_file.rs b/src/compiler/response_file.rs new file mode 100644 index 000000000..5891aa5ba --- /dev/null +++ b/src/compiler/response_file.rs @@ -0,0 +1,165 @@ +/// An iterator over the arguments in a response file. +/// +/// This produces results identical to `CommandLineToArgvW` except in the +/// following cases: +/// +/// 1. When passed an empty string, CommandLineToArgvW returns the path to the +/// current executable file. Here, the iterator will simply be empty. +/// 2. CommandLineToArgvW interprets the first argument differently than the +/// rest. Here, all arguments are treated in identical fashion. +/// +/// Parsing rules: +/// +/// - Arguments are delimited by whitespace (either a space or tab). +/// - A string surrounded by double quotes is interpreted as a single argument. +/// - Backslashes are interpreted literally unless followed by a double quote. +/// - 2n backslashes followed by a double quote reduce to n backslashes and we +/// enter the "in quote" state. +/// - 2n+1 backslashes followed by a double quote reduces to n backslashes, +/// we do *not* enter the "in quote" state, and the double quote is +/// interpreted literally. +/// +/// References: +/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx +/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx +#[derive(Clone, Debug)] +pub struct SplitResponseFileArgs<'a> { + /// String slice of the file content that is being parsed. + /// Slice is mutated as this iterator is executed. + file_content: &'a str, +} + +impl<'a, T> From<&'a T> for SplitResponseFileArgs<'a> +where + T: AsRef + 'static, +{ + fn from(file_content: &'a T) -> Self { + Self { + file_content: file_content.as_ref(), + } + } +} + +impl<'a> SplitResponseFileArgs<'a> { + /// Appends backslashes to `target` by decrementing `count`. + /// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`. + fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) { + while *count >= step { + target.push('\\'); + *count -= step; + } + } +} + +impl<'a> Iterator for SplitResponseFileArgs<'a> { + type Item = String; + + fn next(&mut self) -> Option { + let mut in_quotes = false; + let mut backslash_count: usize = 0; + + // Strip any leading whitespace before relevant characters + let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r'); + self.file_content = self.file_content.trim_start_matches(is_whitespace); + + if self.file_content.is_empty() { + return None; + } + + // The argument string to return, built by analyzing the current slice in the iterator. + let mut arg = String::new(); + // All characters still in the string slice. Will be mutated by consuming + // values until the current arg is built. + let mut chars = self.file_content.chars(); + // Build the argument by evaluating each character in the string slice. + for c in &mut chars { + match c { + // In order to handle the escape character based on the char(s) which come after it, + // they are counted instead of appended literally, until a non-backslash character is encountered. + '\\' => backslash_count += 1, + // Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped). + '"' => { + // Only append half the number of backslashes encountered, because this is an escaped string. + // This will reduce `backslash_count` to either 0 or 1. + Self::append_backslashes_to(&mut arg, &mut backslash_count, 2); + match backslash_count == 0 { + // If there are no remaining encountered backslashes, + // then we have found either the start or end of a quoted argument. + true => in_quotes = !in_quotes, + // The quote character is escaped, so it is treated as a literal and appended to the arg string. + false => { + backslash_count = 0; + arg.push('"'); + } + } + } + // If whitespace is encountered, only preserve it if we are currently in quotes. + // Otherwise it marks the end of the current argument. + ' ' | '\t' | '\n' | '\r' => { + Self::append_backslashes_to(&mut arg, &mut backslash_count, 1); + // If not in a quoted string, then this is the end of the argument. + if !in_quotes { + break; + } + // Otherwise, the whitespace must be preserved in the argument. + arg.push(c); + } + // All other characters treated as is + _ => { + Self::append_backslashes_to(&mut arg, &mut backslash_count, 1); + arg.push(c); + } + } + } + + // Flush any backslashes at the end of the string. + Self::append_backslashes_to(&mut arg, &mut backslash_count, 1); + // Save the current remaining characters for the next step in the iterator. + self.file_content = chars.as_str(); + + Some(arg) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn parse_simple_args() { + let content = "-A1 -A2 -A3 -I ../includes"; + let args = SplitResponseFileArgs::from(&content).collect::>(); + assert_eq!(args[0], "-A1"); + assert_eq!(args[1], "-A2"); + assert_eq!(args[2], "-A3"); + assert_eq!(args[3], "-I"); + assert_eq!(args[4], "../includes"); + } + + #[test] + fn parse_quoted_path_arg() { + let content = "-I \"../included headers\""; + let args = SplitResponseFileArgs::from(&content).collect::>(); + assert_eq!(args[0], "-I"); + assert_eq!(args[1], "../included headers"); + } + + #[test] + fn parse_escaped_quoted_path_arg() { + let content = "-I \"../included \\\"headers\\\"\""; + let args = SplitResponseFileArgs::from(&content).collect::>(); + assert_eq!(args[0], "-I"); + assert_eq!(args[1], "../included \"headers\""); + } + + #[test] + fn parse_various_whitespace_characters() { + let content = "-A1 -A2\n-A3\n\r-A4\r-A5\n "; + let args = SplitResponseFileArgs::from(&content).collect::>(); + assert_eq!(args[0], "-A1"); + assert_eq!(args[1], "-A2"); + assert_eq!(args[2], "-A3"); + assert_eq!(args[3], "-A4"); + assert_eq!(args[4], "-A5"); + } +}