|
/// An iterator over the arguments in a response file.
///
/// This produces results identical to `CommandLineToArgvW` except in the
/// following cases:
///
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
///    current executable file. Here, the iterator will simply be empty.
/// 2. CommandLineToArgvW interprets the first argument differently than the
///    rest. Here, all arguments are treated in identical fashion.
/// 3. Line feeds and carriage returns delimit arguments here in addition to
///    spaces and tabs, so arguments may be placed on separate lines.
///
/// Parsing rules:
///
/// - Arguments are delimited by whitespace (a space, tab, line feed or
///   carriage return).
/// - A string surrounded by double quotes is interpreted as a single argument;
///   whitespace inside the quotes is preserved.
/// - Backslashes are interpreted literally unless followed by a double quote.
/// - 2n backslashes followed by a double quote reduce to n backslashes and
///   toggle the "in quote" state; the double quote itself is dropped.
/// - 2n+1 backslashes followed by a double quote reduce to n backslashes,
///   the "in quote" state is left unchanged, and the double quote is
///   interpreted literally.
///
/// References:
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
#[derive(Clone, Debug)]
pub struct SplitResponseFileArgs<'a> {
    /// Portion of the file content that has not been parsed yet.
    /// Every call to `next` advances this slice past the argument it returned.
    file_content: &'a str,
}

/// Builds the iterator from a reference to anything that can be viewed as a
/// string slice: `&String`, `&&str`, or a plain `&str` (hence `?Sized`).
/// The source only has to outlive the iterator; it need not be `'static`.
impl<'a, T> From<&'a T> for SplitResponseFileArgs<'a>
where
    T: AsRef<str> + ?Sized,
{
    fn from(file_content: &'a T) -> Self {
        Self {
            file_content: file_content.as_ref(),
        }
    }
}

impl<'a> SplitResponseFileArgs<'a> {
    /// Returns `true` for the characters that separate two arguments.
    ///
    /// Single source of truth for both the leading-whitespace trim and the
    /// end-of-argument detection in `next`.
    fn is_delimiter(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\n' | '\r')
    }

    /// Appends one backslash to `target` for every `step` backslashes counted
    /// in `count`, and leaves the remainder (`count % step`) in `count`.
    ///
    /// With `step == 1` every counted backslash is emitted and `count` becomes
    /// zero. With `step == 2` half of them are emitted and `count` ends up as
    /// 0 (even run) or 1 (odd run).
    ///
    /// # Panics
    ///
    /// Panics if `step` is zero.
    fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
        target.extend(std::iter::repeat('\\').take(*count / step));
        *count %= step;
    }
}

impl<'a> Iterator for SplitResponseFileArgs<'a> {
    type Item = String;

    /// Parses and returns the next argument, or `None` once only whitespace
    /// (or nothing) is left in the input.
    fn next(&mut self) -> Option<String> {
        // Skip the delimiters separating this argument from the previous one.
        self.file_content = self.file_content.trim_start_matches(Self::is_delimiter);
        if self.file_content.is_empty() {
            return None;
        }

        let mut in_quotes = false;
        // Backslashes are counted rather than appended immediately, because
        // their meaning depends on the first non-backslash character after them.
        let mut backslash_count: usize = 0;
        // The argument being built from the current position of the slice.
        let mut arg = String::new();
        // Borrowed mutably by the loop so the unconsumed tail is still
        // available once the argument is complete.
        let mut chars = self.file_content.chars();

        for c in &mut chars {
            match c {
                '\\' => backslash_count += 1,
                '"' => {
                    // Backslashes before a quote are escapes: emit half of
                    // them. Afterwards `backslash_count` is either 0 or 1.
                    Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
                    if backslash_count == 0 {
                        // Unescaped quote: opens or closes a quoted section.
                        in_quotes = !in_quotes;
                    } else {
                        // The odd backslash escaped the quote: keep it literally.
                        backslash_count = 0;
                        arg.push('"');
                    }
                }
                // An unquoted delimiter terminates the argument. The delimiter
                // itself has already been consumed from `chars`.
                c if Self::is_delimiter(c) && !in_quotes => {
                    Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
                    break;
                }
                // Everything else, including quoted whitespace, is literal.
                _ => {
                    Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
                    arg.push(c);
                }
            }
        }

        // Flush backslashes that ended the input without a following character.
        Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
        // Remember where to resume on the next call.
        self.file_content = chars.as_str();

        Some(arg)
    }
}
| 123 | + |
#[cfg(test)]
mod test {
    use super::*;

    /// Runs the splitter over `content` and collects every produced argument,
    /// so each test can compare the complete result (including its length)
    /// instead of indexing into it.
    fn split(content: &'static str) -> Vec<String> {
        SplitResponseFileArgs::from(&content).collect()
    }

    #[test]
    fn parse_simple_args() {
        assert_eq!(
            split("-A1 -A2 -A3 -I ../includes"),
            ["-A1", "-A2", "-A3", "-I", "../includes"]
        );
    }

    #[test]
    fn parse_quoted_path_arg() {
        assert_eq!(
            split("-I \"../included headers\""),
            ["-I", "../included headers"]
        );
    }

    #[test]
    fn parse_escaped_quoted_path_arg() {
        assert_eq!(
            split("-I \"../included \\\"headers\\\"\""),
            ["-I", "../included \"headers\""]
        );
    }

    #[test]
    fn parse_various_whitespace_characters() {
        // The trailing "\n " must not produce an extra, empty argument.
        assert_eq!(
            split("-A1 -A2\n-A3\n\r-A4\r-A5\n "),
            ["-A1", "-A2", "-A3", "-A4", "-A5"]
        );
    }

    #[test]
    fn parse_backslashes() {
        // Backslashes are literal unless they precede a double quote, in
        // which case pairs of them collapse into one.
        assert_eq!(
            split(r#"a\\b "x\\" dir\"#),
            ["a\\\\b", "x\\", "dir\\"]
        );
    }

    #[test]
    fn parse_empty_input() {
        assert!(split("").is_empty());
        assert!(split(" \t\r\n").is_empty());
    }
}
0 commit comments