-
-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: Valid key-parsing with mixed quotes.
Ideally fixes #37, but current implementation has some issues. Will elaborate on issues with this "fix" in a comment.
- Loading branch information
Showing
4 changed files
with
243 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Tomlet.Exceptions; | ||
using Xunit; | ||
|
||
namespace Tomlet.Tests | ||
{ | ||
/// <summary>
/// Round-trip tests for TOML keys that use basic ("...") and literal ('...') quoting,
/// both as single keys and as segments of dotted keys.
/// </summary>
public class QuotedKeysTests
{
    /// <summary>
    /// A single quoted key (which may itself contain dots or the other quote character)
    /// must be read as one key. The trailing comment on each row shows the expected key text.
    /// </summary>
    [Theory]
    [InlineData("\"a.'b\"", "a.'b")] // a.'b
    [InlineData("\"a.\\\"b\"", "a.\"b")] // a."b
    [InlineData("\"\"", "")] // (empty key)
    [InlineData("\"\\\"\"", "\"")] // "
    [InlineData("\"a.🐱b\"", "a.🐱b")] // a.🐱b
    [InlineData("'a.\"b'", "a.\"b")] // a."b
    [InlineData("'a.\\\"b'", "a.\\\"b")] // a.\"b  (literal strings keep the backslash)
    [InlineData("''", "")] // (empty key)
    [InlineData("'\"'", "\"")] // "
    [InlineData("'\\\"'", "\\\"")] // \"
    [InlineData("'a.🐱b'", "a.🐱b")] // a.🐱b
    [InlineData("\"a.b\\\".c\"", "a.b\".c")] // a.b".c
    public void NonDottedKeysWork(string inputKey, string expectedKey)
    {
        var inputString = $"{inputKey} = \"value\"";

        var dict = TomletMain.To<Dictionary<string, string>>(inputString);

        // The cast selects the dictionary-key overload of Assert.Contains.
        Assert.Contains(expectedKey, (IDictionary<string, string>)dict);
    }

    /// <summary>
    /// Quoted keys with a stray or mismatched quote character must be rejected.
    /// </summary>
    [Theory]
    [InlineData("\"a\"b\"")]
    [InlineData("'a'b'")]
    [InlineData("'a\\'b'")]
    //[InlineData("a\"b")] // Illegal in specs, but no harm in reading it
    //[InlineData("a'b")] // Illegal in specs, but no harm in reading it
    //[InlineData("a🐱b")] // Illegal in specs, but no harm in reading it
    [InlineData("'ab\"")]
    public void IllegalNonDottedKeysThrow(string inputKey)
    {
        var inputString = $"{inputKey} = \"value\"";

        Assert.ThrowsAny<TomlException>(() => _ = TomletMain.To<Dictionary<string, string>>(inputString));
    }

    /// <summary>
    /// A dotted key made of two segments (any mix of bare, basic and literal quoting)
    /// must produce a table named after the first segment containing the second segment.
    /// </summary>
    [Theory]
    [InlineData("'a.b'.c", "a.b", "c")]
    [InlineData("'a.b'.\"c\"", "a.b", "c")]
    [InlineData("a.'b.c'", "a", "b.c")]
    [InlineData("\"a\".'b.c'", "a", "b.c")]
    // Was "\"a\\\".b.c" (i.e. "a\".b.c), an unterminated basic string that cannot yield a -> b.c.
    // Replaced with the basic.basic combination that the expected values describe.
    [InlineData("\"a\".\"b.c\"", "a", "b.c")]
    [InlineData("'a.\"b'.c", "a.\"b", "c")]
    [InlineData("\"a.b\\\"c\".d", "a.b\"c", "d")]
    public void DottedKeysWork(string inputKey, string expectedKey, string expectedSubkey)
    {
        var inputString = $"{inputKey} = \"value\"";

        var dict = TomletMain.To<Dictionary<string, Dictionary<string, string>>>(inputString);

        var subDict = Assert.Contains(expectedKey, (IDictionary<string, Dictionary<string, string>>)dict);
        Assert.Contains(expectedSubkey, (IDictionary<string, string>)subDict);
    }

    /// <summary>
    /// Dotted keys with a stray or mismatched quote character must be rejected.
    /// </summary>
    [Theory]
    [InlineData("'a.\"b'.c\"")]
    [InlineData("\"a.bc\".d\"")]
    [InlineData("\"a.b\"c\".d\"")]
    [InlineData("\"a.b\"c\".d")]
    [InlineData("\"a.b\\\"c\".d\"")]
    [InlineData("'a.b'c'.d")]
    [InlineData("'a.b\\'c'.d")]
    [InlineData("'a.bc'.d'")]
    public void IllegalDottedKeysThrow(string inputKey)
    {
        var inputString = $"{inputKey} = \"value\"";

        // NOTE(review): the target type is a flat string dictionary, so a row that is (wrongly)
        // accepted as a dotted key may presumably still throw while converting the resulting
        // table to a string. Confirm that each row fails on the key itself, e.g. by asserting
        // a more specific exception type.
        Assert.ThrowsAny<TomlException>(() => _ = TomletMain.To<Dictionary<string, string>>(inputString));
    }

    /// <summary>
    /// Dictionary keys that are not valid bare keys must be serialized in a quoted form.
    /// <paramref name="expectedOutput"/> is a regular expression listing the acceptable quoted forms.
    /// </summary>
    [Theory]
    [InlineData("\"a\"b\"", @"(?:'""a""b""')|(?:""\\""a\\""b\\"""")")] // Simple or Literal
    [InlineData("'a'b'", @"""'a'b'""")] // Simple only
    [InlineData("'a\\'b'", @"""'a\\'b'""")] // Simple only
    [InlineData("a\"b", @"(?:'a""b')|(?:""a\\""b"")")] // Simple or Literal
    [InlineData("a'b", @"""a'b""")] // Simple only
    [InlineData("a🐱b", @"(?:'a🐱b')|(?:""a🐱b"")")] // Simple or Literal
    [InlineData("'ab\"", @"""'ab\\""""")] // Simple only
    public void SerializingIllegalKeysWorks(string inputKey, string expectedOutput)
    {
        var dict = new Dictionary<string, string>
        {
            { inputKey, "a" },
        };

        var document = TomletMain.DocumentFrom(dict);

        Assert.NotEmpty(document.Keys);
        var serializedKey = document.Keys.First();
        Assert.Matches(expectedOutput, serializedKey);
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,57 +1,136 @@ | ||
using System; | ||
using System.Text.RegularExpressions; | ||
using Tomlet.Exceptions; | ||
|
||
namespace Tomlet | ||
{ | ||
/// <summary>
/// Helpers for splitting (possibly quoted, possibly dotted) TOML keys into segments
/// and for deciding whether a key has to be quoted when it is written out.
/// </summary>
internal static class TomlKeyUtils
{
    // Keys made only of ASCII letters, digits, '_' and '-' are returned unquoted.
    // \z (rather than $) is used so that a key ending in '\n' is not treated as bare:
    // in .NET, $ also matches just before a final newline.
    private static readonly Regex UnquotedKeyRegex = new Regex(@"^[a-zA-Z0-9_-]+\z");

    /// <summary>
    /// Splits <paramref name="key"/> into its first segment and the remainder after the
    /// separating dot.
    /// </summary>
    /// <param name="key">The raw key text, including any surrounding quotes.</param>
    /// <param name="ourKeyName">
    /// Receives the first segment. Quoted segments keep their surrounding quotes.
    /// </param>
    /// <param name="restOfKey">
    /// Receives everything after the first separating dot, or an empty string when
    /// <paramref name="key"/> consists of a single segment.
    /// </param>
    /// <exception cref="InvalidTomlKeyException">
    /// A quoted first segment is never closed, is followed by something other than a dot,
    /// or the key ends with a dot.
    /// </exception>
    internal static void GetTopLevelAndSubKeys(string key, out string ourKeyName, out string restOfKey)
    {
        if (key.StartsWith("'", StringComparison.Ordinal))
        {
            // Literal strings have no escape sequences, so the next ' always ends the segment.
            SplitAfterClosingQuote(key, key.IndexOf('\'', 1), out ourKeyName, out restOfKey);
            return;
        }

        if (key.StartsWith("\"", StringComparison.Ordinal))
        {
            // Basic strings may contain \" so the closing quote has to be searched escape-aware.
            SplitAfterClosingQuote(key, FindNextUnescapedQuote(key, 1), out ourKeyName, out restOfKey);
            return;
        }

        // Unquoted first segment: it simply runs up to the first dot.
        var firstDot = key.IndexOf('.');
        if (firstDot == -1)
        {
            // Key is undotted.
            // We could make a check for illegal characters here, but there isn't much point to it.
            ourKeyName = key;
            restOfKey = "";
            return;
        }

        if (firstDot + 1 == key.Length)
        {
            // Trailing dot: the segment after it would be an empty unquoted key.
            // TODO: Find better exception
            throw new InvalidTomlKeyException(key);
        }

        // Note: a leading dot is not rejected here; it yields an empty first segment.
        ourKeyName = key.Substring(0, firstDot);
        restOfKey = key.Substring(firstDot + 1);
    }

    /// <summary>
    /// Completes the split for a key whose first segment is quoted, given the index of the
    /// quote character that closes that segment (or -1 when there is none).
    /// </summary>
    private static void SplitAfterClosingQuote(string key, int closingQuote, out string ourKeyName, out string restOfKey)
    {
        if (closingQuote == -1)
        {
            // Quoted string was never closed
            // TODO: Find better exception
            throw new InvalidTomlKeyException(key);
        }

        if (closingQuote + 1 == key.Length)
        {
            // The quoted segment is the whole key, no splitting needed.
            ourKeyName = key;
            restOfKey = "";
            return;
        }

        if (key[closingQuote + 1] != '.')
        {
            // Only a dot may follow the closing quote, i.e. the segment contained a stray quote.
            throw new InvalidTomlKeyException(key);
        }

        if (closingQuote + 2 == key.Length)
        {
            // Trailing dot: the segment after it would be an empty unquoted key.
            throw new InvalidTomlKeyException(key);
        }

        ourKeyName = key.Substring(0, closingQuote + 1);
        restOfKey = key.Substring(closingQuote + 2);
    }

    /// <summary>
    /// Returns the index of the first double quote at or after <paramref name="startingIndex"/>
    /// that is not preceded by an odd number of consecutive backslashes, or -1 if there is none.
    /// </summary>
    private static int FindNextUnescapedQuote(string input, int startingIndex)
    {
        var isEscaped = false;
        for (var i = startingIndex; i < input.Length; i++)
        {
            var current = input[i];
            if (current == '\\')
            {
                // Each backslash toggles the state, so "\\" cancels itself out.
                isEscaped = !isEscaped;
                continue;
            }

            if (current == '"' && !isEscaped)
            {
                return i;
            }

            // Any other character (or an escaped quote) consumes the pending escape.
            isEscaped = false;
        }

        return -1; // No unescaped quote was found
    }

    /// <summary>
    /// Returns <paramref name="key"/> unchanged when it matches the unquoted-key pattern;
    /// otherwise returns the result of <c>TomlUtils.AddCorrectQuotes</c> for it.
    /// </summary>
    internal static string FullStringToProperKey(string key)
    {
        return UnquotedKeyRegex.IsMatch(key) ? key : TomlUtils.AddCorrectQuotes(key);
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters