Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unquote csv strings #18

Merged
merged 2 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions include/utl/parser/csv.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@ inline void parse_column(cstr& s, T& arg) {
adjust_for_quote + adjust_for_cr));
}

// Collapses CSV-style escaped quotes in place: every pair of adjacent double
// quotes ("") is replaced by a single double quote (").
//
// Pairs are matched left to right without overlapping, so a run of three
// quotes yields two. A quote that has no partner (e.g. a stray trailing quote
// in malformed input) is kept as is. An empty string, or one that contains no
// quote at all, is left untouched.
//
// The string is compacted in a single pass and shrunk once at the end, instead
// of erasing every escaped quote individually (each erase shifts the whole
// tail, which is quadratic for quote-heavy fields).
//
// arg: the string to unescape; modified in place.
inline void unescape_quoted_string(std::string& arg) {
  auto const first_quote = arg.find('"');
  if (first_quote == std::string::npos) {
    return;  // Nothing to unescape.
  }

  auto const size = arg.size();
  // Everything before the first quote is already in its final position.
  auto write = first_quote;
  for (auto read = first_quote; read < size; ++read, ++write) {
    arg[write] = arg[read];
    // write <= read always holds, so arg[read] and arg[read + 1] still hold
    // their original values here.
    if (arg[read] == '"' && read + 1 < size && arg[read + 1] == '"') {
      ++read;  // Drop the second quote of the escaped pair.
    }
  }
  arg.resize(write);
}

template <typename IntType,
std::enable_if_t<std::is_integral<IntType>::value, int> = 0>
inline void parse_value(cstr& s, IntType& arg) {
Expand All @@ -71,8 +84,13 @@ inline void parse_value(cstr& s, bool& arg) {
s = s.skip_whitespace_front();
parse_arg(s, arg);
}
inline void parse_value(cstr& s, std::string& arg) { parse_arg(s, arg); }
inline void parse_value(cstr& s, cstr& arg) { parse_arg(s, arg); }
inline void parse_value(cstr& s, std::string& arg) {
jbruechert marked this conversation as resolved.
Show resolved Hide resolved
parse_arg(s, arg);
unescape_quoted_string(arg);
}
jbruechert marked this conversation as resolved.
Show resolved Hide resolved
// cstr overload: forwards the column straight to parse_arg. No quote
// unescaping is performed for this overload.
inline void parse_value(cstr& s, cstr& arg) { parse_arg(s, arg); }

template <int Index, typename... Args>
typename std::enable_if<Index == sizeof...(Args)>::type read(
Expand Down
2 changes: 1 addition & 1 deletion include/utl/parser/csv_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ struct csv_range : public LineRange {
T t{};
cista::for_each_field(t, [&, i = 0u](auto& f) mutable {
if (row[i]) {
parse_arg(row[i], f.val());
parse_value(row[i], f.val());
}
++i;
});
Expand Down
23 changes: 22 additions & 1 deletion test/parser/pipe_csv_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,28 @@ TEST(pipe_csv, csv_escaped_string) {
| vec();

ASSERT_TRUE(result.size() == 1);
EXPECT_TRUE(result[0].foo_.val() == R"([""asd"", ""bsd""])");
EXPECT_TRUE(result[0].foo_.val() == R"(["asd", "bsd"])");
EXPECT_TRUE(result[0].bar_.val() == "asd");
EXPECT_TRUE(result[0].baz_.val() == "xxx");
}

// Malformed quoting must not crash the parser: the BAZ field below ends in a
// doubled quote ("xxx""), so its opening quote is never properly closed. The
// well-formed fields are still expected to come out unescaped.
TEST(pipe_csv, csv_invalid_escaped_string) {
  struct dat {
    csv_col<std::string, UTL_NAME("FOO")> foo_;
    csv_col<std::string, UTL_NAME("BAR")> bar_;
    csv_col<std::string, UTL_NAME("BAZ")> baz_;
  };

  // Header order (BAR,FOO,BAZ) differs from the member order above; the
  // assertions below expect each column to land in the member of that name.
  constexpr auto const csv_text = R"(BAR,FOO,BAZ
"asd","[""asd"", ""bsd""]","xxx""
)";
  auto const rows = line_range{make_buf_reader(csv_text, {})}  //
                    | csv<dat, ','>()  //
                    | vec();

  ASSERT_TRUE(rows.size() == 1);

  auto const& row = rows[0];
  // Doubled quotes inside a well-formed field collapse to single quotes.
  EXPECT_TRUE(row.foo_.val() == R"(["asd", "bsd"])");
  EXPECT_TRUE(row.bar_.val() == "asd");
  // The unterminated field keeps exactly one trailing quote.
  EXPECT_TRUE(row.baz_.val() == R"(xxx")");
}
Loading