From 865ff6e9d45db567bb3e194419cbc52815d1e0f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonah=20Br=C3=BCchert?= <jbb@kaidan.im>
Date: Mon, 29 Apr 2024 17:41:47 +0200
Subject: [PATCH 1/2] Unescape csv strings

---
 include/utl/parser/csv.h       | 22 ++++++++++++++++++++--
 include/utl/parser/csv_range.h |  2 +-
 test/parser/pipe_csv_test.cc   |  2 +-
 3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/include/utl/parser/csv.h b/include/utl/parser/csv.h
index 4558e0f..79049c5 100644
--- a/include/utl/parser/csv.h
+++ b/include/utl/parser/csv.h
@@ -53,6 +53,19 @@ inline void parse_column(cstr& s, T& arg) {
                                             adjust_for_quote + adjust_for_cr));
 }
 
+inline void unescape_quoted_string(std::string& arg) {  // "" -> " in place
+  std::string::size_type found_at = 0;  // Where the next search starts
+  while ((found_at = arg.find('"', found_at)) != std::string::npos) {
+    if (found_at < arg.size() - 1 && arg[found_at + 1] == '"') {  // "" found
+      arg.erase(found_at, 1);  // Remove the first quote of the pair; the second
+                               // quote shifts down and now sits at found_at
+      ++found_at;  // Step past the kept quote so it is not re-examined
+    } else {
+      ++found_at;  // Lone quote: keep it and search after it
+    }
+  }
+}
+
 template <typename IntType,
           std::enable_if_t<std::is_integral<IntType>::value, int> = 0>
 inline void parse_value(cstr& s, IntType& arg) {
@@ -71,8 +84,13 @@ inline void parse_value(cstr& s, bool& arg) {
   s = s.skip_whitespace_front();
   parse_arg(s, arg);
 }
-inline void parse_value(cstr& s, std::string& arg) { parse_arg(s, arg); }
-inline void parse_value(cstr& s, cstr& arg) { parse_arg(s, arg); }
+inline void parse_value(cstr& s, std::string& arg) {
+  parse_arg(s, arg);
+  unescape_quoted_string(arg);  // Collapse each "" in arg to a single "
+}
+inline void parse_value(cstr& s, cstr& arg) {
+  parse_arg(s, arg);  // No unescaping here, unlike the std::string overload
+}
 
 template <int Index, typename... Args>
 typename std::enable_if<Index == sizeof...(Args)>::type read(
diff --git a/include/utl/parser/csv_range.h b/include/utl/parser/csv_range.h
index 8714990..1d25042 100644
--- a/include/utl/parser/csv_range.h
+++ b/include/utl/parser/csv_range.h
@@ -94,7 +94,7 @@ struct csv_range : public LineRange {
     T t{};
     cista::for_each_field(t, [&, i = 0u](auto& f) mutable {
       if (row[i]) {
-        parse_arg(row[i], f.val());
+        parse_value(row[i], f.val());
       }
       ++i;
     });
diff --git a/test/parser/pipe_csv_test.cc b/test/parser/pipe_csv_test.cc
index b0d3509..beb0ffd 100644
--- a/test/parser/pipe_csv_test.cc
+++ b/test/parser/pipe_csv_test.cc
@@ -101,7 +101,7 @@ TEST(pipe_csv, csv_escaped_string) {
                       | vec();
 
   ASSERT_TRUE(result.size() == 1);
-  EXPECT_TRUE(result[0].foo_.val() == R"([""asd"", ""bsd""])");
+  EXPECT_TRUE(result[0].foo_.val() == R"(["asd", "bsd"])");
   EXPECT_TRUE(result[0].bar_.val() == "asd");
   EXPECT_TRUE(result[0].baz_.val() == "xxx");
 }

From 6f2c66f4477cd056d6328615b6816e1fbd89a755 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonah=20Br=C3=BCchert?= <jbb@kaidan.im>
Date: Thu, 2 May 2024 18:39:50 +0200
Subject: [PATCH 2/2] Add test to make sure we don't crash on invalid csv

---
 test/parser/pipe_csv_test.cc | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/test/parser/pipe_csv_test.cc b/test/parser/pipe_csv_test.cc
index beb0ffd..c61427f 100644
--- a/test/parser/pipe_csv_test.cc
+++ b/test/parser/pipe_csv_test.cc
@@ -105,3 +105,24 @@ TEST(pipe_csv, csv_escaped_string) {
   EXPECT_TRUE(result[0].bar_.val() == "asd");
   EXPECT_TRUE(result[0].baz_.val() == "xxx");
 }
+
+TEST(pipe_csv, csv_invalid_escaped_string) {
+  struct dat {
+    csv_col<std::string, UTL_NAME("FOO")> foo_;
+    csv_col<std::string, UTL_NAME("BAR")> bar_;
+    csv_col<std::string, UTL_NAME("BAZ")> baz_;
+  };
+
+  // Invalid CSV: the BAZ field ends in an unescaped quote. Must not crash.
+  constexpr auto const input = R"(BAR,FOO,BAZ
+"asd","[""asd"", ""bsd""]","xxx""
+)";
+  auto const result = line_range{make_buf_reader(input, {})}  //
+                      | csv<dat, ','>()  //
+                      | vec();
+
+  ASSERT_TRUE(result.size() == 1);
+  EXPECT_TRUE(result[0].foo_.val() == R"(["asd", "bsd"])");
+  EXPECT_TRUE(result[0].bar_.val() == "asd");
+  EXPECT_TRUE(result[0].baz_.val() == R"(xxx")");
+}