From 7a6e3d85e158d33fe18dd15f8b97711c5cbaafbb Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:28:57 -0400 Subject: [PATCH 1/7] working but mandatory --- src/gsheets_copy.cpp | 24 ++++++------------------ src/include/gsheets_copy.hpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/gsheets_copy.cpp b/src/gsheets_copy.cpp index 38bf147..fd0052a 100644 --- a/src/gsheets_copy.cpp +++ b/src/gsheets_copy.cpp @@ -21,26 +21,12 @@ namespace duckdb copy_to_sink = GSheetWriteSink; } - struct GSheetCopyGlobalState : public GlobalFunctionData - { - explicit GSheetCopyGlobalState(ClientContext &context, const string &sheet_id, const string &token, const string &sheet_name) - : sheet_id(sheet_id), token(token), sheet_name(sheet_name) - { - } - - public: - string sheet_id; - string token; - string sheet_name; - }; - - struct GSheetWriteBindData : public TableFunctionData - { - }; unique_ptr GSheetCopyFunction::GSheetWriteBind(ClientContext &context, CopyFunctionBindInput &input, const vector &names, const vector &sql_types) { - return make_uniq(); + string file_path = input.info.file_path; + + return make_uniq(file_path, sql_types, names); } unique_ptr GSheetCopyFunction::GSheetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data, const string &file_path) @@ -96,9 +82,11 @@ namespace duckdb sheet_data["range"] = "Sheet1"; sheet_data["majorDimension"] = "ROWS"; - // TODO: Add column headers + vector headers = bind_data_p.Cast().options.name_list; vector> values; + values.push_back(headers); + for (idx_t r = 0; r < input.size(); r++) { vector row; diff --git a/src/include/gsheets_copy.hpp b/src/include/gsheets_copy.hpp index d762b08..2803939 100644 --- a/src/include/gsheets_copy.hpp +++ b/src/include/gsheets_copy.hpp @@ -6,6 +6,37 @@ namespace duckdb { + struct GSheetCopyGlobalState : public GlobalFunctionData + { + explicit GSheetCopyGlobalState(ClientContext &context, const string &sheet_id, const string &token, const string &sheet_name) + : sheet_id(sheet_id), token(token), sheet_name(sheet_name) + { + } + + public: + string sheet_id; + string token; + string sheet_name; + }; + + struct GSheetWriteOptions + { + vector name_list; + }; + + struct GSheetWriteBindData : public TableFunctionData + { + vector files; + GSheetWriteOptions options; + vector sql_types; + + GSheetWriteBindData(string file_path, vector sql_types, vector names) + : sql_types(std::move(sql_types)) + { + files.push_back(std::move(file_path)); + options.name_list = std::move(names); + } + }; class GSheetCopyFunction : public CopyFunction { From ff565c82362e2bdde212010be110ec247d6da8e1 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Fri, 25 Oct 2024 00:11:14 -0400 Subject: [PATCH 2/7] add test --- test/sql/copyto.test | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test/sql/copyto.test diff --git a/test/sql/copyto.test b/test/sql/copyto.test new file mode 100644 index 0000000..3f4b867 --- /dev/null +++ b/test/sql/copyto.test @@ -0,0 +1,35 @@ +# name: test/sql/copyto.test +# description: test COPY TO function +# group: [gsheets] + +require-env TOKEN + +require gsheets + +# Create a secret NB must substitute a token, do not commit! +statement ok +create secret test_secret (type gsheet, token '${TOKEN}'); + +# Create a table to copy to Google Sheet +statement ok +create table spreadsheets as +select 'Microsoft' as company, 'Excel' as product, 1985 as year_founded +union all +select 'Google', 'Google Sheets', 2006 +union all +select 'Apple', 'Numbers', 1984 +union all +select 'LibreOffice', 'Calc', 2000; + +# Copy the table to Google Sheet +statement ok +copy spreadsheets to 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit#gid=0' (format gsheet); + +# Read the table from Google Sheet +query III +from read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit#gid=0'); +---- +Microsoft Excel 1985 +Google Google Sheets 2006 +Apple Numbers 1984 +LibreOffice Calc 2000 From 5a7281b5152c19c96ce0fa8bd78a44a9be070865 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Fri, 25 Oct 2024 00:12:12 -0400 Subject: [PATCH 3/7] update todos --- TODO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index 49dad11..ebadc16 100644 --- a/TODO.md +++ b/TODO.md @@ -1,7 +1,7 @@ # TODO ## Copy to -- [ ] header +- [x] header - [ ] sheet - [ ] types - [ ] implicit copy to when it sees a gsheets url From be908a831962f8b367d2955b76f63dc1e0c1500c Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Fri, 25 Oct 2024 01:16:41 -0400 Subject: [PATCH 4/7] specify COPY TO sheet using url --- src/gsheets_copy.cpp | 34 ++++++++++++++++++++++++++++---- src/gsheets_read.cpp | 12 +++++------ src/gsheets_requests.cpp | 15 ++++++++++---- src/gsheets_utils.cpp | 19 ++++++++++++++---- src/include/gsheets_copy.hpp | 6 +++--- src/include/gsheets_read.hpp | 4 ++-- src/include/gsheets_requests.hpp | 5 +++-- src/include/gsheets_utils.hpp | 8 ++++++++ 8 files changed, 78 insertions(+), 25 deletions(-) diff --git a/src/gsheets_copy.cpp b/src/gsheets_copy.cpp index fd0052a..2e938f4 100644 --- a/src/gsheets_copy.cpp +++ b/src/gsheets_copy.cpp @@ -55,14 +55,25 @@ namespace duckdb } std::string token = token_value.ToString(); + std::string spreadsheet_id = extract_spreadsheet_id(file_path); std::string sheet_id = extract_sheet_id(file_path); std::string sheet_name = "Sheet1"; // TODO: make this configurable + std::string metadata_response = get_spreadsheet_metadata(spreadsheet_id, token); + json metadata = parseJson(metadata_response); + + for (const auto& sheet : metadata["sheets"]) { + if (sheet["properties"]["sheetId"].get() == std::stoi(sheet_id)) { + sheet_name = sheet["properties"]["title"].get(); + break; + } + } + // If writing, clear out the entire sheet first. // Do this here in the initialization so that it only happens once - std::string response = delete_sheet_data(sheet_id, token, sheet_name); + std::string response = delete_sheet_data(spreadsheet_id, token, sheet_name); - return make_uniq(context, sheet_id, token, sheet_name); + return make_uniq(context, spreadsheet_id, token, sheet_name); } unique_ptr GSheetCopyFunction::GSheetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data_p) @@ -75,11 +86,26 @@ namespace duckdb input.Flatten(); auto &gstate = gstate_p.Cast(); + std::string sheet_id = extract_sheet_id(bind_data_p.Cast().files[0]); + + std::string metadata_response = get_spreadsheet_metadata(gstate.spreadsheet_id, gstate.token); + json metadata = parseJson(metadata_response); + + std::string sheet_name = "Sheet1"; + + for (const auto& sheet : metadata["sheets"]) { + if (sheet["properties"]["sheetId"].get() == std::stoi(sheet_id)) { + sheet_name = sheet["properties"]["title"].get(); + break; + } + } + + // Create object ready to write to Google Sheet json sheet_data; // TODO: make this configurable - sheet_data["range"] = "Sheet1"; + sheet_data["range"] = sheet_name; sheet_data["majorDimension"] = "ROWS"; vector headers = bind_data_p.Cast().options.name_list; @@ -123,7 +149,7 @@ namespace duckdb // Make the API call to write data to the Google Sheet // Today, this is only append. - std::string response = fetch_sheet_data(gstate.sheet_id, gstate.token, gstate.sheet_name, HttpMethod::POST, request_body); + std::string response = fetch_sheet_data(gstate.spreadsheet_id, gstate.token, sheet_name, HttpMethod::POST, request_body); // Check for errors in the response json response_json = parseJson(response); diff --git a/src/gsheets_read.cpp b/src/gsheets_read.cpp index 22ae90a..4bd1d02 100644 --- a/src/gsheets_read.cpp +++ b/src/gsheets_read.cpp @@ -10,9 +10,9 @@ namespace duckdb { using json = nlohmann::json; -ReadSheetBindData::ReadSheetBindData(string sheet_id, string token, bool header, string sheet_name) - : sheet_id(sheet_id), token(token), finished(false), row_index(0), header(header), sheet_name(sheet_name) { - response = fetch_sheet_data(sheet_id, token, sheet_name, HttpMethod::GET); +ReadSheetBindData::ReadSheetBindData(string spreadsheet_id, string token, bool header, string sheet_name) + : spreadsheet_id(spreadsheet_id), token(token), finished(false), row_index(0), header(header), sheet_name(sheet_name) { + response = fetch_sheet_data(spreadsheet_id, token, sheet_name, HttpMethod::GET); } @@ -96,8 +96,8 @@ unique_ptr ReadSheetBind(ClientContext &context, TableFunctionBind } } - // Extract the sheet ID from the input (URL or ID) - std::string sheet_id = extract_sheet_id(sheet_input); + // Extract the spreadsheet ID from the input (URL or ID) + std::string spreadsheet_id = extract_spreadsheet_id(sheet_input); // Use the SecretManager to get the token auto &secret_manager = SecretManager::Get(context); @@ -125,7 +125,7 @@ unique_ptr ReadSheetBind(ClientContext &context, TableFunctionBind std::string token = token_value.ToString(); - auto bind_data = make_uniq(sheet_id, token, header, sheet); + auto bind_data = make_uniq(spreadsheet_id, token, header, sheet); json cleanJson = parseJson(bind_data->response); SheetData sheet_data = getSheetData(cleanJson); diff --git a/src/gsheets_requests.cpp b/src/gsheets_requests.cpp index ec21e59..a8e6c0c 100644 --- a/src/gsheets_requests.cpp +++ b/src/gsheets_requests.cpp @@ -89,10 +89,10 @@ namespace duckdb return response; } - std::string fetch_sheet_data(const std::string &sheet_id, const std::string &token, const std::string &sheet_name, HttpMethod method, const std::string &body) + std::string fetch_sheet_data(const std::string &spreadsheet_id, const std::string &token, const std::string &sheet_name, HttpMethod method, const std::string &body) { std::string host = "sheets.googleapis.com"; - std::string path = "/v4/spreadsheets/" + sheet_id + "/values/" + sheet_name; + std::string path = "/v4/spreadsheets/" + spreadsheet_id + "/values/" + sheet_name; if (method == HttpMethod::POST) { path += ":append"; @@ -102,11 +102,18 @@ namespace duckdb return perform_https_request(host, path, token, method, body); } - std::string delete_sheet_data(const std::string &sheet_id, const std::string &token, const std::string &sheet_name) + std::string delete_sheet_data(const std::string &spreadsheet_id, const std::string &token, const std::string &sheet_name) { std::string host = "sheets.googleapis.com"; - std::string path = "/v4/spreadsheets/" + sheet_id + "/values/" + sheet_name + ":clear"; + std::string path = "/v4/spreadsheets/" + spreadsheet_id + "/values/" + sheet_name + ":clear"; return perform_https_request(host, path, token, HttpMethod::POST, "{}"); } + + std::string get_spreadsheet_metadata(const std::string &spreadsheet_id, const std::string &token) + { + std::string host = "sheets.googleapis.com"; + std::string path = "/v4/spreadsheets/" + spreadsheet_id + "?&fields=sheets.properties"; + return perform_https_request(host, path, token, HttpMethod::GET, ""); + } } diff --git a/src/gsheets_utils.cpp b/src/gsheets_utils.cpp index dcbd28b..803f4ce 100644 --- a/src/gsheets_utils.cpp +++ b/src/gsheets_utils.cpp @@ -7,18 +7,18 @@ using json = nlohmann::json; namespace duckdb { -std::string extract_sheet_id(const std::string& input) { +std::string extract_spreadsheet_id(const std::string& input) { // Check if the input is already a sheet ID (no slashes) if (input.find('/') == std::string::npos) { return input; } - // Regular expression to match the sheet ID in a Google Sheets URL + // Regular expression to match the spreadsheet ID in a Google Sheets URL if(input.find("docs.google.com/spreadsheets/d/") != std::string::npos) { - std::regex sheet_id_regex("/d/([a-zA-Z0-9-_]+)"); + std::regex spreadsheet_id_regex("/d/([a-zA-Z0-9-_]+)"); std::smatch match; - if (std::regex_search(input, match, sheet_id_regex) && match.size() > 1) { + if (std::regex_search(input, match, spreadsheet_id_regex) && match.size() > 1) { return match.str(1); } } @@ -26,6 +26,17 @@ std::string extract_sheet_id(const std::string& input) { throw duckdb::InvalidInputException("Invalid Google Sheets URL or ID"); } +std::string extract_sheet_id(const std::string& input) { + if (input.find("docs.google.com/spreadsheets/d/") != std::string::npos && input.find("edit?gid=") != std::string::npos) { + std::regex sheet_id_regex("gid=([0-9]+)"); + std::smatch match; + if (std::regex_search(input, match, sheet_id_regex) && match.size() > 1) { + return match.str(1); + } + } + throw duckdb::InvalidInputException("Invalid Google Sheets URL or ID"); +} + json parseJson(const std::string& json_str) { try { // Find the start of the JSON object diff --git a/src/include/gsheets_copy.hpp b/src/include/gsheets_copy.hpp index 2803939..09121fb 100644 --- a/src/include/gsheets_copy.hpp +++ b/src/include/gsheets_copy.hpp @@ -8,13 +8,13 @@ namespace duckdb { struct GSheetCopyGlobalState : public GlobalFunctionData { - explicit GSheetCopyGlobalState(ClientContext &context, const string &sheet_id, const string &token, const string &sheet_name) - : sheet_id(sheet_id), token(token), sheet_name(sheet_name) + explicit GSheetCopyGlobalState(ClientContext &context, const string &spreadsheet_id, const string &token, const string &sheet_name) + : spreadsheet_id(spreadsheet_id), token(token), sheet_name(sheet_name) { } public: - string sheet_id; + string spreadsheet_id; string token; string sheet_name; }; diff --git a/src/include/gsheets_read.hpp b/src/include/gsheets_read.hpp index 8dd8047..39c47f6 100644 --- a/src/include/gsheets_read.hpp +++ b/src/include/gsheets_read.hpp @@ -8,7 +8,7 @@ namespace duckdb { struct ReadSheetBindData : public TableFunctionData { - string sheet_id; + string spreadsheet_id; string token; bool finished; idx_t row_index; @@ -16,7 +16,7 @@ struct ReadSheetBindData : public TableFunctionData { bool header; string sheet_name; - ReadSheetBindData(string sheet_id, string token, bool header, string sheet_name); + ReadSheetBindData(string spreadsheet_id, string token, bool header, string sheet_name); }; void ReadSheetFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output); diff --git a/src/include/gsheets_requests.hpp b/src/include/gsheets_requests.hpp index 4e6ec4a..417a11e 100644 --- a/src/include/gsheets_requests.hpp +++ b/src/include/gsheets_requests.hpp @@ -13,8 +13,9 @@ enum class HttpMethod { std::string perform_https_request(const std::string& host, const std::string& path, const std::string& token, HttpMethod method = HttpMethod::GET, const std::string& body = "", const std::string& content_type = "application/json"); -std::string fetch_sheet_data(const std::string& sheet_id, const std::string& token, const std::string& sheet_name, HttpMethod method = HttpMethod::GET, const std::string& body = ""); +std::string fetch_sheet_data(const std::string& spreadsheet_id, const std::string& token, const std::string& sheet_name, HttpMethod method = HttpMethod::GET, const std::string& body = ""); -std::string delete_sheet_data(const std::string& sheet_id, const std::string& token, const std::string& sheet_name); +std::string delete_sheet_data(const std::string& spreadsheet_id, const std::string& token, const std::string& sheet_name); +std::string get_spreadsheet_metadata(const std::string& spreadsheet_id, const std::string& token); } \ No newline at end of file diff --git a/src/include/gsheets_utils.hpp b/src/include/gsheets_utils.hpp index 8541547..6e353bb 100644 --- a/src/include/gsheets_utils.hpp +++ b/src/include/gsheets_utils.hpp @@ -15,6 +15,14 @@ namespace duckdb { * @return The extracted sheet ID * @throws InvalidInputException if the input is neither a valid URL nor a sheet ID */ +std::string extract_spreadsheet_id(const std::string& input); + + +/** + * Extracts the sheet ID from a Google Sheets URL + * @param input A Google Sheets URL + * @return The extracted sheet ID + */ std::string extract_sheet_id(const std::string& input); struct SheetData { From c5a52522df0d3e2dd47d4c4ce73f68fb8fb0b844 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Mon, 28 Oct 2024 22:45:59 -0400 Subject: [PATCH 5/7] clean up and add to read_gsheet --- src/gsheets_copy.cpp | 32 ++++++----------------- src/gsheets_read.cpp | 38 +++++++++++++++------------ src/gsheets_requests.cpp | 4 ++- src/gsheets_utils.cpp | 44 +++++++++++++++++++++++++++++--- src/include/gsheets_requests.hpp | 2 +- src/include/gsheets_utils.hpp | 33 +++++++++++++++++++++++- 6 files changed, 107 insertions(+), 46 deletions(-) diff --git a/src/gsheets_copy.cpp b/src/gsheets_copy.cpp index 2e938f4..1eda39a 100644 --- a/src/gsheets_copy.cpp +++ b/src/gsheets_copy.cpp @@ -57,23 +57,17 @@ namespace duckdb std::string token = token_value.ToString(); std::string spreadsheet_id = extract_spreadsheet_id(file_path); std::string sheet_id = extract_sheet_id(file_path); - std::string sheet_name = "Sheet1"; // TODO: make this configurable + std::string sheet_name = "Sheet1"; - std::string metadata_response = get_spreadsheet_metadata(spreadsheet_id, token); - json metadata = parseJson(metadata_response); + sheet_name = get_sheet_name_from_id(spreadsheet_id, sheet_id, token); - for (const auto& sheet : metadata["sheets"]) { - if (sheet["properties"]["sheetId"].get() == std::stoi(sheet_id)) { - sheet_name = sheet["properties"]["title"].get(); - break; - } - } + std::string encoded_sheet_name = url_encode(sheet_name); // If writing, clear out the entire sheet first. // Do this here in the initialization so that it only happens once - std::string response = delete_sheet_data(spreadsheet_id, token, sheet_name); + std::string response = delete_sheet_data(spreadsheet_id, token, encoded_sheet_name); - return make_uniq(context, spreadsheet_id, token, sheet_name); + return make_uniq(context, spreadsheet_id, token, encoded_sheet_name); } unique_ptr GSheetCopyFunction::GSheetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data_p) @@ -88,23 +82,13 @@ namespace duckdb std::string sheet_id = extract_sheet_id(bind_data_p.Cast().files[0]); - std::string metadata_response = get_spreadsheet_metadata(gstate.spreadsheet_id, gstate.token); - json metadata = parseJson(metadata_response); - std::string sheet_name = "Sheet1"; - for (const auto& sheet : metadata["sheets"]) { - if (sheet["properties"]["sheetId"].get() == std::stoi(sheet_id)) { - sheet_name = sheet["properties"]["title"].get(); - break; - } - } - - + sheet_name = get_sheet_name_from_id(gstate.spreadsheet_id, sheet_id, gstate.token); + std::string encoded_sheet_name = url_encode(sheet_name); // Create object ready to write to Google Sheet json sheet_data; - // TODO: make this configurable sheet_data["range"] = sheet_name; sheet_data["majorDimension"] = "ROWS"; @@ -149,7 +133,7 @@ namespace duckdb // Make the API call to write data to the Google Sheet // Today, this is only append. - std::string response = fetch_sheet_data(gstate.spreadsheet_id, gstate.token, sheet_name, HttpMethod::POST, request_body); + std::string response = call_sheets_api(gstate.spreadsheet_id, gstate.token, encoded_sheet_name, HttpMethod::POST, request_body); // Check for errors in the response json response_json = parseJson(response); diff --git a/src/gsheets_read.cpp b/src/gsheets_read.cpp index 4bd1d02..13372b4 100644 --- a/src/gsheets_read.cpp +++ b/src/gsheets_read.cpp @@ -12,7 +12,7 @@ using json = nlohmann::json; ReadSheetBindData::ReadSheetBindData(string spreadsheet_id, string token, bool header, string sheet_name) : spreadsheet_id(spreadsheet_id), token(token), finished(false), row_index(0), header(header), sheet_name(sheet_name) { - response = fetch_sheet_data(spreadsheet_id, token, sheet_name, HttpMethod::GET); + response = call_sheets_api(spreadsheet_id, token, sheet_name, HttpMethod::GET); } @@ -81,20 +81,7 @@ unique_ptr ReadSheetBind(ClientContext &context, TableFunctionBind // Default values bool header = true; - string sheet = "Sheet1"; - - // Parse named parameters - for (auto &kv : input.named_parameters) { - if (kv.first == "header") { - try { - header = kv.second.GetValue(); - } catch (const std::exception& e) { - throw InvalidInputException("Invalid value for 'header' parameter. Expected a boolean value."); - } - } else if (kv.first == "sheet") { - sheet = kv.second.GetValue(); - } - } + string sheet_name = "Sheet1"; // Extract the spreadsheet ID from the input (URL or ID) std::string spreadsheet_id = extract_spreadsheet_id(sheet_input); @@ -125,7 +112,26 @@ unique_ptr ReadSheetBind(ClientContext &context, TableFunctionBind std::string token = token_value.ToString(); - auto bind_data = make_uniq(spreadsheet_id, token, header, sheet); + // Get sheet name from URL + std::string sheet_id = extract_sheet_id(sheet_input); + sheet_name = get_sheet_name_from_id(spreadsheet_id, sheet_id, token); + std::string encoded_sheet_name = url_encode(sheet_name); + + // Parse named parameters + for (auto &kv : input.named_parameters) { + if (kv.first == "header") { + try { + header = kv.second.GetValue(); + } catch (const std::exception& e) { + throw InvalidInputException("Invalid value for 'header' parameter. Expected a boolean value."); + } + } else if (kv.first == "sheet") { + sheet_name = kv.second.GetValue(); + } + } + + + auto bind_data = make_uniq(spreadsheet_id, token, header, encoded_sheet_name); json cleanJson = parseJson(bind_data->response); SheetData sheet_data = getSheetData(cleanJson); diff --git a/src/gsheets_requests.cpp b/src/gsheets_requests.cpp index a8e6c0c..2ac154a 100644 --- a/src/gsheets_requests.cpp +++ b/src/gsheets_requests.cpp @@ -62,6 +62,8 @@ namespace duckdb request += body; } + + if (BIO_write(bio, request.c_str(), request.length()) <= 0) { BIO_free_all(bio); @@ -89,7 +91,7 @@ namespace duckdb return response; } - std::string fetch_sheet_data(const std::string &spreadsheet_id, const std::string &token, const std::string &sheet_name, HttpMethod method, const std::string &body) + std::string call_sheets_api(const std::string &spreadsheet_id, const std::string &token, const std::string &sheet_name, HttpMethod method, const std::string &body) { std::string host = "sheets.googleapis.com"; std::string path = "/v4/spreadsheets/" + spreadsheet_id + "/values/" + sheet_name; diff --git a/src/gsheets_utils.cpp b/src/gsheets_utils.cpp index 803f4ce..978acbc 100644 --- a/src/gsheets_utils.cpp +++ b/src/gsheets_utils.cpp @@ -1,8 +1,10 @@ #include "gsheets_utils.hpp" +#include "gsheets_requests.hpp" #include "duckdb/common/exception.hpp" #include #include #include +#include using json = nlohmann::json; namespace duckdb { @@ -27,14 +29,36 @@ std::string extract_spreadsheet_id(const std::string& input) { } std::string extract_sheet_id(const std::string& input) { - if (input.find("docs.google.com/spreadsheets/d/") != std::string::npos && input.find("edit?gid=") != std::string::npos) { + if (input.find("docs.google.com/spreadsheets/d/") != std::string::npos && input.find("gid=") != std::string::npos) { std::regex sheet_id_regex("gid=([0-9]+)"); std::smatch match; if (std::regex_search(input, match, sheet_id_regex) && match.size() > 1) { return match.str(1); } } - throw duckdb::InvalidInputException("Invalid Google Sheets URL or ID"); + return "0"; +} + +std::string get_sheet_name_from_id(const std::string& spreadsheet_id, const std::string& sheet_id, const std::string& token) { + std::string metadata_response = get_spreadsheet_metadata(spreadsheet_id, token); + json metadata = parseJson(metadata_response); + for (const auto& sheet : metadata["sheets"]) { + if (sheet["properties"]["sheetId"].get() == std::stoi(sheet_id)) { + return sheet["properties"]["title"].get(); + } + } + throw duckdb::InvalidInputException("Sheet with ID %s not found", sheet_id); +} + +std::string get_sheet_id_from_name(const std::string& spreadsheet_id, const std::string& sheet_name, const std::string& token) { + std::string metadata_response = get_spreadsheet_metadata(spreadsheet_id, token); + json metadata = parseJson(metadata_response); + for (const auto& sheet : metadata["sheets"]) { + if (sheet["properties"]["title"].get() == sheet_name) { + return sheet["properties"]["sheetId"].get(); + } + } + throw duckdb::InvalidInputException("Sheet with name %s not found", sheet_name); } json parseJson(const std::string& json_str) { @@ -99,4 +123,18 @@ std::string generate_random_string(size_t length) { return result; } -} // namespace duckdb +std::string url_encode(const std::string& str) { + std::string encoded; + for (char c : str) { + if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') { + encoded += c; + } else { + std::stringstream ss; + ss << std::hex << std::uppercase << static_cast(static_cast(c)); + encoded += '%' + ss.str(); + } + } + return encoded; +} + +} // namespace duckdb \ No newline at end of file diff --git a/src/include/gsheets_requests.hpp b/src/include/gsheets_requests.hpp index 417a11e..8a487f5 100644 --- a/src/include/gsheets_requests.hpp +++ b/src/include/gsheets_requests.hpp @@ -13,7 +13,7 @@ enum class HttpMethod { std::string perform_https_request(const std::string& host, const std::string& path, const std::string& token, HttpMethod method = HttpMethod::GET, const std::string& body = "", const std::string& content_type = "application/json"); -std::string fetch_sheet_data(const std::string& spreadsheet_id, const std::string& token, const std::string& sheet_name, HttpMethod method = HttpMethod::GET, const std::string& body = ""); +std::string call_sheets_api(const std::string& spreadsheet_id, const std::string& token, const std::string& sheet_name, HttpMethod method = HttpMethod::GET, const std::string& body = ""); std::string delete_sheet_data(const std::string& spreadsheet_id, const std::string& token, const std::string& sheet_name); diff --git a/src/include/gsheets_utils.hpp b/src/include/gsheets_utils.hpp index 6e353bb..7f6ce19 100644 --- a/src/include/gsheets_utils.hpp +++ b/src/include/gsheets_utils.hpp @@ -25,6 +25,24 @@ std::string extract_spreadsheet_id(const std::string& input); */ std::string extract_sheet_id(const std::string& input); +/** + * Gets the sheet name from a spreadsheet ID and sheet ID + * @param spreadsheet_id The spreadsheet ID + * @param sheet_id The sheet ID + * @param token The Google API token + * @return The sheet name + */ +std::string get_sheet_name_from_id(const std::string& spreadsheet_id, const std::string& sheet_id, const std::string& token); + +/** + * Gets the sheet ID from a spreadsheet ID and sheet name + * @param spreadsheet_id The spreadsheet ID + * @param sheet_name The sheet name + * @param token The Google API token + * @return The sheet ID + */ +std::string get_sheet_id_from_name(const std::string& spreadsheet_id, const std::string& sheet_name, const std::string& token); + struct SheetData { std::string range; std::string majorDimension; @@ -33,6 +51,11 @@ struct SheetData { SheetData getSheetData(const json& j); +/** + * Parses a JSON string into a json object + * @param json_str The JSON string + * @return The parsed json object + */ json parseJson(const std::string& json_str); /** @@ -42,4 +65,12 @@ json parseJson(const std::string& json_str); */ std::string generate_random_string(size_t length); -} // namespace duckdb + +/** + * Encodes a string to be used in a URL + * @param str The string to encode + * @return The encoded string + */ +std::string url_encode(const std::string& str); + +} // namespace duckdb \ No newline at end of file From 1e912d92dd090a7d563d21bf51ce5f19330c43e0 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Mon, 28 Oct 2024 22:56:17 -0400 Subject: [PATCH 6/7] reorder, fix test --- src/gsheets_read.cpp | 2 +- test/sql/copyto.test | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gsheets_read.cpp b/src/gsheets_read.cpp index 13372b4..1002b9c 100644 --- a/src/gsheets_read.cpp +++ b/src/gsheets_read.cpp @@ -115,7 +115,6 @@ unique_ptr ReadSheetBind(ClientContext &context, TableFunctionBind // Get sheet name from URL std::string sheet_id = extract_sheet_id(sheet_input); sheet_name = get_sheet_name_from_id(spreadsheet_id, sheet_id, token); - std::string encoded_sheet_name = url_encode(sheet_name); // Parse named parameters for (auto &kv : input.named_parameters) { @@ -130,6 +129,7 @@ unique_ptr ReadSheetBind(ClientContext &context, TableFunctionBind } } + std::string encoded_sheet_name = url_encode(sheet_name); auto bind_data = make_uniq(spreadsheet_id, token, header, encoded_sheet_name); diff --git a/test/sql/copyto.test b/test/sql/copyto.test index 3f4b867..5f90cf7 100644 --- a/test/sql/copyto.test +++ b/test/sql/copyto.test @@ -23,11 +23,11 @@ select 'LibreOffice', 'Calc', 2000; # Copy the table to Google Sheet statement ok -copy spreadsheets to 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit#gid=0' (format gsheet); +copy spreadsheets to 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987' (format gsheet); # Read the table from Google Sheet query III -from read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit#gid=0'); +from read_gsheet('https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987'); ---- Microsoft Excel 1985 Google Google Sheets 2006 From 86db304e8e8c98256c923a8a53c37561313f2849 Mon Sep 17 00:00:00 2001 From: Archie Wood <58074498+archiewood@users.noreply.github.com> Date: Mon, 28 Oct 2024 23:03:50 -0400 Subject: [PATCH 7/7] rename tests --- TODO.md | 6 +++--- test/sql/{copyto.test => copy_to.test} | 2 +- test/sql/{gsheets.test => read_gsheet.test} | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) rename test/sql/{copyto.test => copy_to.test} (97%) rename test/sql/{gsheets.test => read_gsheet.test} (95%) diff --git a/TODO.md b/TODO.md index ebadc16..620230f 100644 --- a/TODO.md +++ b/TODO.md @@ -2,7 +2,7 @@ ## Copy to - [x] header -- [ ] sheet +- [x] sheet - [ ] types - [ ] implicit copy to when it sees a gsheets url - [x] warn when more than 2048 rows @@ -19,5 +19,5 @@ - [ ] Service Account keyfile ## Tests -- Tests for read_gsheet() -- Tests for copy to \ No newline at end of file +- [x] Tests for read_gsheet() +- [x] Tests for copy to diff --git a/test/sql/copyto.test b/test/sql/copy_to.test similarity index 97% rename from test/sql/copyto.test rename to test/sql/copy_to.test index 5f90cf7..6cc7108 100644 --- a/test/sql/copyto.test +++ b/test/sql/copy_to.test @@ -1,4 +1,4 @@ -# name: test/sql/copyto.test +# name: test/sql/copy_to.test # description: test COPY TO function # group: [gsheets] diff --git a/test/sql/gsheets.test b/test/sql/read_gsheet.test similarity index 95% rename from test/sql/gsheets.test rename to test/sql/read_gsheet.test index 0aecd3b..9c4fcf2 100644 --- a/test/sql/gsheets.test +++ b/test/sql/read_gsheet.test @@ -1,5 +1,5 @@ -# name: test/sql/gsheets.test -# description: test gsheets extension +# name: test/sql/read_gsheet.test +# description: test read_gsheet() function # group: [gsheets] require-env TOKEN