Skip to content

Commit

Permalink
working COPY TO
Browse files Browse the repository at this point in the history
  • Loading branch information
archiewood committed Oct 17, 2024
1 parent a83ef21 commit 6d81d69
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 67 deletions.
65 changes: 55 additions & 10 deletions src/gsheets_copy.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#include "gsheets_copy.hpp"
#include "gsheets_requests.hpp"
#include "gsheets_auth.hpp"
#include "gsheets_utils.hpp"

#include "duckdb/common/serializer/buffered_file_writer.hpp"
#include "duckdb/common/file_system.hpp"
#include <iostream>
#include "duckdb/main/secret/secret_manager.hpp"
#include <json.hpp>

using json = nlohmann::json;
Expand All @@ -19,12 +23,15 @@ namespace duckdb

struct GSheetCopyGlobalState : public GlobalFunctionData
{
explicit GSheetCopyGlobalState(ClientContext &context)
explicit GSheetCopyGlobalState(ClientContext &context, const string &sheet_id, const string &token, const string &sheet_name)
: sheet_id(sheet_id), token(token), sheet_name(sheet_name)
{
}

public:
unique_ptr<BufferedFileWriter> file_writer;
string sheet_id;
string token;
string sheet_name;
};

struct GSheetWriteBindData : public TableFunctionData
Expand All @@ -38,10 +45,34 @@ namespace duckdb

unique_ptr<GlobalFunctionData> GSheetCopyFunction::GSheetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data, const string &file_path)
{
auto result = make_uniq<GSheetCopyGlobalState>(context);
auto &fs = FileSystem::GetFileSystem(context);
result->file_writer = make_uniq<BufferedFileWriter>(fs, file_path);
return std::move(result);
auto &secret_manager = SecretManager::Get(context);
auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context);
auto secret_match = secret_manager.LookupSecret(transaction, "gsheet", "gsheet");

if (!secret_match.HasMatch()) {
throw InvalidInputException("No 'gsheet' secret found. Please create a secret with 'CREATE SECRET' first.");
}

auto &secret = secret_match.GetSecret();
if (secret.GetType() != "gsheet") {
throw InvalidInputException("Invalid secret type. Expected 'gsheet', got '%s'", secret.GetType());
}

const auto *kv_secret = dynamic_cast<const KeyValueSecret*>(&secret);
if (!kv_secret) {
throw InvalidInputException("Invalid secret format for 'gsheet' secret");
}

Value token_value;
if (!kv_secret->TryGetValue("token", token_value)) {
throw InvalidInputException("'token' not found in 'gsheet' secret");
}

std::string token = token_value.ToString();
std::string sheet_id = extract_sheet_id(file_path);
std::string sheet_name = "Sheet1"; // TODO: make this configurable

return make_uniq<GSheetCopyGlobalState>(context, sheet_id, token, sheet_name);
}

unique_ptr<LocalFunctionData> GSheetCopyFunction::GSheetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data_p)
Expand All @@ -52,9 +83,13 @@ namespace duckdb
void GSheetCopyFunction::GSheetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate_p, LocalFunctionData &lstate, DataChunk &input)
{
input.Flatten();
auto &gstate = gstate_p.Cast<GSheetCopyGlobalState>();

// Create object ready to write to Google Sheet
json sheet_data;
sheet_data["range"] = "A1"; // Assuming we start from A1, adjust as needed

// TODO: make this configurable
sheet_data["range"] = "Sheet1";
sheet_data["majorDimension"] = "ROWS";

vector<vector<string>> values;
Expand Down Expand Up @@ -89,6 +124,16 @@ namespace duckdb
}
sheet_data["values"] = values;

std::cout << sheet_data.dump() << std::endl;
// Convert the JSON object to a string
std::string request_body = sheet_data.dump();

// Make the API call to write data to the Google Sheet
std::string response = fetch_sheet_data(gstate.sheet_id, gstate.token, gstate.sheet_name, HttpMethod::PUT, request_body);

// Check for errors in the response
json response_json = parseJson(response);
if (response_json.contains("error")) {
throw duckdb::IOException("Error writing to Google Sheet: " + response_json["error"]["message"].get<std::string>());
}
}
} // namespace duckdb
} // namespace duckdb
1 change: 1 addition & 0 deletions src/gsheets_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ static void LoadInternal(DatabaseInstance &instance) {
SSL_library_init();
SSL_load_error_strings();
OpenSSL_add_all_algorithms();


// Register read_gsheet table function
TableFunction read_gsheet_function("read_gsheet", {LogicalType::VARCHAR}, ReadSheetFunction, ReadSheetBind);
Expand Down
50 changes: 5 additions & 45 deletions src/gsheets_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,49 +12,7 @@ using json = nlohmann::json;

ReadSheetBindData::ReadSheetBindData(string sheet_id, string token, bool header, string sheet_name)
: sheet_id(sheet_id), token(token), finished(false), row_index(0), header(header), sheet_name(sheet_name) {
response = fetch_sheet_data(sheet_id, token, sheet_name);
}

/**
 * Parses a raw Google Sheets API response into a SheetData record.
 *
 * Only the text between the first '{' and the last '}' of json_str is handed
 * to the JSON parser; anything before or after that span is ignored.
 *
 * @param json_str Raw response text expected to contain one JSON object.
 * @return SheetData filled from the "range", "majorDimension" and "values" fields.
 * @throws std::runtime_error if no '{' or '}' is present, if the response carries
 *         an "error" object, or if none of the expected fields are present.
 * @throws json::exception re-thrown (after logging to std::cerr) when parsing or
 *         field conversion fails.
 */
SheetData parseJson(const std::string& json_str) {
    SheetData result;
    try {
        // Find the start of the JSON object
        size_t start = json_str.find('{');
        if (start == std::string::npos) {
            throw std::runtime_error("No JSON object found in the response");
        }

        // Find the end of the JSON object
        size_t end = json_str.rfind('}');
        if (end == std::string::npos) {
            throw std::runtime_error("No closing brace found in the JSON response");
        }

        // Extract the JSON object
        std::string clean_json = json_str.substr(start, end - start + 1);

        json j = json::parse(clean_json);

        if (j.contains("range") && j.contains("majorDimension") && j.contains("values")) {
            result.range = j["range"].get<std::string>();
            result.majorDimension = j["majorDimension"].get<std::string>();
            result.values = j["values"].get<std::vector<std::vector<std::string>>>();
        } else if (j.contains("error")) {
            std::string message = j["error"]["message"].get<std::string>();
            int code = j["error"]["code"].get<int>();
            throw std::runtime_error("Google Sheets API error: " + std::to_string(code) + " - " + message);
        } else {
            std::cerr << "JSON does not contain expected fields" << std::endl;
            std::cerr << "Raw JSON string: " << json_str << std::endl;
            // A bare `throw;` here would have no active exception to re-throw and
            // would call std::terminate, so raise a real exception instead.
            throw std::runtime_error("JSON does not contain expected fields");
        }
    } catch (const json::exception& e) {
        std::cerr << "JSON parsing error: " << e.what() << std::endl;
        std::cerr << "Raw JSON string: " << json_str << std::endl;
        throw;
    }

    return result;
}
response = fetch_sheet_data(sheet_id, token, sheet_name, HttpMethod::GET);
}


Expand All @@ -65,7 +23,8 @@ void ReadSheetFunction(ClientContext &context, TableFunctionInput &data_p, DataC
return;
}

SheetData sheet_data = parseJson(bind_data.response);
json cleanJson = parseJson(bind_data.response);
SheetData sheet_data = getSheetData(cleanJson);

idx_t row_count = 0;
idx_t column_count = output.ColumnCount();
Expand Down Expand Up @@ -168,7 +127,8 @@ unique_ptr<FunctionData> ReadSheetBind(ClientContext &context, TableFunctionBind

auto bind_data = make_uniq<ReadSheetBindData>(sheet_id, token, header, sheet);

SheetData sheet_data = parseJson(bind_data->response);
json cleanJson = parseJson(bind_data->response);
SheetData sheet_data = getSheetData(cleanJson);

if (!sheet_data.values.empty()) {
idx_t start_index = header ? 1 : 0;
Expand Down
12 changes: 9 additions & 3 deletions src/gsheets_requests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ namespace duckdb
BIO_free_all(bio);
SSL_CTX_free(ctx);

// Extract body from response
// Extract body from response
size_t body_start = response.find("\r\n\r\n");
if (body_start != std::string::npos)
Expand All @@ -90,11 +91,16 @@ namespace duckdb
return response;
}

std::string fetch_sheet_data(const std::string &sheet_id, const std::string &token, const std::string &sheet_name)
std::string fetch_sheet_data(const std::string &sheet_id, const std::string &token, const std::string &sheet_name, HttpMethod method, const std::string &body)
{
std::string host = "sheets.googleapis.com";
std::string path = "/v4/spreadsheets/" + sheet_id + "/values/" + sheet_name;

return perform_https_request(host, path, token);
if (method == HttpMethod::PUT) {
path += "?valueInputOption=RAW";
}

return perform_https_request(host, path, token, method, body);
return perform_https_request(host, path, token, method, body);
}
}
}
47 changes: 47 additions & 0 deletions src/gsheets_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#include "gsheets_utils.hpp"
#include "duckdb/common/exception.hpp"
#include <regex>
#include <json.hpp>
#include <iostream>

using json = nlohmann::json;
namespace duckdb {

std::string extract_sheet_id(const std::string& input) {
Expand All @@ -23,4 +26,48 @@ std::string extract_sheet_id(const std::string& input) {
throw duckdb::InvalidInputException("Invalid Google Sheets URL or ID");
}

/**
 * Extracts and parses the JSON object embedded in a response string.
 *
 * The span from the first '{' to the last '}' of json_str is cut out and
 * passed to the JSON parser; surrounding text is discarded.
 *
 * @param json_str Response text expected to contain a JSON object.
 * @return The parsed JSON value.
 * @throws std::runtime_error if json_str has no '{' or no '}'.
 * @throws json::exception re-thrown after the failure and the raw input have
 *         been written to std::cerr.
 */
json parseJson(const std::string& json_str) {
    // Locate the outermost braces first; these failures are not JSON
    // exceptions, so they bypass the logging handler below.
    const size_t open_pos = json_str.find('{');
    if (open_pos == std::string::npos) {
        throw std::runtime_error("No JSON object found in the response");
    }

    const size_t close_pos = json_str.rfind('}');
    if (close_pos == std::string::npos) {
        throw std::runtime_error("No closing brace found in the JSON response");
    }

    try {
        // Parse only the brace-delimited slice of the response
        return json::parse(json_str.substr(open_pos, close_pos - open_pos + 1));
    } catch (const json::exception& e) {
        std::cerr << "JSON parsing error: " << e.what() << std::endl;
        std::cerr << "Raw JSON string: " << json_str << std::endl;
        throw;
    }
}

/**
 * Converts a parsed Google Sheets API response into a SheetData record.
 *
 * @param j Parsed JSON response.
 * @return SheetData filled from the "range", "majorDimension" and "values" fields
 *         when all three are present.
 * @throws std::runtime_error if the response carries an "error" object (the
 *         message includes its code and message), or if neither the expected
 *         fields nor an "error" object are present.
 * @throws json::exception if a field is missing inside "error" or has an
 *         unexpected type.
 */
SheetData getSheetData(const json& j) {
    if (j.contains("range") && j.contains("majorDimension") && j.contains("values")) {
        SheetData result;
        result.range = j["range"].get<std::string>();
        result.majorDimension = j["majorDimension"].get<std::string>();
        result.values = j["values"].get<std::vector<std::vector<std::string>>>();
        return result;
    }

    if (j.contains("error")) {
        // at() throws on a missing key; operator[] on a const json with a
        // missing key is undefined behaviour.
        const json& error = j.at("error");
        std::string message = error.at("message").get<std::string>();
        int code = error.at("code").get<int>();
        throw std::runtime_error("Google Sheets API error: " + std::to_string(code) + " - " + message);
    }

    std::cerr << "JSON does not contain expected fields" << std::endl;
    std::cerr << "Raw JSON string: " << j.dump() << std::endl;
    // A bare `throw;` outside a catch handler calls std::terminate; raise a
    // real exception so callers can report the failure.
    throw std::runtime_error("JSON does not contain expected fields");
}

} // namespace duckdb
8 changes: 0 additions & 8 deletions src/include/gsheets_read.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,6 @@ struct ReadSheetBindData : public TableFunctionData {
ReadSheetBindData(string sheet_id, string token, bool header, string sheet_name);
};

// Holds the fields that parseJson copies out of a sheet response.
struct SheetData {
// Copied from the "range" field of the response
std::string range;
// Copied from the "majorDimension" field of the response
std::string majorDimension;
// Copied from the "values" field: a list of lists of cell strings
std::vector<std::vector<std::string>> values;
};

SheetData parseJson(const std::string& json_str);

void ReadSheetFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output);

unique_ptr<FunctionData> ReadSheetBind(ClientContext &context, TableFunctionBindInput &input,
Expand Down
2 changes: 1 addition & 1 deletion src/include/gsheets_requests.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ enum class HttpMethod {
std::string perform_https_request(const std::string& host, const std::string& path, const std::string& token,
HttpMethod method = HttpMethod::GET, const std::string& body = "");

std::string fetch_sheet_data(const std::string& sheet_id, const std::string& token, const std::string& sheet_name);
std::string fetch_sheet_data(const std::string& sheet_id, const std::string& token, const std::string& sheet_name, HttpMethod method = HttpMethod::GET, const std::string& body = "");
}
14 changes: 14 additions & 0 deletions src/include/gsheets_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#pragma once

#include <string>
#include <vector>
#include <json.hpp>

using json = nlohmann::json;

namespace duckdb {

Expand All @@ -12,4 +16,14 @@ namespace duckdb {
*/
std::string extract_sheet_id(const std::string& input);

// Holds the fields that getSheetData copies out of a parsed sheet response.
struct SheetData {
// Copied from the "range" field of the response
std::string range;
// Copied from the "majorDimension" field of the response
std::string majorDimension;
// Copied from the "values" field: a list of lists of cell strings
std::vector<std::vector<std::string>> values;
};

SheetData getSheetData(const json& j);

json parseJson(const std::string& json_str);

} // namespace duckdb
Empty file removed tmp_value
Empty file.
Empty file removed value
Empty file.

0 comments on commit 6d81d69

Please sign in to comment.