-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move httpfs extension outside of duckdb and rename to cached_httpfs
* Rename/modify all occurrences of httpfs * Skip clang format for folder
- Loading branch information
Showing
22 changed files
with
3,946 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
DisableFormat: true | ||
SortIncludes: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Build script for the cached_httpfs extension: produces the static and the
# loadable variant from the same source list and links both against mbedtls
# and OpenSSL.
cmake_minimum_required(VERSION 2.8.12...3.29)

project(CachedHTTPFsExtension)

add_extension_definitions()

include_directories(include ../duckdb/third_party/httplib
                    ../duckdb/parquet/include)

# Sources shared by the static and the loadable build.
set(CACHED_HTTPFS_SOURCES
    hffs.cpp
    s3fs.cpp
    httpfs.cpp
    http_state.cpp
    crypto.cpp
    create_secret_functions.cpp
    cached_httpfs_extension.cpp)

build_static_extension(cached_httpfs ${CACHED_HTTPFS_SOURCES})

# Extra parameter forwarded to build_loadable_extension ahead of the sources.
set(PARAMETERS "-warnings")
build_loadable_extension(cached_httpfs ${PARAMETERS} ${CACHED_HTTPFS_SOURCES})

# Must be set before find_package(OpenSSL) so it can influence the lookup.
if(MINGW)
  set(OPENSSL_USE_STATIC_LIBS TRUE)
endif()

find_package(OpenSSL REQUIRED)
include_directories(${OPENSSL_INCLUDE_DIR})

# Link the loadable target first, then the static one.
foreach(ext_target cached_httpfs_loadable_extension cached_httpfs_extension)
  target_link_libraries(${ext_target} duckdb_mbedtls ${OPENSSL_LIBRARIES})
endforeach()

# MinGW builds additionally link zlib and crypt32.
if(MINGW)
  find_package(ZLIB)
  foreach(ext_target cached_httpfs_loadable_extension cached_httpfs_extension)
    target_link_libraries(${ext_target} ZLIB::ZLIB -lcrypt32)
  endforeach()
endif()

install(
  TARGETS cached_httpfs_extension
  EXPORT "${DUCKDB_EXPORT_SET}"
  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Documentation on setting up the S3 tests can be found [here](../../test/sql/copy/s3/README.md) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#define DUCKDB_EXTENSION_MAIN | ||
|
||
#include "cached_httpfs_extension.hpp" | ||
|
||
#include "create_secret_functions.hpp" | ||
#include "duckdb.hpp" | ||
#include "s3fs.hpp" | ||
#include "hffs.hpp" | ||
#include "crypto.hpp" | ||
|
||
namespace duckdb { | ||
|
||
// Shared initialization routine for the extension: registers the HTTP,
// HuggingFace and S3 file systems on the database instance, declares all
// extension options with their defaults, applies AWS credentials from the
// environment, registers the secret functions and installs the OpenSSL-backed
// encryption factory.
static void LoadInternal(DatabaseInstance &instance) {
	// Self-check of the S3 file system: runs some tests to confirm the hashes work out.
	S3FileSystem::Verify();

	// Register the file systems in this fixed order: HTTP, HuggingFace, S3.
	FileSystem &file_system = instance.GetFileSystem();
	file_system.RegisterSubSystem(make_uniq<HTTPFileSystem>());
	file_system.RegisterSubSystem(make_uniq<HuggingFaceFileSystem>());
	file_system.RegisterSubSystem(make_uniq<S3FileSystem>(BufferManager::GetBufferManager(instance)));

	DBConfig &db_config = DBConfig::GetConfig(instance);

	// ---- Global HTTP options ----
	// One timeout value covers all 4 kinds of timeout (read/write/connection/retry);
	// it could be split into 4 separate options if users ever need that.
	db_config.AddExtensionOption("http_timeout", "HTTP timeout read/write/connection/retry", LogicalType::UBIGINT,
	                             Value(30000));
	db_config.AddExtensionOption("http_retries", "HTTP retries on I/O error", LogicalType::UBIGINT, Value(3));
	db_config.AddExtensionOption("http_retry_wait_ms", "Time between retries", LogicalType::UBIGINT, Value(100));
	db_config.AddExtensionOption("force_download", "Forces upfront download of file", LogicalType::BOOLEAN,
	                             Value(false));
	// The backoff factor cuts down the number of requests issued while waiting:
	// e.g. a retry_wait_ms of 50 with a backoff factor of 2 yields waits of
	// 0 50 100 200 400... and so on.
	db_config.AddExtensionOption("http_retry_backoff", "Backoff factor for exponentially increasing retry wait time",
	                             LogicalType::FLOAT, Value(4));
	db_config.AddExtensionOption(
	    "http_keep_alive",
	    "Keep alive connections. Setting this to false can help when running into connection failures",
	    LogicalType::BOOLEAN, Value(true));
	db_config.AddExtensionOption("enable_server_cert_verification", "Enable server side certificate verification.",
	                             LogicalType::BOOLEAN, Value(false));
	db_config.AddExtensionOption("ca_cert_file", "Path to a custom certificate file for self-signed certificates.",
	                             LogicalType::VARCHAR, Value(""));

	// ---- Global S3 options ----
	db_config.AddExtensionOption("s3_region", "S3 Region", LogicalType::VARCHAR, Value("us-east-1"));
	db_config.AddExtensionOption("s3_access_key_id", "S3 Access Key ID", LogicalType::VARCHAR);
	db_config.AddExtensionOption("s3_secret_access_key", "S3 Access Key", LogicalType::VARCHAR);
	db_config.AddExtensionOption("s3_session_token", "S3 Session Token", LogicalType::VARCHAR);
	db_config.AddExtensionOption("s3_endpoint", "S3 Endpoint", LogicalType::VARCHAR);
	db_config.AddExtensionOption("s3_url_style", "S3 URL style", LogicalType::VARCHAR, Value("vhost"));
	db_config.AddExtensionOption("s3_use_ssl", "S3 use SSL", LogicalType::BOOLEAN, Value(true));
	db_config.AddExtensionOption("s3_url_compatibility_mode", "Disable Globs and Query Parameters on S3 URLs",
	                             LogicalType::BOOLEAN, Value(false));

	// ---- S3 uploader options ----
	db_config.AddExtensionOption("s3_uploader_max_filesize", "S3 Uploader max filesize (between 50GB and 5TB)",
	                             LogicalType::VARCHAR, "800GB");
	db_config.AddExtensionOption("s3_uploader_max_parts_per_file",
	                             "S3 Uploader max parts per file (between 1 and 10000)", LogicalType::UBIGINT,
	                             Value(10000));
	db_config.AddExtensionOption("s3_uploader_thread_limit", "S3 Uploader global thread limit", LogicalType::UBIGINT,
	                             Value(50));

	// ---- HuggingFace options ----
	db_config.AddExtensionOption("hf_max_per_page", "Debug option to limit number of items returned in list requests",
	                             LogicalType::UBIGINT, Value::UBIGINT(0));

	// Apply the AWS environment credentials provider to the configuration.
	auto env_credentials = make_uniq<AWSEnvironmentCredentialsProvider>(db_config);
	env_credentials->SetAll();

	CreateS3SecretFunctions::Register(instance);
	CreateBearerTokenFunctions::Register(instance);

	// Point the database at the OpenSSL encryption state factory.
	db_config.encryption_util = make_shared_ptr<AESGCMStateSSLFactory>();
}
|
||
// Extension entry point: runs the shared initialization on the database
// instance owned by `db`.
void CachedHttpfsExtension::Load(DuckDB &db) {
	auto &database_instance = *db.instance;
	LoadInternal(database_instance);
}
// Returns the extension's name, "cached_httpfs".
std::string CachedHttpfsExtension::Name() {
	return std::string("cached_httpfs");
}
|
||
// Returns the extension version baked in at compile time, or an empty string
// when no version macro was supplied.
//
// The extension is named "cached_httpfs", so the version macro matching that
// name (EXT_VERSION_CACHED_HTTPFS) is checked first.
// NOTE(review): presumably the build defines EXT_VERSION_<UPPERCASE EXTENSION NAME>;
// confirm against the build scripts. The original EXT_VERSION_HTTPFS macro is
// kept as a fallback so builds that still define it behave as before.
std::string CachedHttpfsExtension::Version() const {
#if defined(EXT_VERSION_CACHED_HTTPFS)
	return EXT_VERSION_CACHED_HTTPFS;
#elif defined(EXT_VERSION_HTTPFS)
	return EXT_VERSION_HTTPFS;
#else
	return "";
#endif
}
|
||
} // namespace duckdb | ||
|
||
extern "C" { | ||
|
||
// C-linkage init hook: runs the shared initialization routine on the given
// database instance.
DUCKDB_EXTENSION_API void cached_httpfs_init(duckdb::DatabaseInstance &db) {
	// Qualified explicitly: this function lives outside namespace duckdb.
	duckdb::LoadInternal(db);
}
|
||
// C-linkage version hook: reports the version of the DuckDB library this
// extension was compiled against.
DUCKDB_EXTENSION_API const char *cached_httpfs_version() {
	const char *library_version = duckdb::DuckDB::LibraryVersion();
	return library_version;
}
} | ||
|
||
#ifndef DUCKDB_EXTENSION_MAIN | ||
#error DUCKDB_EXTENSION_MAIN not defined | ||
#endif |
Oops, something went wrong.