diff --git a/lib/explorer/fss/s3.ex b/lib/explorer/fss/s3.ex index 0efac2c60..a37604ead 100644 --- a/lib/explorer/fss/s3.ex +++ b/lib/explorer/fss/s3.ex @@ -16,15 +16,19 @@ defimpl Explorer.FSS, for: FSS.S3.Entry do end defp url(%FSS.S3.Entry{} = entry) do + config = entry.config + + uri = URI.parse(config.endpoint) + uri = - if endpoint = entry.config.endpoint do - URI.parse(endpoint) + if is_nil(config.bucket) do + uri else - URI.parse("https://s3.#{entry.config.region}.amazonaws.com") + append_path(uri, "/" <> config.bucket) end uri - |> append_path("/" <> entry.bucket <> "/" <> entry.key) + |> append_path("/" <> entry.key) |> URI.to_string() end diff --git a/lib/explorer/polars_backend/shared.ex b/lib/explorer/polars_backend/shared.ex index 8df5e5e94..1fc5b0ec2 100644 --- a/lib/explorer/polars_backend/shared.ex +++ b/lib/explorer/polars_backend/shared.ex @@ -185,8 +185,11 @@ defmodule Explorer.PolarsBackend.Shared do the `System.tmp_dir()`. """ def build_path_for_entry(%FSS.S3.Entry{} = entry) do + bucket = entry.config.bucket || "default-explorer-bucket" + hash = - :crypto.hash(:sha256, entry.bucket <> "/" <> entry.key) |> Base.url_encode64(padding: false) + :crypto.hash(:sha256, entry.config.endpoint <> "/" <> bucket <> "/" <> entry.key) + |> Base.url_encode64(padding: false) id = "s3-file-#{hash}" diff --git a/mix.exs b/mix.exs index 9a27ba69d..3d571c323 100644 --- a/mix.exs +++ b/mix.exs @@ -38,7 +38,7 @@ defmodule Explorer.MixProject do [ {:aws_signature, "~> 0.3"}, {:castore, "~> 1.0"}, - {:fss, github: "elixir-explorer/fss"}, + {:fss, github: "elixir-explorer/fss", branch: "main"}, {:rustler_precompiled, "~> 0.6"}, {:table, "~> 0.1.2"}, {:adbc, "~> 0.1", optional: true}, diff --git a/mix.lock b/mix.lock index 6a28d5bdf..45674eae7 100644 --- a/mix.lock +++ b/mix.lock @@ -14,7 +14,7 @@ "earmark_parser": {:hex, :earmark_parser, "1.4.33", "3c3fd9673bb5dcc9edc28dd90f50c87ce506d1f71b70e3de69aa8154bc695d44", [:mix], [], "hexpm", "2d526833729b59b9fdb85785078697c72ac5e5066350663e5be6a1182da61b8f"}, "elixir_make": {:hex, :elixir_make, "0.7.7", "7128c60c2476019ed978210c245badf08b03dbec4f24d05790ef791da11aa17c", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}], "hexpm", "5bc19fff950fad52bbe5f211b12db9ec82c6b34a9647da0c2224b8b8464c7e6c"}, "ex_doc": {:hex, :ex_doc, "0.30.3", "bfca4d340e3b95f2eb26e72e4890da83e2b3a5c5b0e52607333bf5017284b063", [:mix], [{:earmark_parser, "~> 1.4.31", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "fbc8702046c1d25edf79de376297e608ac78cdc3a29f075484773ad1718918b6"}, - "fss": {:git, "https://github.com/elixir-explorer/fss.git", "417a1b6716c89d6a59cae01340ed0aff7e5b1164", []}, + "fss": {:git, "https://github.com/elixir-explorer/fss.git", "19ed6ce8359e9790e818b732ba8d552aaf36e029", [branch: "main"]}, "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, "makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"}, "makeup_elixir": {:hex, :makeup_elixir, "0.16.1", "cc9e3ca312f1cfeccc572b37a09980287e243648108384b97ff2b76e505c3555", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e127a341ad1b209bd80f7bd1620a15693a9908ed780c3b763bccf7d200c767c6"}, diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index d4a7707fa..8977154cc 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -262,13 +262,20 @@ fn build_aws_s3_cloud_writer( ) -> Result { let config = ex_entry.config; let mut aws_builder = object_store::aws::AmazonS3Builder::new() - .with_region(config.region) - .with_bucket_name(ex_entry.bucket) - .with_access_key_id(config.access_key_id) - .with_secret_access_key(config.secret_access_key); - - if let Some(endpoint) = config.endpoint { - aws_builder = aws_builder.with_allow_http(true).with_endpoint(endpoint); + .with_region(&config.region) + .with_access_key_id(&config.access_key_id) + .with_secret_access_key(&config.secret_access_key) + .with_allow_http(true) + .with_endpoint(&config.endpoint); + + if let Some(bucket_name) = &config.bucket { + aws_builder = aws_builder.with_bucket_name(bucket_name); + } else { + // We use the virtual host style, and the bucket name is going to be ignored + // because it's assumed to be already in the endpoint URL. + aws_builder = aws_builder + .with_bucket_name("explorer-default-bucket-name") + .with_virtual_hosted_style_request(true); } if let Some(token) = config.token { diff --git a/native/explorer/src/datatypes.rs b/native/explorer/src/datatypes.rs index f47a9f367..23a46af4a 100644 --- a/native/explorer/src/datatypes.rs +++ b/native/explorer/src/datatypes.rs @@ -421,21 +421,25 @@ pub struct ExS3Config { pub access_key_id: String, pub secret_access_key: String, pub region: String, - pub endpoint: Option, + pub endpoint: String, + pub bucket: Option, pub token: Option, } #[derive(NifStruct, Clone, Debug)] #[module = "FSS.S3.Entry"] pub struct ExS3Entry { - pub bucket: String, pub key: String, pub config: ExS3Config, } impl fmt::Display for ExS3Entry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "s3://{}/{}", self.bucket, self.key) + if let Some(bucket_name) = &self.config.bucket { + write!(f, "s3://{}/{}", bucket_name, self.key) + } else { + write!(f, "s3://default-explorer-bucket/{}", self.key) + } } } @@ -450,13 +454,18 @@ impl ExS3Config { (S3Key::from_str("aws_allow_http").unwrap(), &true_as_string), ]; - if let Some(endpoint) = &self.endpoint { - aws_opts.push((S3Key::Endpoint, endpoint)) - } + aws_opts.push((S3Key::Endpoint, &self.endpoint)); if let Some(token) = &self.token { aws_opts.push((S3Key::Token, token)) } + + // When bucket is not present, we need to force the virtual host style + // in order to ignore the bucket name. + if self.bucket.is_none() { + aws_opts.push((S3Key::VirtualHostedStyleRequest, &true_as_string)); + } + CloudOptions::default().with_aws(aws_opts) } } diff --git a/test/explorer/data_frame/csv_test.exs b/test/explorer/data_frame/csv_test.exs index 28fd5aa94..6d4cc039e 100644 --- a/test/explorer/data_frame/csv_test.exs +++ b/test/explorer/data_frame/csv_test.exs @@ -531,6 +531,27 @@ defmodule Explorer.DataFrame.CSVTest do assert {:error, %ArgumentError{message: "resource not found (404)"}} = DF.from_csv(path, config: s3_config) end + + @tag :cloud_integration + test "writes a CSV file to endpoint ignoring bucket name", %{df: df} do + config = %FSS.S3.Config{ + access_key_id: "test", + secret_access_key: "test", + endpoint: "http://localhost:4566/test-bucket", + bucket: nil, + region: "us-east-1" + } + + entry = %FSS.S3.Entry{ + key: "wine-yolo-#{System.monotonic_time()}.csv", + config: config + } + + assert :ok = DF.to_csv(df, entry) + + saved_df = DF.from_csv!(entry) + assert DF.to_columns(saved_df) == DF.to_columns(Explorer.Datasets.wine()) + end end describe "from_csv/2 - HTTP" do