From 90dd8f44de766b54e1fb2a9aa2fc3d98a67cd336 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 17 Aug 2023 00:00:41 -0300 Subject: [PATCH 1/6] Move `:bucket` from S3 entry to config --- lib/fss/s3.ex | 39 +++++++++++++++++++++------------------ test/fss/s3_test.exs | 7 ++++++- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/lib/fss/s3.ex b/lib/fss/s3.ex index 30f5552..6e2daf3 100644 --- a/lib/fss/s3.ex +++ b/lib/fss/s3.ex @@ -10,6 +10,7 @@ defmodule FSS.S3 do defstruct [ :access_key_id, + :bucket, :region, :secret_access_key, :endpoint, @@ -22,6 +23,7 @@ defmodule FSS.S3 do The attributes are: * `:access_key_id` - This attribute is required. + * `:bucket` - A valid bucket name. This attribute is required. * `:region` - This attribute is required. * `:secret_access_key` - This attribute is required. * `:endpoint` - This attribute is optional. If specified, then `:region` is ignored. @@ -31,6 +33,7 @@ defmodule FSS.S3 do """ @type t :: %__MODULE__{ access_key_id: String.t(), + bucket: String.t(), region: String.t(), secret_access_key: String.t(), endpoint: String.t() | nil, @@ -43,19 +46,17 @@ defmodule FSS.S3 do Represents the S3 resource itself. """ - defstruct [:bucket, :key, :config] + defstruct [:key, :config] @typedoc """ The entry struct for S3. The attributes are: - * `:bucket` - A valid bucket name. This attribute is required. * `:key` - A valid key for the resource. This attribute is required. * `:config` - A valid S3 config from the type `Config.t()`. This attribute is required. """ @type t :: %__MODULE__{ - bucket: String.t(), key: String.t(), config: Config.t() } @@ -77,6 +78,9 @@ defmodule FSS.S3 do - `AWS_SECRET_ACCESS_KEY` - `AWS_REGION` or `AWS_DEFAULT_REGION` - `AWS_SESSION_TOKEN` + + Although you can pass the `:bucket` as a configuration option, it's not + possible to override the bucket name from the URL. """ @spec parse(String.t(), Keyword.t()) :: {:ok, Entry.t()} | {:error, Exception.t()} def parse(url, opts \\ []) do @@ -89,23 +93,10 @@ defmodule FSS.S3 do config = opts |> Keyword.fetch!(:config) - |> case do - nil -> - config_from_system_env() - - %Config{} = config -> - config - - config when is_list(config) or is_map(config) -> - struct!(config_from_system_env(), config) - - other -> - raise ArgumentError, - "expect configuration to be a %FSS.S3.Config{} struct, a keyword list or a map. Instead got #{inspect(other)}" - end + |> normalize_config!() |> validate_config!() - {:ok, %Entry{bucket: bucket, key: key, config: config}} + {:ok, %Entry{key: key, config: %{config | bucket: bucket}}} _ -> {:error, @@ -124,6 +115,18 @@ defmodule FSS.S3 do config end + defp normalize_config!(nil), do: config_from_system_env() + defp normalize_config!(%Config{} = config), do: config + + defp normalize_config!(config) when is_list(config) or is_map(config) do + struct!(config_from_system_env(), config) + end + + defp normalize_config!(other) do + raise ArgumentError, + "expect configuration to be a %FSS.S3.Config{} struct, a keyword list or a map. Instead got #{inspect(other)}" + end + defp check!(config, key, env) do if Map.fetch!(config, key) in ["", nil] do raise ArgumentError, diff --git a/test/fss/s3_test.exs b/test/fss/s3_test.exs index 1d2d0c4..1470884 100644 --- a/test/fss/s3_test.exs +++ b/test/fss/s3_test.exs @@ -17,10 +17,11 @@ defmodule FSS.S3Test do end test "parses a s3:// style uri", %{config: config} do - assert {:ok, %Entry{bucket: "my-bucket", key: "my-file.png", config: %Config{} = config}} = + assert {:ok, %Entry{key: "my-file.png", config: %Config{} = config}} = S3.parse("s3://my-bucket/my-file.png", config: config) assert is_nil(config.endpoint) + assert config.bucket == "my-bucket" assert config.secret_access_key == "my-secret" assert config.access_key_id == "my-access" assert config.region == "us-west-2" @@ -38,6 +39,7 @@ defmodule FSS.S3Test do ) assert config.endpoint == "localhost" + assert config.bucket == "my-bucket" assert config.secret_access_key == "my-secret-1" assert config.access_key_id == "my-access-key-1" assert config.region == "eu-east-1" @@ -50,11 +52,14 @@ defmodule FSS.S3Test do endpoint: "localhost", secret_access_key: "my-secret-1", access_key_id: "my-access-key-1", + # We always ignore bucket from config. + bucket: "random-name", region: "eu-east-1" } ) assert config.endpoint == "localhost" + assert config.bucket == "my-bucket" assert config.secret_access_key == "my-secret-1" assert config.access_key_id == "my-access-key-1" assert config.region == "eu-east-1" From eb4f39f3df31357229c6a670c1df34c91c278c7a Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 17 Aug 2023 14:18:02 -0300 Subject: [PATCH 2/6] Parse config from bucket URL --- lib/fss/s3.ex | 86 ++++++++++++++++++++++++++++++++++++++++++++ test/fss/s3_test.exs | 85 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+) diff --git a/lib/fss/s3.ex b/lib/fss/s3.ex index 6e2daf3..f525973 100644 --- a/lib/fss/s3.ex +++ b/lib/fss/s3.ex @@ -146,4 +146,90 @@ defmodule FSS.S3 do token: System.get_env("AWS_SESSION_TOKEN") } end + + @doc """ + Parses configuration from a bucket URL. + + It expects an URL in the following formats: + + - `https://s3.[region].amazonaws.com/[bucket]` + - `https://[bucket].s3.[region].amazonaws.com` + - `https://my-custom-endpoint.com/[bucket]` + + If the URL is not in one of these formats, then an error + is returned. For custom endpoints (services that are compatible + with AWS S3), it's possible to pass the port and use HTTP. + + All the additional required configuration must be passed as + options to the `:config` option. + + ## Options + + * `:config` - It expects a `Config.t()` or a `Keyword.t()` with the keys + representing the attributes of the `Config.t()`. By default it is `nil`, + which means that we are going to try to fetch the credentials and configuration + from the system's environment variables. + + The following env vars are read: + + - `AWS_ACCESS_KEY_ID` + - `AWS_SECRET_ACCESS_KEY` + - `AWS_REGION` or `AWS_DEFAULT_REGION` + - `AWS_SESSION_TOKEN` + + Although you can pass the `:bucket` and `:region` as options, + they are always override by the values parsed from the URL. + """ + @spec parse_config_from_bucket_url(String.t()) :: {:ok, Config.t()} | {:error, Exception.t()} + def parse_config_from_bucket_url(bucket_url, opts \\ []) do + opts = Keyword.validate!(opts, config: nil) + + uri = URI.parse(bucket_url) + + case uri do + %{host: host, path: path} when is_binary(host) and (is_nil(path) or path == "/") -> + base_config = + opts + |> Keyword.fetch!(:config) + |> normalize_config!() + + case String.split(host, ".") do + [bucket, "s3", region, "amazonaws", "com"] -> + {:ok, %Config{base_config | bucket: bucket, region: region}} + + _other -> + {:error, + ArgumentError.exception( + "cannot extract bucket name from URL. Expected URL in the format " <> + "https://s3.[region].amazonaws.com/[bucket], got: " <> + URI.to_string(uri) + )} + end + + %{host: host, path: "/" <> bucket} when is_binary(host) -> + base_config = + opts + |> Keyword.fetch!(:config) + |> normalize_config!() + |> then(fn config -> %Config{config | bucket: bucket} end) + + case String.split(host, ".") do + ["s3", region, "amazonaws", "com"] -> + {:ok, %Config{base_config | region: region}} + + _other -> + endpoint_uri = %URI{uri | path: nil} + + {:ok, %Config{base_config | endpoint: URI.to_string(endpoint_uri)}} + end + + _ -> + {:error, + ArgumentError.exception( + "expected URL in the format " <> + "https://s3.[region].amazonaws.com/[bucket], got: " <> + URI.to_string(uri) + )} + end + end end diff --git a/test/fss/s3_test.exs b/test/fss/s3_test.exs index 1470884..89a4761 100644 --- a/test/fss/s3_test.exs +++ b/test/fss/s3_test.exs @@ -115,4 +115,89 @@ defmodule FSS.S3Test do end end end + + describe "parse_config_from_bucket_url/2" do + setup do + default_config = [ + secret_access_key: "my-secret", + access_key_id: "my-access" + ] + + [default_config: default_config] + end + + test "parses a path-style AWS S3 url", %{default_config: default_config} do + assert {:ok, config} = + S3.parse_config_from_bucket_url("https://s3.us-west-2.amazonaws.com/my-bucket", + config: default_config + ) + + assert %Config{} = config + assert config.region == "us-west-2" + assert config.bucket == "my-bucket" + assert is_nil(config.endpoint) + end + + test "parses a host-style AWS S3 url", %{default_config: default_config} do + assert {:ok, config} = + S3.parse_config_from_bucket_url("https://my-bucket-1.s3.us-west-2.amazonaws.com/", + config: default_config + ) + + assert %Config{} = config + assert config.region == "us-west-2" + assert config.bucket == "my-bucket-1" + assert is_nil(config.endpoint) + end + + test "parses a path-style S3 compatible url", %{default_config: default_config} do + assert {:ok, config} = + S3.parse_config_from_bucket_url("https://storage.googleapis.com/my-bucket-on-gcp", + config: default_config + ) + + assert %Config{} = config + assert is_nil(config.region) + assert config.bucket == "my-bucket-on-gcp" + assert config.endpoint == "https://storage.googleapis.com" + end + + test "parses a path-style S3 compatible url with a port", %{default_config: default_config} do + assert {:ok, config} = + S3.parse_config_from_bucket_url("http://localhost:4852/my-bucket-on-lh", + config: default_config + ) + + assert %Config{} = config + assert is_nil(config.region) + assert config.bucket == "my-bucket-on-lh" + assert config.endpoint == "http://localhost:4852" + end + + test "cannot extract bucket from host-style S3 url", %{default_config: default_config} do + assert {:error, error} = + S3.parse_config_from_bucket_url("https://my-bucket-on-gcp.storage.googleapis.com", + config: default_config + ) + + message = + "cannot extract bucket name from URL. Expected URL in the format " <> + "https://s3.[region].amazonaws.com/[bucket], got: " <> + "https://my-bucket-on-gcp.storage.googleapis.com" + + assert error == ArgumentError.exception(message) + end + + test "cannot parse url without host", %{default_config: default_config} do + assert {:error, error} = + S3.parse_config_from_bucket_url("/my-path", + config: default_config + ) + + message = + "expected URL in the format https://s3.[region].amazonaws.com/[bucket], got: /my-path" + + assert error == ArgumentError.exception(message) + end + end end From 6b5ecd2653d42b699449de205ac7b2b219f0b555 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 17 Aug 2023 16:34:18 -0300 Subject: [PATCH 3/6] Revert "Parse config from bucket URL" This reverts commit eb4f39f3df31357229c6a670c1df34c91c278c7a. --- lib/fss/s3.ex | 86 -------------------------------------------- test/fss/s3_test.exs | 85 ------------------------------------------- 2 files changed, 171 deletions(-) diff --git a/lib/fss/s3.ex b/lib/fss/s3.ex index f525973..6e2daf3 100644 --- a/lib/fss/s3.ex +++ b/lib/fss/s3.ex @@ -146,90 +146,4 @@ defmodule FSS.S3 do token: System.get_env("AWS_SESSION_TOKEN") } end - - @doc """ - Parses configuration from a bucket URL. - - It expects an URL in the following formats: - - - `https://s3.[region].amazonaws.com/[bucket]` - - `https://[bucket].s3.[region].amazonaws.com` - - `https://my-custom-endpoint.com/[bucket]` - - If the URL is not in one of these formats, then an error - is returned. For custom endpoints (services that are compatible - with AWS S3), it's possible to pass the port and use HTTP. - - All the additional required configuration must be passed as - options to the `:config` option. - - ## Options - - * `:config` - It expects a `Config.t()` or a `Keyword.t()` with the keys - representing the attributes of the `Config.t()`. By default it is `nil`, - which means that we are going to try to fetch the credentials and configuration - from the system's environment variables. - - The following env vars are read: - - - `AWS_ACCESS_KEY_ID` - - `AWS_SECRET_ACCESS_KEY` - - `AWS_REGION` or `AWS_DEFAULT_REGION` - - `AWS_SESSION_TOKEN` - - Although you can pass the `:bucket` and `:region` as options, - they are always override by the values parsed from the URL. - """ - @spec parse_config_from_bucket_url(String.t()) :: {:ok, Config.t()} | {:error, Exception.t()} - def parse_config_from_bucket_url(bucket_url, opts \\ []) do - opts = Keyword.validate!(opts, config: nil) - - uri = URI.parse(bucket_url) - - case uri do - %{host: host, path: path} when is_binary(host) and (is_nil(path) or path == "/") -> - base_config = - opts - |> Keyword.fetch!(:config) - |> normalize_config!() - - case String.split(host, ".") do - [bucket, "s3", region, "amazonaws", "com"] -> - {:ok, %Config{base_config | bucket: bucket, region: region}} - - _other -> - {:error, - ArgumentError.exception( - "cannot extract bucket name from URL. Expected URL in the format " <> - "https://s3.[region].amazonaws.com/[bucket], got: " <> - URI.to_string(uri) - )} - end - - %{host: host, path: "/" <> bucket} when is_binary(host) -> - base_config = - opts - |> Keyword.fetch!(:config) - |> normalize_config!() - |> then(fn config -> %Config{config | bucket: bucket} end) - - case String.split(host, ".") do - ["s3", region, "amazonaws", "com"] -> - {:ok, %Config{base_config | region: region}} - - _other -> - endpoint_uri = %URI{uri | path: nil} - - {:ok, %Config{base_config | endpoint: URI.to_string(endpoint_uri)}} - end - - _ -> - {:error, - ArgumentError.exception( - "expected URL in the format " <> - "https://s3.[region].amazonaws.com/[bucket], got: " <> - URI.to_string(uri) - )} - end - end end diff --git a/test/fss/s3_test.exs b/test/fss/s3_test.exs index 89a4761..1470884 100644 --- a/test/fss/s3_test.exs +++ b/test/fss/s3_test.exs @@ -115,89 +115,4 @@ defmodule FSS.S3Test do end end end - - describe "parse_config_from_bucket_url/2" do - setup do - default_config = [ - secret_access_key: "my-secret", - access_key_id: "my-access" - ] - - [default_config: default_config] - end - - test "parses a path-style AWS S3 url", %{default_config: default_config} do - assert {:ok, config} = - S3.parse_config_from_bucket_url("https://s3.us-west-2.amazonaws.com/my-bucket", - config: default_config - ) - - assert %Config{} = config - assert config.region == "us-west-2" - assert config.bucket == "my-bucket" - assert is_nil(config.endpoint) - end - - test "parses a host-style AWS S3 url", %{default_config: default_config} do - assert {:ok, config} = - S3.parse_config_from_bucket_url("https://my-bucket-1.s3.us-west-2.amazonaws.com/", - config: default_config - ) - - assert %Config{} = config - assert config.region == "us-west-2" - assert config.bucket == "my-bucket-1" - assert is_nil(config.endpoint) - end - - test "parses a path-style S3 compatible url", %{default_config: default_config} do - assert {:ok, config} = - S3.parse_config_from_bucket_url("https://storage.googleapis.com/my-bucket-on-gcp", - config: default_config - ) - - assert %Config{} = config - assert is_nil(config.region) - assert config.bucket == "my-bucket-on-gcp" - assert config.endpoint == "https://storage.googleapis.com" - end - - test "parses a path-style S3 compatible url with a port", %{default_config: default_config} do - assert {:ok, config} = - S3.parse_config_from_bucket_url("http://localhost:4852/my-bucket-on-lh", - config: default_config - ) - - assert %Config{} = config - assert is_nil(config.region) - assert config.bucket == "my-bucket-on-lh" - assert config.endpoint == "http://localhost:4852" - end - - test "cannot extract bucket from host-style S3 url", %{default_config: default_config} do - assert {:error, error} = - S3.parse_config_from_bucket_url("https://my-bucket-on-gcp.storage.googleapis.com", - config: default_config - ) - - message = - "cannot extract bucket name from URL. Expected URL in the format " <> - "https://s3.[region].amazonaws.com/[bucket], got: " <> - "https://my-bucket-on-gcp.storage.googleapis.com" - - assert error == ArgumentError.exception(message) - end - - test "cannot parse url without host", %{default_config: default_config} do - assert {:error, error} = - S3.parse_config_from_bucket_url("/my-path", - config: default_config - ) - - message = - "expected URL in the format https://s3.[region].amazonaws.com/[bucket], got: /my-path" - - assert error == ArgumentError.exception(message) - end - end end From e713ed5460ddb7c4b92b5eb046cd43783c9d1c6a Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 17 Aug 2023 16:37:29 -0300 Subject: [PATCH 4/6] Make region and bucket as optionals, and set endpoint These are changes necessary to support the usage when only the bucket url is known. --- lib/fss/s3.ex | 42 ++++++++++++++++++++++++++++++++++-------- test/fss/s3_test.exs | 13 ++++++++++++- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/lib/fss/s3.ex b/lib/fss/s3.ex index 6e2daf3..85988f0 100644 --- a/lib/fss/s3.ex +++ b/lib/fss/s3.ex @@ -23,19 +23,25 @@ defmodule FSS.S3 do The attributes are: * `:access_key_id` - This attribute is required. - * `:bucket` - A valid bucket name. This attribute is required. - * `:region` - This attribute is required. + + * `:bucket` - A valid bucket name. This attribute is optional, + but if is not provided, then it's assumed that the bucket is in the `:endpoint`. + + * `:region` - This attribute is optional. + * `:secret_access_key` - This attribute is required. + * `:endpoint` - This attribute is optional. If specified, then `:region` is ignored. This attribute is useful for when you are using a service that is compatible with the AWS S3 API. + * `:token` - This attribute is optional. """ @type t :: %__MODULE__{ access_key_id: String.t(), - bucket: String.t(), - region: String.t(), secret_access_key: String.t(), + bucket: String.t() | nil, + region: String.t() | nil, endpoint: String.t() | nil, token: String.t() | nil } @@ -79,8 +85,10 @@ defmodule FSS.S3 do - `AWS_REGION` or `AWS_DEFAULT_REGION` - `AWS_SESSION_TOKEN` - Although you can pass the `:bucket` as a configuration option, it's not - possible to override the bucket name from the URL. + In case the `:endpoint` is not configured, then we use the default host-style + URL from AWS, that is `https://[bucket-name].s3.[region].amazonaws.com`, unless + the bucket name contains dots, meaning that we can't use a virtual host, and + instead of use the path-style: `https://s3.[region].amazonaws.com/[bucket-name]`. """ @spec parse(String.t(), Keyword.t()) :: {:ok, Entry.t()} | {:error, Exception.t()} def parse(url, opts \\ []) do @@ -95,8 +103,26 @@ defmodule FSS.S3 do |> Keyword.fetch!(:config) |> normalize_config!() |> validate_config!() - - {:ok, %Entry{key: key, config: %{config | bucket: bucket}}} + |> then(fn %Config{} = config -> + config = %Config{config | bucket: bucket} + + if is_nil(config.endpoint) do + # We only use the path-style if the bucket name contain dots. + # The standard way is to use the virtual-host style. + endpoint = + if String.contains?(bucket, ".") do + "https://s3." <> config.region <> ".amazonaws.com/" <> bucket + else + "https://" <> bucket <> ".s3." <> config.region <> ".amazonaws.com" + end + + %Config{config | endpoint: endpoint} + else + config + end + end) + + {:ok, %Entry{key: key, config: config}} _ -> {:error, diff --git a/test/fss/s3_test.exs b/test/fss/s3_test.exs index 1470884..8f92113 100644 --- a/test/fss/s3_test.exs +++ b/test/fss/s3_test.exs @@ -20,13 +20,24 @@ defmodule FSS.S3Test do assert {:ok, %Entry{key: "my-file.png", config: %Config{} = config}} = S3.parse("s3://my-bucket/my-file.png", config: config) - assert is_nil(config.endpoint) + assert config.endpoint == "https://my-bucket.s3.us-west-2.amazonaws.com" assert config.bucket == "my-bucket" assert config.secret_access_key == "my-secret" assert config.access_key_id == "my-access" assert config.region == "us-west-2" end + test "parses a s3:// style uri with bucket name containing dots", %{config: config} do + assert {:ok, %Entry{key: "my-file.png", config: %Config{} = config}} = + S3.parse("s3://my.bucket.with.dots/my-file.png", config: config) + + assert config.endpoint == "https://s3.us-west-2.amazonaws.com/my.bucket.with.dots" + assert config.bucket == "my.bucket.with.dots" + assert config.secret_access_key == "my-secret" + assert config.access_key_id == "my-access" + assert config.region == "us-west-2" + end + test "accepts a config as a keyword list" do assert {:ok, %Entry{config: %Config{} = config}} = S3.parse("s3://my-bucket/my-file.png", From 9154e1d66a34057d228e88bd4483f4e527ce4919 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Sat, 19 Aug 2023 01:47:25 -0300 Subject: [PATCH 5/6] Better document the endpoint field for S3 Also make that attribute required in the typespecs. --- lib/fss/s3.ex | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/lib/fss/s3.ex b/lib/fss/s3.ex index 85988f0..2c36279 100644 --- a/lib/fss/s3.ex +++ b/lib/fss/s3.ex @@ -24,25 +24,35 @@ defmodule FSS.S3 do * `:access_key_id` - This attribute is required. + * `:secret_access_key` - This attribute is required. + * `:bucket` - A valid bucket name. This attribute is optional, - but if is not provided, then it's assumed that the bucket is in the `:endpoint`. + but if is not provided, then the `:endpoint` must include the bucket name + either in the host, as a virtual host, or in the path. - * `:region` - This attribute is optional. + * `:region` - This attribute is optional. It's normally required when working + with the official AWS S3 API. - * `:secret_access_key` - This attribute is required. + * `:endpoint` - If specified, then `:region` is ignored. + This attribute is required to be configured if you are using a service that + is compatible with the AWS S3 API. - * `:endpoint` - This attribute is optional. If specified, then `:region` is ignored. - This attribute is useful for when you are using a service that is compatible with - the AWS S3 API. + In case only a "bucket URL" - without discrimination of the bucket name - is provided + then the `:bucket` attribute can be nil just like the `:region`. + + In case the endpoint is not provided, we compute a valid one for the AWS S3 API. + This endpoint is going to follow the virtual-host style most of the time, with + the only exception being when the bucket name has dots. In that case we build + the AWS S3 endpoint with the bucket name as a path in the URL. * `:token` - This attribute is optional. """ @type t :: %__MODULE__{ access_key_id: String.t(), secret_access_key: String.t(), + endpoint: String.t(), bucket: String.t() | nil, region: String.t() | nil, - endpoint: String.t() | nil, token: String.t() | nil } end @@ -85,10 +95,13 @@ defmodule FSS.S3 do - `AWS_REGION` or `AWS_DEFAULT_REGION` - `AWS_SESSION_TOKEN` - In case the `:endpoint` is not configured, then we use the default host-style - URL from AWS, that is `https://[bucket-name].s3.[region].amazonaws.com`, unless - the bucket name contains dots, meaning that we can't use a virtual host, and - instead of use the path-style: `https://s3.[region].amazonaws.com/[bucket-name]`. + In case the endpoint is not provided, we compute a valid one for the AWS S3 API. + This endpoint is going to follow the virtual-host style most of the time, with + the only exception being when the bucket name has dots. In that case we build + the AWS S3 endpoint with the bucket name as a path in the URL. + + See https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-bucket-intro.html + for more details. """ @spec parse(String.t(), Keyword.t()) :: {:ok, Entry.t()} | {:error, Exception.t()} def parse(url, opts \\ []) do From 7c0b3038c45fba39df6b80ae5d43370052d96b45 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 21 Aug 2023 21:54:59 -0300 Subject: [PATCH 6/6] Fix parsing of s3:// URL to consider empty bucket This is going to treat the endpoint differently depending on bucket name. It is also going to make `:bucket` nil if we can build a virtual-host style endpoint. The rules are more or less the following: * Endpoint is given * And bucket is an empty string (s3:///path): keep bucket nil * Otherwise keep both endpoint and bucket in config * Endpoint is nil * Bucket has dots: keep the endpoint _without_ the bucket, fill in bucket field * Bucket does not have dots: keep the endpoint with bucket, bucket is nil --- lib/fss/s3.ex | 38 ++++++++++++++++++++++++-------------- test/fss/s3_test.exs | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/lib/fss/s3.ex b/lib/fss/s3.ex index 2c36279..892a160 100644 --- a/lib/fss/s3.ex +++ b/lib/fss/s3.ex @@ -28,7 +28,11 @@ defmodule FSS.S3 do * `:bucket` - A valid bucket name. This attribute is optional, but if is not provided, then the `:endpoint` must include the bucket name - either in the host, as a virtual host, or in the path. + either in the host, as a virtual host, or in the path. In other words: + if the bucket is not given, then `:endpoint` must be configured. + + This attribute is going to be set to `nil` if the endpoint was + not provided, unless the bucket name contain dots in it. * `:region` - This attribute is optional. It's normally required when working with the official AWS S3 API. @@ -43,7 +47,7 @@ defmodule FSS.S3 do In case the endpoint is not provided, we compute a valid one for the AWS S3 API. This endpoint is going to follow the virtual-host style most of the time, with the only exception being when the bucket name has dots. In that case we build - the AWS S3 endpoint with the bucket name as a path in the URL. + the AWS S3 endpoint without the bucket name in it. * `:token` - This attribute is optional. """ @@ -95,10 +99,9 @@ defmodule FSS.S3 do - `AWS_REGION` or `AWS_DEFAULT_REGION` - `AWS_SESSION_TOKEN` - In case the endpoint is not provided, we compute a valid one for the AWS S3 API. - This endpoint is going to follow the virtual-host style most of the time, with - the only exception being when the bucket name has dots. In that case we build - the AWS S3 endpoint with the bucket name as a path in the URL. + In case the endpoint is not provided, we compute a valid one for the AWS S3 API, + That is going to follow the path style. The endpoint is not going to include the + `:bucket` in it, being necessary to do that when using this FSS entry. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-bucket-intro.html for more details. @@ -111,6 +114,8 @@ defmodule FSS.S3 do case uri do %{scheme: "s3", host: bucket, path: "/" <> key} when is_binary(bucket) -> + bucket = if bucket != "", do: bucket + config = opts |> Keyword.fetch!(:config) @@ -119,23 +124,28 @@ defmodule FSS.S3 do |> then(fn %Config{} = config -> config = %Config{config | bucket: bucket} - if is_nil(config.endpoint) do - # We only use the path-style if the bucket name contain dots. - # The standard way is to use the virtual-host style. - endpoint = + if is_nil(config.endpoint) and not is_nil(bucket) do + s3_host_suffix = "s3." <> config.region <> ".amazonaws.com" + + # We consume the bucket name in the endpoint if there is no dots in it. + {endpoint, bucket} = if String.contains?(bucket, ".") do - "https://s3." <> config.region <> ".amazonaws.com/" <> bucket + {"https://" <> s3_host_suffix, bucket} else - "https://" <> bucket <> ".s3." <> config.region <> ".amazonaws.com" + {"https://" <> bucket <> "." <> s3_host_suffix, nil} end - %Config{config | endpoint: endpoint} + %Config{config | endpoint: endpoint, bucket: bucket} else config end end) - {:ok, %Entry{key: key, config: config}} + if is_nil(config.endpoint) do + {:error, ArgumentError.exception("endpoint is required when bucket is nil")} + else + {:ok, %Entry{key: key, config: config}} + end _ -> {:error, diff --git a/test/fss/s3_test.exs b/test/fss/s3_test.exs index 8f92113..678edbe 100644 --- a/test/fss/s3_test.exs +++ b/test/fss/s3_test.exs @@ -21,7 +21,7 @@ defmodule FSS.S3Test do S3.parse("s3://my-bucket/my-file.png", config: config) assert config.endpoint == "https://my-bucket.s3.us-west-2.amazonaws.com" - assert config.bucket == "my-bucket" + assert config.bucket == nil assert config.secret_access_key == "my-secret" assert config.access_key_id == "my-access" assert config.region == "us-west-2" @@ -31,13 +31,43 @@ defmodule FSS.S3Test do assert {:ok, %Entry{key: "my-file.png", config: %Config{} = config}} = S3.parse("s3://my.bucket.with.dots/my-file.png", config: config) - assert config.endpoint == "https://s3.us-west-2.amazonaws.com/my.bucket.with.dots" + assert config.endpoint == "https://s3.us-west-2.amazonaws.com" assert config.bucket == "my.bucket.with.dots" assert config.secret_access_key == "my-secret" assert config.access_key_id == "my-access" assert config.region == "us-west-2" end + test "parses a s3:// style uri without bucket name but passing endpoint", %{config: config} do + assert {:ok, %Entry{key: "my-file.png", config: %Config{} = config}} = + S3.parse("s3:///my-file.png", + config: %{ + config + | endpoint: "https://my-custom-endpoint.example.com/my-custom-bucket" + } + ) + + assert config.endpoint == "https://my-custom-endpoint.example.com/my-custom-bucket" + assert config.bucket == nil + assert config.secret_access_key == "my-secret" + assert config.access_key_id == "my-access" + assert config.region == "us-west-2" + end + + test "does not parse a s3:// style uri without bucket and without an endpoint", %{ + config: config + } do + assert {:error, error} = + S3.parse("s3:///my-file.png", + config: %{ + config + | endpoint: nil + } + ) + + assert error == ArgumentError.exception("endpoint is required when bucket is nil") + end + test "accepts a config as a keyword list" do assert {:ok, %Entry{config: %Config{} = config}} = S3.parse("s3://my-bucket/my-file.png",