Skip to content

Commit 3d971df

Browse files
authored
Cache byte range requests (#215). Fixes #188
# What A potential fix for #188 When the `Range` header is supplied: * NGINX will perform subrequests to s3 in byte ranges of `PROXY_CACHE_SLICE_SIZE` until the requested range is satisfied * Cache will be populated in slices of `PROXY_CACHE_SLICE_SIZE`. * Only the requested byte range will be cached When the `Range` header is not supplied: * Normal behavior - files will be cached in their entirety * For large files, `proxy_cache_lock` ensures that multiple requests for the same file are not cached multiple times. Requests received after the initial `MISS` will queue until they can be served from the cache (the initial request cache write is complete). ## Implementation Details * This solution takes advantage of the existing [redirectToS3](https://github.com/nginxinc/nginx-s3-gateway/blob/656395c2b2cc8aaf79a78b59b4abbe5b5d04a5a3/common/etc/nginx/include/s3gateway.js#L347) function to change the target NGINX conf location based on the presence of the `Range` header * The main configuration for the s3 proxy action has been broken out into `common/etc/nginx/templates/gateway/s3_location_common.conf.template` * A separate cache is defined for the slice-based caching * In the slice caching location, the [http_slice_module](http://nginx.org/en/docs/http/ngx_http_slice_module.html) is configured and other caching options overridden as necessary. ## Examples ### Normal Request ```bash curl -o foo.txt localhost:8989/a/5mb.txt % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 5120k 100 5120k 0 0 111M 0 --:--:-- --:--:-- --:--:-- 113M ``` A single cache file is created ```bash root@f339daeb2d44:/var/cache/nginx/s3_proxy# tree . . `-- 5 `-- 9e `-- 447b5a707c18a4c0e90344925e6b39e5 ``` The size of the cache file is equal to the requested file: ```bash root@f339daeb2d44:/var/cache/nginx/s3_proxy# du -h . 5.1M ./5/9e 5.1M ./5 5.1M . 
``` ### Byte Range Request In this example, I'm requesting a 5mb file, and the `PROXY_CACHE_SLICE_SIZE` option has been set to `1000k` (1000 [kilobytes](http://nginx.org/en/docs/syntax.html)) ```bash curl -o foo.txt -r 1000000-4000000 localhost:8989/a/5mb.txt % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 2929k 100 2929k 0 0 66.8M 0 --:--:-- --:--:-- --:--:-- 68.1M ``` Cache files are created in chunks: ```bash root@f339daeb2d44:/var/cache/nginx/s3_proxy_slices# tree . . |-- 0 | `-- 5c | `-- 18f94c01f7a1beed3afe0aa92baf05c0 |-- 4 | `-- 30 | `-- 9fac913edc79622fdcc2975d91e4f304 |-- b | `-- 5b | `-- 91bfb9ef86136be4b07cdc2eb51025bb `-- d `-- 82 `-- 339384e3e9840cf7f8fe4e54fdc8182d ``` The size of each cache file is roughly equal to the chunk size: ```bash root@f339daeb2d44:/var/cache/nginx/s3_proxy_slices# du -h . 1008K ./d/82 1012K ./d 1008K ./0/5c 1012K ./0 1008K ./b/5b 1012K ./b 1008K ./4/30 1012K ./4 4.0M . ```
1 parent 2d3e306 commit 3d971df

13 files changed

+127
-46
lines changed

Dockerfile.buildkit.plus

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ ENV XSLT_VERSION 30-1
77

88
ENV PROXY_CACHE_MAX_SIZE "10g"
99
ENV PROXY_CACHE_INACTIVE "60m"
10+
ENV PROXY_CACHE_SLICE_SIZE "1m"
1011
ENV PROXY_CACHE_VALID_OK "1h"
1112
ENV PROXY_CACHE_VALID_NOTFOUND "1m"
1213
ENV PROXY_CACHE_VALID_FORBIDDEN "30s"

Dockerfile.oss

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ ENV NJS_VERSION "0.8.2"
55

66
ENV PROXY_CACHE_MAX_SIZE "10g"
77
ENV PROXY_CACHE_INACTIVE "60m"
8+
ENV PROXY_CACHE_SLICE_SIZE "1m"
89
ENV PROXY_CACHE_VALID_OK "1h"
910
ENV PROXY_CACHE_VALID_NOTFOUND "1m"
1011
ENV PROXY_CACHE_VALID_FORBIDDEN "30s"

Dockerfile.plus

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ ENV XSLT_VERSION 30-1
77

88
ENV PROXY_CACHE_MAX_SIZE "10g"
99
ENV PROXY_CACHE_INACTIVE "60m"
10+
ENV PROXY_CACHE_SLICE_SIZE "1m"
1011
ENV PROXY_CACHE_VALID_OK "1h"
1112
ENV PROXY_CACHE_VALID_NOTFOUND "1m"
1213
ENV PROXY_CACHE_VALID_FORBIDDEN "30s"

common/etc/nginx/include/s3gateway.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,12 @@ function redirectToS3(r) {
362362
} else if (!ALLOW_LISTING && !PROVIDE_INDEX_PAGE && uriPath === "/") {
363363
r.internalRedirect("@error404");
364364
} else {
365-
r.internalRedirect("@s3");
365+
if (r.headersIn["Range"]) {
366+
r.internalRedirect("@s3_sliced");
367+
} else {
368+
r.internalRedirect("@s3");
369+
}
370+
366371
}
367372
}
368373

common/etc/nginx/nginx.conf

+2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ env APPEND_SLASH_FOR_POSSIBLE_DIRECTORY;
2626
env DIRECTORY_LISTING_PATH_PREFIX;
2727
env PROXY_CACHE_MAX_SIZE;
2828
env PROXY_CACHE_INACTIVE;
29+
env PROXY_CACHE_SLICE_SIZE;
2930
env PROXY_CACHE_VALID_OK;
31+
env PROXY_CACHE_SLICE_SIZE;
3032
env PROXY_CACHE_VALID_NOTFOUND;
3133
env PROXY_CACHE_VALID_FORBIDDEN;
3234
env HEADER_PREFIXES_TO_STRIP;

common/etc/nginx/templates/cache.conf.template

+8
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,11 @@ keys_zone=s3_cache:10m
66
max_size=$PROXY_CACHE_MAX_SIZE
77
inactive=$PROXY_CACHE_INACTIVE
88
use_temp_path=off;
9+
10+
11+
proxy_cache_path /var/cache/nginx/s3_proxy_slices
12+
levels=1:2
13+
keys_zone=s3_cache_slices:10m
14+
max_size=$PROXY_CACHE_MAX_SIZE
15+
inactive=$PROXY_CACHE_INACTIVE
16+
use_temp_path=off;

common/etc/nginx/templates/default.conf.template

+14-43
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ server {
8383
# CORS is implemented by returning the appropriate headers as part of
8484
# the response to an OPTIONS request. If you want to customize the
8585
# CORS response, the cors.conf.template file can be overwritten and
86-
# extended to meet one's needs.
86+
# extended to meet your needs.
8787
include /etc/nginx/conf.d/gateway/cors.conf;
8888

8989
auth_request /aws/credentials/retrieve;
@@ -101,51 +101,22 @@ server {
101101
include /etc/nginx/conf.d/gateway/js_fetch_trusted_certificate.conf;
102102
}
103103

104+
# This is the primary location that proxies the request to s3
105+
# See the included s3_location_common.conf file for all logic
104106
location @s3 {
105-
# We include only the headers needed for the authentication signatures that
106-
# we plan to use.
107-
include /etc/nginx/conf.d/gateway/v${AWS_SIGS_VERSION}_headers.conf;
108-
109-
# The CORS configuration needs to be imported in several places in order for
110-
# it to be applied within different contexts.
111-
include /etc/nginx/conf.d/gateway/cors.conf;
112-
113-
# Don't allow any headers from the client - we don't want them messing
114-
# with S3 at all.
115-
proxy_pass_request_headers off;
116-
117-
# Enable passing of the server name through TLS Server Name Indication extension.
118-
proxy_ssl_server_name on;
119-
proxy_ssl_name ${S3_SERVER};
120-
121-
# Set the Authorization header to the AWS Signatures credentials
122-
proxy_set_header Authorization $s3auth;
123-
proxy_set_header X-Amz-Security-Token $awsSessionToken;
124-
125-
# We set the host as the bucket name to inform the S3 API of the bucket
126-
proxy_set_header Host $s3_host_hdr;
127-
128-
# Use keep alive connections in order to improve performance
129-
proxy_http_version 1.1;
130-
proxy_set_header Connection '';
131-
132-
# We strip off all of the AWS specific headers from the server so that
133-
# there is nothing identifying the object as having originated in an
134-
# object store.
135-
js_header_filter s3gateway.editHeaders;
136-
137-
# Catch all errors from S3 and sanitize them so that the user can't
138-
# gain intelligence about the S3 bucket being proxied.
139-
proxy_intercept_errors on;
140-
141-
# Comment out this line to receive the error messages returned by S3
142-
error_page 400 401 402 403 405 406 407 408 409 410 411 412 413 414 415 416 417 418 420 422 423 424 426 428 429 431 444 449 450 451 500 501 502 503 504 505 506 507 508 509 510 511 =404 @error404;
143-
144-
error_page 404 @trailslashControl;
107+
include /etc/nginx/conf.d/gateway/s3_location_common.conf;
108+
}
145109

146-
proxy_pass ${S3_SERVER_PROTO}://storage_urls$s3uri;
110+
# Same as the primary location above but handling and caching
111+
# byte range requests efficiently
112+
location @s3_sliced {
113+
proxy_cache s3_cache_slices;
114+
proxy_cache_valid 200 302 206 ${PROXY_CACHE_VALID_OK};
115+
proxy_cache_key "$request_method$host$uri$slice_range";
147116

148-
include /etc/nginx/conf.d/gateway/s3_location.conf;
117+
slice ${PROXY_CACHE_SLICE_SIZE};
118+
proxy_set_header Range $slice_range;
119+
include /etc/nginx/conf.d/gateway/s3_location_common.conf;
149120
}
150121

151122
location @s3PreListing {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# We include only the headers needed for the authentication signatures that
2+
# we plan to use.
3+
include /etc/nginx/conf.d/gateway/v${AWS_SIGS_VERSION}_headers.conf;
4+
5+
# The CORS configuration needs to be imported in several places in order for
6+
# it to be applied within different contexts.
7+
include /etc/nginx/conf.d/gateway/cors.conf;
8+
9+
# Don't allow any headers from the client - we don't want them messing
10+
# with S3 at all.
11+
proxy_pass_request_headers off;
12+
13+
# Enable passing of the server name through TLS Server Name Indication extension.
14+
proxy_ssl_server_name on;
15+
proxy_ssl_name ${S3_SERVER};
16+
17+
# Set the Authorization header to the AWS Signatures credentials
18+
proxy_set_header Authorization $s3auth;
19+
proxy_set_header X-Amz-Security-Token $awsSessionToken;
20+
21+
# We set the host as the bucket name to inform the S3 API of the bucket
22+
proxy_set_header Host $s3_host_hdr;
23+
24+
# Use keep alive connections in order to improve performance
25+
proxy_http_version 1.1;
26+
proxy_set_header Connection '';
27+
28+
# We strip off all of the AWS specific headers from the server so that
29+
# there is nothing identifying the object as having originated in an
30+
# object store.
31+
js_header_filter s3gateway.editHeaders;
32+
33+
# Catch all errors from S3 and sanitize them so that the user can't
34+
# gain intelligence about the S3 bucket being proxied.
35+
proxy_intercept_errors on;
36+
37+
# Comment out this line to receive the error messages returned by S3
38+
error_page 400 401 402 403 405 406 407 408 409 410 411 412 413 414 415 416 417 418 420 422 423 424 426 428 429 431 444 449 450 451 500 501 502 503 504 505 506 507 508 509 510 511 =404 @error404;
39+
40+
error_page 404 @trailslashControl;
41+
42+
proxy_pass ${S3_SERVER_PROTO}://storage_urls$s3uri;
43+
44+
include /etc/nginx/conf.d/gateway/s3_location.conf;

docs/getting_started.md

+13-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ running as a Container or as a Systemd service.
3131
| `DIRECTORY_LISTING_PATH_PREFIX` | No | | | In `ALLOW_DIRECTORY_LIST=true` mode [adds defined prefix to links](#configuring-directory-listing) |
3232
| `DNS_RESOLVERS` | No | | | DNS resolvers (separated by single spaces) to configure NGINX with |
3333
| `PROXY_CACHE_MAX_SIZE` | No | | `10g` | Limits cache size |
34-
| `PROXY_CACHE_INACTIVE` | No | | `60m` | Cached data that are not accessed during the time specified by the parameter get removed from the cache regardless of their freshness |
34+
| `PROXY_CACHE_INACTIVE` | No | | `60m` | Cached data that are not accessed during the time specified by the parameter get removed from the cache regardless of their freshness |
35+
| `PROXY_CACHE_SLICE_SIZE` | No | | `1m` | For requests with a `Range` header included, determines the size of the chunks in which the file is fetched. Values much smaller than the requests can lead to inefficiencies due to reading and writing many files. See [below for more details](#byte-range-requests-and-caching) |
3536
| `PROXY_CACHE_VALID_OK` | No | | `1h` | Sets caching time for response code 200 and 302 |
3637
| `PROXY_CACHE_VALID_NOTFOUND` | No | | `1m` | Sets caching time for response code 404 |
3738
| `PROXY_CACHE_VALID_FORBIDDEN` | No | | `30s` | Sets caching time for response code 403 |
@@ -112,6 +113,17 @@ S3 bucket in a subfolder on an ALB. For example, if you wanted to expose the
112113
root of a bucket under the path "www.mysite.com/somepath", you would set this
113114
variable to "/somepath".
114115

116+
## Byte-Range Requests and Caching
117+
The gateway caches [byte-range](https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests) requests (requests sent with a `Range` header) differently than normal requests.
118+
119+
The gateway is configured to cache such requests in chunks of size `PROXY_CACHE_SLICE_SIZE`. If you don't provide this configuration value it will default to 1 megabyte.
120+
121+
This means that if you request 2.5 megabytes of a 1 gigabyte file, the gateway will cache 3 megabytes and nothing else.
122+
123+
Setting your slice size too small can have performance impacts since NGINX performs a subrequest for each slice. For more details see the [official reference](http://nginx.org/en/docs/http/ngx_http_slice_module.html).
124+
125+
You may make byte-range requests and normal requests for the same file and NGINX will automatically handle them differently. The caches for file chunks and normal file requests are separate on disk.
126+
115127
## Running as a Systemd Service
116128

117129
An [install script](/standalone_ubuntu_oss_install.sh) for the gateway shows

settings.example

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ PROVIDE_INDEX_PAGE=false
1414
APPEND_SLASH_FOR_POSSIBLE_DIRECTORY=false
1515
DIRECTORY_LISTING_PATH_PREFIX=""
1616
PROXY_CACHE_MAX_SIZE=10g
17+
PROXY_CACHE_SLICE_SIZE=1m
1718
PROXY_CACHE_INACTIVE=60m
1819
PROXY_CACHE_VALID_OK=1h
1920
PROXY_CACHE_VALID_NOTFOUND=1m

standalone_ubuntu_oss_install.sh

+3
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ echo "Directory Listing Enabled: ${ALLOW_DIRECTORY_LIST}"
9292
echo "Directory Listing path prefix: ${DIRECTORY_LISTING_PATH_PREFIX}"
9393
echo "Cache size limit: ${PROXY_CACHE_MAX_SIZE}"
9494
echo "Cache inactive timeout: ${PROXY_CACHE_INACTIVE}"
95+
echo "Size of slice for byte range requests: ${PROXY_CACHE_SLICE_SIZE}"
9596
echo "Proxy Caching Time for Valid Response: ${PROXY_CACHE_VALID_OK}"
9697
echo "Proxy Caching Time for Not Found Response: ${PROXY_CACHE_VALID_NOTFOUND}"
9798
echo "Proxy Caching Time for Forbidden Response: ${PROXY_CACHE_VALID_FORBIDDEN}"
@@ -167,6 +168,8 @@ DEBUG=${DEBUG:-'false'}
167168
PROXY_CACHE_MAX_SIZE=${PROXY_CACHE_MAX_SIZE:-'10g'}
168169
# Cached data that are not accessed during the time get removed
169170
PROXY_CACHE_INACTIVE=${PROXY_CACHE_INACTIVE:-'60m'}
171+
# Request slice size
172+
PROXY_CACHE_SLICE_SIZE=${PROXY_CACHE_SLICE_SIZE:-'1m'}
170173
# Proxy caching time for response code 200 and 302
171174
PROXY_CACHE_VALID_OK=${PROXY_CACHE_VALID_OK:-'1h'}
172175
# Proxy caching time for response code 404

test/docker-compose.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ services:
3131
AWS_SIGS_VERSION:
3232
STATIC_SITE_HOSTING:
3333
PROXY_CACHE_MAX_SIZE: "10g"
34+
PROXY_CACHE_SLICE_SIZE: "1m"
3435
PROXY_CACHE_INACTIVE: "60m"
3536
PROXY_CACHE_VALID_OK: "1h"
3637
PROXY_CACHE_VALID_NOTFOUND: "1m"

test/integration/test_api.sh

+32-1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ if ! [ -x "${checksum_cmd}" ]; then
7777
exit ${no_dep_exit_code}
7878
fi
7979

80+
81+
file_convert_command="$(command -v dd || true)"
82+
83+
if ! [ -x "${file_convert_command}" ]; then
84+
e "required dependency not found: dd not found in the path or not executable"
85+
exit ${no_dep_exit_code}
86+
fi
87+
8088
# If we are using the `md5` executable
8189
# then use the -r flag which makes it behave the same as `md5sum`
8290
# this is done after the `-x` check for ability to execute
@@ -140,6 +148,27 @@ assertHttpRequestEquals() {
140148
exit ${test_fail_exit_code}
141149
fi
142150
fi
151+
# Not a real method but better than making a whole new helper or massively refactoring this one
152+
elif [ "${method}" = "GET_RANGE" ]; then
153+
# Call format to check for a range of byte 30 to 1000:
154+
# assertHttpRequestEquals "GET_RANGE" "a.txt" "data/bucket-1/a.txt" 30 1000 "206"
155+
body_data_path="${test_dir}/$3"
156+
range_start="$4"
157+
range_end="$5"
158+
byte_count=$((range_end - range_start + 1)) # add one since we read through the last byte
159+
expected_response_code="$6"
160+
161+
file_checksum=$(${file_convert_command} if="$body_data_path" bs=1 skip="$range_start" count="$byte_count" 2>/dev/null | ${checksum_cmd})
162+
expected_checksum="${file_checksum:0:${checksum_length}}"
163+
164+
curl_checksum_output="$(${curl_cmd} -X "GET" -r "${range_start}"-"${range_end}" "${uri}" ${extra_arg} | ${checksum_cmd})"
165+
s3_file_checksum="${curl_checksum_output:0:${checksum_length}}"
166+
167+
if [ "${expected_checksum}" != "${s3_file_checksum}" ]; then
168+
e "Checksum doesn't match expectation. Request [GET ${uri} Range: "${range_start}"-"${range_end}"] Expected [${expected_checksum}] Actual [${s3_file_checksum}]"
169+
e "curl command: ${curl_cmd} -X "GET" -r "${range_start}"-"${range_end}" "${uri}" ${extra_arg} | ${checksum_cmd}"
170+
exit ${test_fail_exit_code}
171+
fi
143172
else
144173
e "Method unsupported: [${method}]"
145174
fi
@@ -175,7 +204,6 @@ if [ -n "${prefix_leading_directory_path}" ]; then
175204
fi
176205

177206
# Ordinary filenames
178-
179207
assertHttpRequestEquals "HEAD" "a.txt" "200"
180208
assertHttpRequestEquals "HEAD" "a.txt?some=param&that=should&be=stripped#aaah" "200"
181209
assertHttpRequestEquals "HEAD" "b/c/d.txt" "200"
@@ -184,6 +212,9 @@ assertHttpRequestEquals "HEAD" "b/e.txt" "200"
184212
assertHttpRequestEquals "HEAD" "b//e.txt" "200"
185213
assertHttpRequestEquals "HEAD" "a/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.txt" "200"
186214

215+
# Byte range requests
216+
assertHttpRequestEquals "GET_RANGE" 'a/plus%2Bplus.txt' "data/bucket-1/a/plus+plus.txt" 30 1000 "206"
217+
187218
# We try to request URLs that are properly encoded as well as URLs that
188219
# are not properly encoded to understand what works and what does not.
189220

0 commit comments

Comments
 (0)