Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gateway): more explicit IPFSBackend and no multi-range #369

Merged
merged 11 commits into from
Oct 2, 2023
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ The following emojis are used to highlight certain changes:

### Changed

* `boxo/gateway`
* 🛠 The `IPFSBackend` interface was updated to make the responses of the
`Head` method more explicit. It now returns a `HeadResponse` instead of a
`files.Node`.

### Removed

### Fixed
Expand Down
65 changes: 59 additions & 6 deletions gateway/blocks_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,34 @@ func (bb *BlocksBackend) Get(ctx context.Context, path ImmutablePath, ranges ...
return md, nil, err
}

// Only a single range is supported in responses to HTTP Range Requests.
// When more than one is passed in the Range header, this library will
// return a response for the first one and ignore the remaining ones.
var ra *ByteRange
if len(ranges) > 0 {
ra = &ranges[0]
}

rootCodec := nd.Cid().Prefix().GetCodec()

// This covers both Raw blocks and terminal IPLD codecs like dag-cbor and dag-json
// Note: while only cbor, json, dag-cbor, and dag-json are currently supported by gateways this could change
// Note: For the raw codec we return just the relevant range rather than the entire block
if rootCodec != uint64(mc.DagPb) {
return md, NewGetResponseFromFile(files.NewBytesFile(nd.RawData())), nil
f := files.NewBytesFile(nd.RawData())

fileSize, err := f.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

if rootCodec == uint64(mc.Raw) {
if err := seekToRangeStart(f, ra); err != nil {
return ContentPathMetadata{}, nil, err
}
}

return md, NewGetResponseFromReader(f, fileSize), nil
}

// This code path covers full graph, single file/directory, and range requests
Expand All @@ -179,10 +202,23 @@ func (bb *BlocksBackend) Get(ctx context.Context, path ImmutablePath, ranges ...
if sz < 0 {
return ContentPathMetadata{}, nil, fmt.Errorf("directory cumulative DAG size cannot be negative")
}
return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx)), nil
return md, NewGetResponseFromDirectoryListing(uint64(sz), dir.EnumLinksAsync(ctx), nil), nil
}
if file, ok := f.(files.File); ok {
return md, NewGetResponseFromFile(file), nil
fileSize, err := f.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

if err := seekToRangeStart(file, ra); err != nil {
return ContentPathMetadata{}, nil, err
}

if s, ok := f.(*files.Symlink); ok {
return md, NewGetResponseFromSymlink(s, fileSize), nil
}

return md, NewGetResponseFromReader(file, fileSize), nil
}

return ContentPathMetadata{}, nil, fmt.Errorf("data was not a valid file or directory: %w", ErrInternalServerError) // TODO: should there be a gateway invalid content type to abstract over the various IPLD error types?
Expand Down Expand Up @@ -211,15 +247,15 @@ func (bb *BlocksBackend) GetBlock(ctx context.Context, path ImmutablePath) (Cont
return md, files.NewBytesFile(nd.RawData()), nil
}

func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
md, nd, err := bb.getNode(ctx, path)
if err != nil {
return md, nil, err
}

rootCodec := nd.Cid().Prefix().GetCodec()
if rootCodec != uint64(mc.DagPb) {
return md, files.NewBytesFile(nd.RawData()), nil
return md, NewHeadResponseForFile(files.NewBytesFile(nd.RawData()), int64(len(nd.RawData()))), nil
}

// TODO: We're not handling non-UnixFS dag-pb. There's a bit of a discrepancy
Expand All @@ -229,7 +265,24 @@ func (bb *BlocksBackend) Head(ctx context.Context, path ImmutablePath) (ContentP
return ContentPathMetadata{}, nil, err
}

return md, fileNode, nil
sz, err := fileNode.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
}

if _, ok := fileNode.(files.Directory); ok {
return md, NewHeadResponseForDirectory(sz), nil
}

if _, ok := fileNode.(*files.Symlink); ok {
return md, NewHeadResponseForSymlink(sz), nil
}

if f, ok := fileNode.(files.File); ok {
return md, NewHeadResponseForFile(f, sz), nil
}

return ContentPathMetadata{}, nil, fmt.Errorf("unsupported UnixFS file type")
}

// emptyRoot is a CAR root with the empty identity CID. CAR files are recommended
Expand Down
82 changes: 71 additions & 11 deletions gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,21 +260,74 @@ type ByteRange struct {
}

type GetResponse struct {
bytes files.File
bytes io.ReadCloser
bytesSize int64
symlink *files.Symlink
directoryMetadata *directoryMetadata
}

// Close releases whichever underlying resource this response carries.
// The byte stream takes precedence, then the symlink, then the directory
// listing's optional close function. When none of them is set (or the
// directory listing has no close function) Close is a no-op and returns nil.
func (r *GetResponse) Close() error {
	switch {
	case r.bytes != nil:
		return r.bytes.Close()
	case r.symlink != nil:
		return r.symlink.Close()
	case r.directoryMetadata != nil && r.directoryMetadata.closeFn != nil:
		return r.directoryMetadata.closeFn()
	}
	// Either nothing is set (should be unreachable) or the directory
	// listing has nothing to close.
	return nil
}

var _ io.Closer = (*GetResponse)(nil)

// directoryMetadata is the directory-listing payload carried by a GetResponse.
type directoryMetadata struct {
// dagSize is the size reported for the directory (BlocksBackend passes the
// directory's cumulative DAG size).
dagSize uint64
// entries is the channel the directory's links are received from.
entries <-chan unixfs.LinkResult
// closeFn, when non-nil, is called by GetResponse.Close; it may be nil.
closeFn func() error
}

// NewGetResponseFromReader builds a GetResponse whose payload is read from
// file. fullFileSize is recorded as the size of the response bytes.
func NewGetResponseFromReader(file io.ReadCloser, fullFileSize int64) *GetResponse {
	resp := new(GetResponse)
	resp.bytes = file
	resp.bytesSize = fullFileSize
	return resp
}

// NewGetResponseFromSymlink builds a GetResponse that carries a UnixFS
// symlink together with its size.
func NewGetResponseFromSymlink(symlink *files.Symlink, size int64) *GetResponse {
	resp := new(GetResponse)
	resp.symlink = symlink
	resp.bytesSize = size
	return resp
}

func NewGetResponseFromFile(file files.File) *GetResponse {
return &GetResponse{bytes: file}
func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult, closeFn func() error) *GetResponse {
return &GetResponse{directoryMetadata: &directoryMetadata{dagSize: dagSize, entries: entries, closeFn: closeFn}}
}

func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.LinkResult) *GetResponse {
return &GetResponse{directoryMetadata: &directoryMetadata{dagSize, entries}}
type HeadResponse struct {
bytesSize int64
startingBytes io.ReadCloser
isFile bool
isSymLink bool
isDir bool
}

// Close closes the starting-bytes reader if one was provided; otherwise it
// does nothing and returns nil.
func (r *HeadResponse) Close() error {
	if r.startingBytes == nil {
		return nil
	}
	return r.startingBytes.Close()
}

// NewHeadResponseForFile builds a HeadResponse marked as a file of the given
// size. startingBytes is stored on the response and is closed by its Close
// method.
func NewHeadResponseForFile(startingBytes io.ReadCloser, size int64) *HeadResponse {
	resp := &HeadResponse{isFile: true}
	resp.startingBytes = startingBytes
	resp.bytesSize = size
	return resp
}

// NewHeadResponseForSymlink builds a HeadResponse marked as a symlink of the
// given size. No reader is attached.
func NewHeadResponseForSymlink(symlinkSize int64) *HeadResponse {
	resp := &HeadResponse{isSymLink: true}
	resp.bytesSize = symlinkSize
	return resp
}

// NewHeadResponseForDirectory builds a HeadResponse marked as a directory,
// recording dagSize as its size. No reader is attached.
func NewHeadResponseForDirectory(dagSize int64) *HeadResponse {
	resp := &HeadResponse{isDir: true}
	resp.bytesSize = dagSize
	return resp
}

// IPFSBackend is the required set of functionality used to implement the IPFS
Expand Down Expand Up @@ -305,6 +358,9 @@ type IPFSBackend interface {
// file will still need magic bytes from the very beginning for content
// type sniffing).
// - A range request for a directory currently holds no semantic meaning.
// - For non-UnixFS, non-raw data such as terminal IPLD blocks (dag-cbor, dag-json, etc.), the returned response
// bytes should be the complete block, returned as an [io.ReadSeekCloser] starting at the beginning of the
// block rather than as an [io.ReadCloser] that starts at the beginning of the range request.
//
// [HTTP Byte Ranges]: https://httpwg.org/specs/rfc9110.html#rfc.section.14.1.2
Get(context.Context, ImmutablePath, ...ByteRange) (ContentPathMetadata, *GetResponse, error)
Expand All @@ -316,12 +372,16 @@ type IPFSBackend interface {
// GetBlock returns a single block of data
GetBlock(context.Context, ImmutablePath) (ContentPathMetadata, files.File, error)

// Head returns a file or directory depending on what the path is that has been requested.
// For UnixFS files should return a file which has the correct file size and either returns the ContentType in ContentPathMetadata or
// enough data (e.g. 3kiB) such that the content type can be determined by sniffing.
// For all other data types returning just size information is sufficient
// TODO: give function more explicit return types
Head(context.Context, ImmutablePath) (ContentPathMetadata, files.Node, error)
// Head returns a [HeadResponse] depending on what the path is that has been requested.
// For UnixFS files (and raw blocks) it should return the size of the file and either set the ContentType in
// ContentPathMetadata or send back a reader from the beginning of the file with enough data (e.g. 3kiB) such that
// the content type can be determined by sniffing.
//
// For UnixFS directories and symlinks, only setting the size and type is necessary.
//
// For all other data types (e.g. (DAG-)CBOR/JSON blocks) returning the size information as a file while setting
// the content-type is sufficient.
Head(context.Context, ImmutablePath) (ContentPathMetadata, *HeadResponse, error)

// ResolvePath resolves the path using UnixFS resolver. If the path does not
// exist due to a missing link, it should return an error of type:
Expand Down
4 changes: 2 additions & 2 deletions gateway/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ func (mb *errorMockBackend) GetBlock(ctx context.Context, path ImmutablePath) (C
return ContentPathMetadata{}, nil, mb.err
}

func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (mb *errorMockBackend) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
return ContentPathMetadata{}, nil, mb.err
}

Expand Down Expand Up @@ -803,7 +803,7 @@ func (mb *panicMockBackend) GetBlock(ctx context.Context, immutablePath Immutabl
panic("i am panicking")
}

func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, files.Node, error) {
func (mb *panicMockBackend) Head(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, *HeadResponse, error) {
panic("i am panicking")
}

Expand Down
55 changes: 17 additions & 38 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,19 @@ func NewHandler(c Config, backend IPFSBackend) http.Handler {
return newHandlerWithMetrics(&c, backend)
}

// serveContent replies to the request using the content in the provided ReadSeeker
// serveContent replies to the request using the content in the provided Reader
// and returns the status code written and any error encountered during a write.
// It wraps http.serveContent which takes care of If-None-Match+Etag,
// It wraps httpServeContent (a close clone of http.ServeContent) which takes care of If-None-Match+Etag,
// Content-Length and range requests.
func serveContent(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, content io.ReadSeeker) (int, bool, error) {
//
// Notes:
// 1. For HEAD requests the io.Reader may be nil/undefined
// 2. When the io.Reader is needed it must start at the beginning of the first Range Request component if it exists
// 3. Only a single HTTP Range Request is supported, if more than one are requested only the first will be honored
// 4. The Content-Type header must already be set
func serveContent(w http.ResponseWriter, req *http.Request, modtime time.Time, size int64, content io.Reader) (int, bool, error) {
ew := &errRecordingResponseWriter{ResponseWriter: w}
http.ServeContent(ew, req, name, modtime, content)
httpServeContent(ew, req, modtime, size, content)

// When we calculate some metrics we want a flag that lets us ignore
// errors and 304 Not Modified, and only care when requested data
Expand Down Expand Up @@ -554,40 +560,6 @@ func etagMatch(ifNoneMatchHeader string, etagsToCheck ...string) bool {
return false
}

// scanETag checks whether s (after trimming surrounding whitespace) begins
// with a syntactically valid ETag, in either the weak form W/"text" or the
// strong form "text" (see RFC 7232 2.3). On success it returns the ETag,
// including any W/ prefix and both quotes, along with whatever text follows
// it. On failure it returns "", "".
// (This is the same logic as one executed inside of http.ServeContent)
func scanETag(s string) (etag string, remain string) {
	trimmed := textproto.TrimString(s)

	// Step over the weak-validator marker, if present.
	open := 0
	if strings.HasPrefix(trimmed, "W/") {
		open = 2
	}

	// At minimum an opening and a closing quote are required.
	if len(trimmed)-open < 2 || trimmed[open] != '"' {
		return "", ""
	}

	for pos := open + 1; pos < len(trimmed); pos++ {
		ch := trimmed[pos]
		if ch == '"' {
			// Closing quote: everything up to here is the ETag.
			return trimmed[:pos+1], trimmed[pos+1:]
		}
		// Character values allowed in ETags.
		permitted := ch == 0x21 || (ch >= 0x23 && ch <= 0x7E) || ch >= 0x80
		if !permitted {
			return "", ""
		}
	}

	// Ran out of input before seeing the closing quote.
	return "", ""
}

// etagWeakMatch reports whether a and b are equal under weak ETag
// comparison, i.e. once a leading "W/" marker has been stripped from each.
func etagWeakMatch(a, b string) bool {
	left := strings.TrimPrefix(a, "W/")
	right := strings.TrimPrefix(b, "W/")
	return left == right
}

// getEtag generates an ETag value based on an HTTP Request, a CID and a response
// format. This function DOES NOT generate ETags for CARs or IPNS Records.
func getEtag(r *http.Request, cid cid.Cid, responseFormat string) string {
Expand Down Expand Up @@ -776,6 +748,13 @@ func (i *handler) handleWebRequestErrors(w http.ResponseWriter, r *http.Request,
return ImmutablePath{}, false
}

// If the error is not an IPLD traversal error then we should not be looking for _redirects or legacy 404s
if !isErrNotFound(err) {
err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err)
i.webError(w, r, err, http.StatusInternalServerError)
return ImmutablePath{}, false
}

// If we have origin isolation (subdomain gw, DNSLink website),
// and response type is UnixFS (default for website hosting)
// we can leverage the presence of an _redirects file and apply rules defined there.
Expand Down
12 changes: 11 additions & 1 deletion gateway/handler_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,19 @@ func (i *handler) serveRawBlock(ctx context.Context, w http.ResponseWriter, r *h
w.Header().Set("Content-Type", rawResponseFormat)
w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^)

sz, err := data.Size()
if err != nil {
i.handleRequestErrors(w, r, rq.contentPath, err)
return false
}

if !i.seekToStartOfFirstRange(w, r, data) {
return false
}

// ServeContent will take care of
// If-None-Match+Etag, Content-Length and range requests
_, dataSent, _ := serveContent(w, r, name, modtime, data)
_, dataSent, _ := serveContent(w, r, modtime, sz, data)

if dataSent {
// Update metrics
Expand Down
Loading