Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WASMFS chunked fetch backend #23021

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ See docs/process.md for more on how version tagging works.
source maps, so it has not worked in many years, and there have been no
requests for it. This has no impact on the source map support in browser
devtools. (#23553)
- The WASMFS fetch backend now fetches files in chunks using HTTP range
requests (if supported by the server). `wasmfs_create_fetch_backend` now
takes a second parameter (`uint32_t chunk_size`) to configure the size of
each chunk; passing 0 selects the default. If a file is read only a few times
at random offsets, a small chunk size will minimize bandwidth; if a file is
read in larger contiguous ranges, a larger chunk size will reduce the number
of requests. (#23021)
kripken marked this conversation as resolved.
Show resolved Hide resolved

4.0.2 - 01/30/25
----------------
Expand Down
9 changes: 7 additions & 2 deletions src/lib/libfetchfs.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,13 @@ addToLibrary({
$FETCHFS__deps: ['$stringToUTF8OnStack', 'wasmfs_create_fetch_backend'],
$FETCHFS: {
createBackend(opts) {
return _wasmfs_create_fetch_backend(stringToUTF8OnStack(opts.base_url));
}
return withStackSave(
() => _wasmfs_create_fetch_backend(
stringToUTF8OnStack(opts.base_url ?? ""),
opts.chunkSize | 0
)
);
},
},
});

Expand Down
136 changes: 102 additions & 34 deletions src/lib/libwasmfs_fetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,24 @@
*/

addToLibrary({
$wasmFS$JSMemoryRanges: {},

// Fetch backend: On first access of a file (either a read or a getSize), it
// will fetch() data from the network asynchronously: in chunks via HTTP range
// requests when the server supports them, otherwise the whole file at once.
// Fetched data is cached in wasmFS$JSMemoryRanges and reused by later reads.

_wasmfs_create_fetch_backend_js__deps: [
'$wasmFS$backends',
'$wasmFS$JSMemoryFiles',
'_wasmfs_create_js_file_backend_js',
'_wasmfs_fetch_get_file_path',
'$wasmFS$JSMemoryRanges',
'_wasmfs_fetch_get_file_url',
'_wasmfs_fetch_get_chunk_size',
],
_wasmfs_create_fetch_backend_js: async function(backend) {
// Get a promise that fetches the data and stores it in JS memory (if it has
// not already been fetched).
async function getFile(file) {
if (wasmFS$JSMemoryFiles[file]) {
// The data is already here, so nothing to do before we continue on to
// the actual read below.
return Promise.resolve();
}
// This is the first time we want the file's data.
async function getFileRange(file, offset, len) {
var url = '';
var fileUrl_p = __wasmfs_fetch_get_file_path(file);
var fileUrl_p = __wasmfs_fetch_get_file_url(file);
var fileUrl = UTF8ToString(fileUrl_p);
var isAbs = fileUrl.indexOf('://') !== -1;
if (isAbs) {
Expand All @@ -35,55 +31,127 @@ addToLibrary({
try {
var u = new URL(fileUrl, self.location.origin);
url = u.toString();
} catch (e) {
} catch (_e) {
throw {status: 404};
}
}
var response = await fetch(url);
if (response.ok) {
var buffer = await response['arrayBuffer']();
wasmFS$JSMemoryFiles[file] = new Uint8Array(buffer);
} else {
var chunkSize = __wasmfs_fetch_get_chunk_size(file);
offset ??= 0;
len ??= chunkSize;
// In which chunk does the seeked range start? E.g., 5-14 with chunksize 8 will start in chunk 0.
var firstChunk = (offset / chunkSize) | 0;
// In which chunk does the seeked range end? E.g., 5-14 with chunksize 8 will end in chunk 1, as will 5-16 (since byte 16 isn't requested).
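// This is always a chunk >= firstChunk when len > 0. read() returns early for
// zero-length reads, and getSize() passes offset 0 and len 0, for which the
// `| 0` truncation also yields chunk 0.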
var lastChunk = ((offset+len-1) / chunkSize) | 0;
if (!(file in wasmFS$JSMemoryRanges)) {
var fileInfo = await fetch(url, {method:'HEAD', headers:{'Range': 'bytes=0-'}});
if (fileInfo.ok &&
fileInfo.headers.has('Content-Length') &&
fileInfo.headers.get('Accept-Ranges') == 'bytes' &&
(parseInt(fileInfo.headers.get('Content-Length'), 10) > chunkSize*2)) {
wasmFS$JSMemoryRanges[file] = {
size: parseInt(fileInfo.headers.get('Content-Length'), 10),
chunks: [],
chunkSize: chunkSize
};
} else {
// may as well/forced to download the whole file
var wholeFileReq = await fetch(url);
if (!wholeFileReq.ok) {
throw wholeFileReq;
}
var wholeFileData = new Uint8Array(await wholeFileReq.arrayBuffer());
var text = new TextDecoder().decode(wholeFileData);
wasmFS$JSMemoryRanges[file] = {
size: wholeFileData.byteLength,
chunks: [wholeFileData],
chunkSize: wholeFileData.byteLength
};
return Promise.resolve();
}
}
var allPresent = true;
var i;
// Do we have all the chunks already? If so, we don't need to do any fetches.
for (i = firstChunk; i <= lastChunk; i++) {
if (!wasmFS$JSMemoryRanges[file].chunks[i]) {
allPresent = false;
break;
}
}
if (allPresent) {
// The data is already here, so nothing to do before we continue on to
// the actual read.
return Promise.resolve();
}
// This is the first time we want the chunks' data. We'll make
// one request for all the chunks we need, rather than one
// request per chunk.
var start = firstChunk * chunkSize;
// We must fetch *up to* the last byte of the last chunk.
var end = (lastChunk+1) * chunkSize;
var response = await fetch(url, {headers:{'Range': `bytes=${start}-${end-1}`}});
if (!response.ok) {
throw response;
}
var bytes = await response.bytes();
for (i = firstChunk; i <= lastChunk; i++) {
wasmFS$JSMemoryRanges[file].chunks[i] = bytes.slice(i*chunkSize-start,(i+1)*chunkSize-start);
}
return Promise.resolve();
}

// Start with the normal JSFile operations. This sets
// wasmFS$backends[backend]
// which we will then augment.
__wasmfs_create_js_file_backend_js(backend);

// Add the async operations on top.
var jsFileOps = wasmFS$backends[backend];
wasmFS$backends[backend] = {
// alloc/free operations are not actually async. Just forward to the
// parent class, but we must return a Promise as the caller expects.
allocFile: async (file) => {
jsFileOps.allocFile(file);
// nop
return Promise.resolve();
},
freeFile: async (file) => {
jsFileOps.freeFile(file);
// free memory
wasmFS$JSMemoryRanges[file] = undefined;
return Promise.resolve();
},

write: async (file, buffer, length, offset) => {
abort("TODO: file writing in fetch backend? read-only for now");
console.error('TODO: file writing in fetch backend? read-only for now');
},

// read/getSize fetch the data, then forward to the parent class.
read: async (file, buffer, length, offset) => {
if (length == 0) {
return 0;
}
try {
await getFile(file);
} catch (response) {
return response.status === 404 ? -{{{ cDefs.ENOENT }}} : -{{{ cDefs.EBADF }}};
await getFileRange(file, offset || 0, length);
} catch (failedResponse) {
return failedResponse.status === 404 ? -{{{ cDefs.ENOENT }}} : -{{{ cDefs.EBADF }}};
}
return jsFileOps.read(file, buffer, length, offset);
var fileInfo = wasmFS$JSMemoryRanges[file];
var chunks = fileInfo.chunks;
var chunkSize = fileInfo.chunkSize;
var firstChunk = (offset / chunkSize) | 0;
// See comments in getFileRange.
var lastChunk = ((offset+length-1) / chunkSize) | 0;
var readLength = 0;
for (var i = firstChunk; i <= lastChunk; i++) {
var chunk = chunks[i];
var start = Math.max(i*chunkSize, offset);
var chunkStart = i*chunkSize;
var end = Math.min(chunkStart+chunkSize, offset+length);
HEAPU8.set(chunk.subarray(start-chunkStart, end-chunkStart), buffer+(start-offset));
readLength = end - offset;
}
return readLength;
},
getSize: async (file) => {
try {
await getFile(file);
} catch (response) {}
return jsFileOps.getSize(file);
await getFileRange(file, 0, 0);
} catch (failedResponse) {
return 0;
}
return wasmFS$JSMemoryRanges[file].size;
},
};
},
Expand Down
21 changes: 20 additions & 1 deletion system/include/emscripten/wasmfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,23 @@ typedef backend_t (*backend_constructor_t)(void*);

backend_t wasmfs_create_memory_backend(void);

// Fetch backend
//
// Creates a new fetchfs backend. FetchFS serves filesystem reads by issuing
// HTTP fetch requests that download only the chunks of each file that are
// needed. It works best when the web server supports HTTP range requests,
// and it is important that the files are not stored encrypted or compressed
// at rest. By default, FetchFS requests URLs that begin with base_url and
// end with the file's path relative to where the fetchfs directory is
// mounted.
//
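// Files are fetched in chunks of chunk_size bytes, and range requests are
// aligned to chunk_size boundaries. A single read that spans several chunks
// is served by one request covering all of them, so a request may be larger
// than chunk_size. Files no larger than twice chunk_size, and files on
// servers that do not advertise range support, are downloaded all at once.
//
// If chunk_size is 0, a default of 16 MiB is used.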
//
// Note: this cannot be called on the browser main thread because it might
// deadlock while waiting for its dedicated worker thread to be spawned.
//
Expand All @@ -57,7 +74,9 @@ backend_t wasmfs_create_memory_backend(void);
//
// TODO: Add an async version of this function that will work on the main
// thread.
backend_t wasmfs_create_fetch_backend(const char* base_url __attribute__((nonnull)));
//
backend_t wasmfs_create_fetch_backend(const char* base_url __attribute__((nonnull)),
uint32_t chunk_size);
JoeOsborn marked this conversation as resolved.
Show resolved Hide resolved

backend_t wasmfs_create_node_backend(const char* root __attribute__((nonnull)));

Expand Down
67 changes: 51 additions & 16 deletions system/lib/wasmfs/backends/fetch_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,39 @@

namespace wasmfs {

const uint32_t DEFAULT_CHUNK_SIZE = 16*1024*1024;

// Backend for files whose contents are fetched over HTTP. It holds the base
// URL that per-file URLs are built from and the chunk size reported to files.
class FetchBackend : public wasmfs::ProxiedAsyncJSBackend {
// Prefix used by getFileURL() when building a file's URL.
std::string baseUrl;
// Chunk size in bytes, as returned by getChunkSize().
uint32_t chunkSize;
public:
// Stores baseUrl and chunkSize, and forwards setupOnThread to the
// ProxiedAsyncJSBackend base class.
FetchBackend(const std::string& baseUrl,
uint32_t chunkSize,
std::function<void(backend_t)> setupOnThread)
: ProxiedAsyncJSBackend(setupOnThread), baseUrl(baseUrl), chunkSize(chunkSize) {}
// Creates a FetchFile bound to this backend.
std::shared_ptr<DataFile> createFile(mode_t mode) override;
// Creates a FetchDirectory bound to this backend.
std::shared_ptr<Directory> createDirectory(mode_t mode) override;
// Returns the URL for filePath: baseUrl itself when filePath is empty,
// otherwise baseUrl + "/" + filePath.
const std::string getFileURL(const std::string& filePath);
// Returns the chunk size this backend was constructed with.
uint32_t getChunkSize();
};


class FetchFile : public ProxiedAsyncJSImplFile {
std::string filePath;
std::string fileUrl;

public:
FetchFile(const std::string& path,
mode_t mode,
backend_t backend,
emscripten::ProxyWorker& proxy)
: ProxiedAsyncJSImplFile(mode, backend, proxy), filePath(path) {}
: ProxiedAsyncJSImplFile(mode, backend, proxy), filePath(path) {
this->fileUrl = dynamic_cast<FetchBackend*>(getBackend())->getFileURL(filePath);
}

const std::string& getPath() const { return filePath; }
const std::string& getURL() const { return fileUrl; }
const uint32_t getChunkSize() const { return dynamic_cast<FetchBackend*>(getBackend())->getChunkSize(); }
};

class FetchDirectory : public MemoryDirectory {
Expand Down Expand Up @@ -57,40 +79,53 @@ class FetchDirectory : public MemoryDirectory {
std::string getChildPath(const std::string& name) const {
return dirPath + '/' + name;
}

std::shared_ptr<File> getChild(const std::string& name) override {
return MemoryDirectory::getChild(name);
}
};

class FetchBackend : public ProxiedAsyncJSBackend {
std::string baseUrl;
std::shared_ptr<DataFile> FetchBackend::createFile(mode_t mode) {
return std::make_shared<FetchFile>("", mode, this, proxy);
}

public:
FetchBackend(const std::string& baseUrl,
std::function<void(backend_t)> setupOnThread)
: ProxiedAsyncJSBackend(setupOnThread), baseUrl(baseUrl) {}
std::shared_ptr<Directory> FetchBackend::createDirectory(mode_t mode) {
return std::make_shared<FetchDirectory>("", mode, this, proxy);
}

std::shared_ptr<DataFile> createFile(mode_t mode) override {
return std::make_shared<FetchFile>(baseUrl, mode, this, proxy);
// Builds the URL for a file: the base URL alone when filePath is empty,
// otherwise baseUrl and filePath joined by a single '/'. No normalization is
// done, so a baseUrl that already ends in '/' yields a doubled slash.
const std::string FetchBackend::getFileURL(const std::string& filePath) {
if (filePath == "") {
return baseUrl;
}
return baseUrl + "/" + filePath;
}

std::shared_ptr<Directory> createDirectory(mode_t mode) override {
return std::make_shared<FetchDirectory>(baseUrl, mode, this, proxy);
}
};
// Returns the chunk size stored in this backend.
uint32_t FetchBackend::getChunkSize() {
return chunkSize;
}

extern "C" {
backend_t wasmfs_create_fetch_backend(const char* base_url) {
// C entry point that constructs a FetchBackend, registers it through
// wasmFS.addBackend, and returns the resulting handle. A null base_url is
// treated as "", and a chunkSize of 0 selects DEFAULT_CHUNK_SIZE. The setup
// callback passed to the backend calls _wasmfs_create_fetch_backend_js for it.
backend_t wasmfs_create_fetch_backend(const char* base_url, uint32_t chunkSize) {
// ProxyWorker cannot safely be synchronously spawned from the main browser
// thread. See comment in thread_utils.h for more details.
assert(!emscripten_is_main_browser_thread() &&
"Cannot safely create fetch backend on main browser thread");
return wasmFS.addBackend(std::make_unique<FetchBackend>(
base_url ? base_url : "",
chunkSize ? chunkSize : DEFAULT_CHUNK_SIZE,
[](backend_t backend) { _wasmfs_create_fetch_backend_js(backend); }));
}

// Used by the JS side of the fetch backend to look up a file's URL.
// ptr is reinterpreted as a FetchFile*; returns nullptr when ptr is null.
// The returned pointer aliases the URL string held by the FetchFile, so it
// is only usable while that file object is alive.
const char* _wasmfs_fetch_get_file_url(void* ptr) {
auto* file = reinterpret_cast<wasmfs::FetchFile*>(ptr);
return file ? file->getURL().data() : nullptr;
}

const char* EMSCRIPTEN_KEEPALIVE _wasmfs_fetch_get_file_path(void* ptr) {
uint32_t _wasmfs_fetch_get_chunk_size(void* ptr) {
auto* file = reinterpret_cast<wasmfs::FetchFile*>(ptr);
return file ? file->getPath().data() : nullptr;
return file ? file->getChunkSize() : DEFAULT_CHUNK_SIZE;
}

}

} // namespace wasmfs
1 change: 0 additions & 1 deletion system/lib/wasmfs/backends/fetch_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "wasmfs.h"

extern "C" {

// See library_wasmfs_fetch.js
void _wasmfs_create_fetch_backend_js(wasmfs::backend_t);
}
Loading