Skip to content

Commit

Permalink
Split data segments as part of wasm-opt rather than finalize
Browse files Browse the repository at this point in the history
  • Loading branch information
sbc100 committed Nov 9, 2023
1 parent 45550a8 commit bc75928
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 53 deletions.
57 changes: 38 additions & 19 deletions emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,13 @@ def should_run_binaryen_optimizer():
return settings.OPT_LEVEL >= 2


def get_binaryen_passes():
def remove_trailing_zeros(memfile):
  """Rewrite the file at `memfile` with any trailing zero bytes removed.

  The whole file is read through `utils.read_binary`, every `b'\\0'` byte at
  the end of the data is dropped, and the result is written back to the same
  path with `utils.write_binary`.
  """
  stripped = utils.read_binary(memfile).rstrip(b'\0')
  utils.write_binary(memfile, stripped)


def get_binaryen_passes(memfile):
passes = []
optimizing = should_run_binaryen_optimizer()
# wasm-emscripten-finalize will strip the features section for us
Expand Down Expand Up @@ -717,6 +723,12 @@ def check_human_readable_list(items):
if settings.MEMORY64 == 2:
passes += ['--memory64-lowering']

if memfile:
passes += [
f'--separate-data-segments={memfile}',
f'--pass-arg=global-base@{settings.GLOBAL_BASE}'
]

if settings.BINARYEN_IGNORE_IMPLICIT_TRAPS:
passes += ['--ignore-implicit-traps']
# normally we can assume the memory, if imported, has not been modified
Expand Down Expand Up @@ -3233,33 +3245,28 @@ def phase_post_link(options, state, in_wasm, wasm_target, target, js_syms):

settings.TARGET_JS_NAME = os.path.basename(state.js_target)

if settings.MEM_INIT_IN_WASM:
memfile = None
else:
memfile = shared.replace_or_append_suffix(target, '.mem')

if options.embind_emit_tsd:
phase_embind_emit_tsd(options, in_wasm, wasm_target, memfile, js_syms)
phase_embind_emit_tsd(options, in_wasm, wasm_target, js_syms)

phase_emscript(options, in_wasm, wasm_target, memfile, js_syms)
phase_emscript(options, in_wasm, wasm_target, js_syms)

if options.js_transform:
phase_source_transforms(options)

if memfile and not settings.MINIMAL_RUNTIME:
# MINIMAL_RUNTIME doesn't use `var memoryInitializer` but instead expects Module['mem'] to
# be loaded before the module. See src/postamble_minimal.js.
phase_memory_initializer(memfile)
if settings.MEM_INIT_IN_WASM:
memfile = None
else:
memfile = shared.replace_or_append_suffix(target, '.mem')

phase_binaryen(target, options, wasm_target)
phase_binaryen(target, options, wasm_target, memfile)

# If we are not emitting any JS (oformat is WASM) then we are all done now;
# otherwise run the final emitting phase.
if options.oformat != OFormat.WASM:
phase_final_emitting(options, state, target, wasm_target, memfile)


@ToolchainProfiler.profile_block('emscript')
def phase_emscript(options, in_wasm, wasm_target, memfile, js_syms):
def phase_emscript(options, in_wasm, wasm_target, js_syms):
# Emscripten
logger.debug('emscript')

Expand All @@ -3268,12 +3275,12 @@ def phase_emscript(options, in_wasm, wasm_target, memfile, js_syms):
if shared.SKIP_SUBPROCS:
return

emscripten.run(in_wasm, wasm_target, final_js, memfile, js_syms)
emscripten.run(in_wasm, wasm_target, final_js, js_syms)
save_intermediate('original')


@ToolchainProfiler.profile_block('embind emit tsd')
def phase_embind_emit_tsd(options, in_wasm, wasm_target, memfile, js_syms):
def phase_embind_emit_tsd(options, in_wasm, wasm_target, js_syms):
logger.debug('emit tsd')
# Save settings so they can be restored after TS generation.
original_settings = settings.backup()
Expand Down Expand Up @@ -3311,7 +3318,7 @@ def phase_embind_emit_tsd(options, in_wasm, wasm_target, memfile, js_syms):
outfile_js = in_temp('tsgen_a.out.js')
# The Wasm outfile may be modified by emscripten.run, so use a temporary file.
outfile_wasm = in_temp('tsgen_a.out.wasm')
emscripten.run(in_wasm, outfile_wasm, outfile_js, memfile, js_syms)
emscripten.run(in_wasm, outfile_wasm, outfile_js, js_syms)
out = shared.run_js_tool(outfile_js, [], stdout=PIPE)
write_file(
os.path.join(os.path.dirname(wasm_target), options.embind_emit_tsd), out)
Expand Down Expand Up @@ -3797,7 +3804,7 @@ def consume_arg_file():


@ToolchainProfiler.profile_block('binaryen')
def phase_binaryen(target, options, wasm_target):
def phase_binaryen(target, options, wasm_target, memfile):
global final_js
logger.debug('using binaryen')
# whether we need to emit -g (function name debug info) in the final wasm
Expand All @@ -3820,7 +3827,7 @@ def phase_binaryen(target, options, wasm_target):
# run wasm-opt if we have work for it: either passes, or if we are using
# source maps (which requires some extra processing to keep the source map
# but remove DWARF)
passes = get_binaryen_passes()
passes = get_binaryen_passes(memfile)
if passes:
# if asyncify is used, we will use it in the next stage, and so if it is
# the only reason we need intermediate debug info, we can stop keeping it
Expand All @@ -3837,6 +3844,18 @@ def phase_binaryen(target, options, wasm_target):
debug=intermediate_debug_info)
building.save_intermediate(wasm_target, 'byn.wasm')

if memfile:
# We have a separate .mem file. Binaryen did not strip any trailing zeros,
# because whether it is valid to do so is an ABI question.
# We can strip them here, since we make sure to zero out that memory (even in
# the dynamic linking case, our loader zeros it out).
remove_trailing_zeros(memfile)

# MINIMAL_RUNTIME doesn't use `var memoryInitializer` but instead expects Module['mem'] to
# be loaded before the module. See src/postamble_minimal.js.
if not settings.MINIMAL_RUNTIME:
phase_memory_initializer(memfile)

if settings.EVAL_CTORS:
with ToolchainProfiler.profile_block('eval_ctors'):
building.eval_ctors(final_js, wasm_target, debug_info=intermediate_debug_info)
Expand Down
27 changes: 5 additions & 22 deletions emscripten.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def create_named_globals(metadata):
return '\n'.join(named_globals)


def emscript(in_wasm, out_wasm, outfile_js, memfile, js_syms):
def emscript(in_wasm, out_wasm, outfile_js, js_syms):
# Overview:
# * Run wasm-emscripten-finalize to extract metadata and modify the binary
# to use emscripten's wasm<->JS ABI
Expand All @@ -294,7 +294,7 @@ def emscript(in_wasm, out_wasm, outfile_js, memfile, js_syms):
# set file locations, so that JS glue can find what it needs
settings.WASM_BINARY_FILE = js_manipulation.escape_for_js_string(os.path.basename(out_wasm))

metadata = finalize_wasm(in_wasm, out_wasm, memfile, js_syms)
metadata = finalize_wasm(in_wasm, out_wasm, js_syms)

if settings.RELOCATABLE and settings.MEMORY64 == 2:
metadata.imports += ['__memory_base32']
Expand Down Expand Up @@ -439,12 +439,6 @@ def emscript(in_wasm, out_wasm, outfile_js, memfile, js_syms):
module = None


def remove_trailing_zeros(memfile):
  """Rewrite the file at `memfile` with any trailing zero bytes removed.

  The whole file is read through `utils.read_binary`, every `b'\\0'` byte at
  the end of the data is dropped, and the result is written back to the same
  path with `utils.write_binary`.
  """
  stripped = utils.read_binary(memfile).rstrip(b'\0')
  utils.write_binary(memfile, stripped)


@ToolchainProfiler.profile()
def get_metadata(infile, outfile, modify_wasm, args):
metadata = extract_metadata.extract_metadata(infile)
Expand All @@ -463,7 +457,7 @@ def get_metadata(infile, outfile, modify_wasm, args):
return metadata


def finalize_wasm(infile, outfile, memfile, js_syms):
def finalize_wasm(infile, outfile, js_syms):
building.save_intermediate(infile, 'base.wasm')
args = []

Expand Down Expand Up @@ -501,10 +495,6 @@ def finalize_wasm(infile, outfile, memfile, js_syms):
modify_wasm = True
else:
args.append('--no-legalize-javascript-ffi')
if memfile:
args.append(f'--separate-data-segments={memfile}')
args.append(f'--global-base={settings.GLOBAL_BASE}')
modify_wasm = True
if settings.SIDE_MODULE:
args.append('--side-module')
if settings.STACK_OVERFLOW_CHECK >= 2:
Expand Down Expand Up @@ -554,13 +544,6 @@ def finalize_wasm(infile, outfile, memfile, js_syms):
if settings.GENERATE_SOURCE_MAP:
building.save_intermediate(outfile + '.map', 'post_finalize.map')

if memfile:
# we have a separate .mem file. binaryen did not strip any trailing zeros,
# because it's an ABI question as to whether it is valid to do so or not.
# we can do so here, since we make sure to zero out that memory (even in
# the dynamic linking case, our loader zeros it out)
remove_trailing_zeros(memfile)

expected_exports = set(settings.EXPORTED_FUNCTIONS)
expected_exports.update(asmjs_mangle(s) for s in settings.REQUIRED_EXPORTS)
# Assume that when JS symbol dependencies are exported it is because they
Expand Down Expand Up @@ -984,5 +967,5 @@ def create_pointer_conversion_wrappers(metadata):
return wrappers


# Thin wrapper: forwards all five arguments, unchanged and in the same order,
# to emscript().
def run(in_wasm, out_wasm, outfile_js, memfile, js_syms):
emscript(in_wasm, out_wasm, outfile_js, memfile, js_syms)
# Thin wrapper: forwards all four arguments, unchanged and in the same order,
# to emscript().
def run(in_wasm, out_wasm, outfile_js, js_syms):
emscript(in_wasm, out_wasm, outfile_js, js_syms)
4 changes: 2 additions & 2 deletions test/code_size/embind_val_wasm.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"a.js": 7498,
"a.js.gz": 3142,
"a.wasm": 9628,
"a.wasm.gz": 4938,
"a.wasm.gz": 4939,
"total": 17799,
"total_gz": 8511
"total_gz": 8512
}
4 changes: 2 additions & 2 deletions test/code_size/hello_wasm_worker_wasm.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"a.js": 667,
"a.js.gz": 458,
"a.wasm": 1855,
"a.wasm.gz": 1049,
"a.wasm.gz": 1050,
"total": 3259,
"total_gz": 1940
"total_gz": 1941
}
8 changes: 4 additions & 4 deletions test/code_size/hello_webgl2_wasm.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"a.html.gz": 379,
"a.js": 4700,
"a.js.gz": 2417,
"a.wasm": 10467,
"a.wasm.gz": 6706,
"total": 15736,
"total_gz": 9502
"a.wasm": 10466,
"a.wasm.gz": 6707,
"total": 15735,
"total_gz": 9503
}
8 changes: 4 additions & 4 deletions test/code_size/hello_webgl_wasm.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"a.html.gz": 379,
"a.js": 4186,
"a.js.gz": 2244,
"a.wasm": 10467,
"a.wasm.gz": 6706,
"total": 15222,
"total_gz": 9329
"a.wasm": 10466,
"a.wasm.gz": 6707,
"total": 15221,
"total_gz": 9330
}

0 comments on commit bc75928

Please sign in to comment.