From aec91e57c34e123e33d12959d65d594753e29c62 Mon Sep 17 00:00:00 2001 From: juj Date: Thu, 29 Aug 2024 17:10:11 +0300 Subject: [PATCH] JS VMs struggle to recognize when the condition part of a for loop is constant, especially w.r.t. GLctx.getProgramParameter(), so make sure that no repeated computation is done in such paths. (#20439) --- src/library_webgl.js | 53 ++++++++++++++--------- test/code_size/embind_hello_wasm.json | 4 +- test/code_size/hello_webgl2_wasm.json | 8 ++-- test/code_size/hello_webgl2_wasm2js.json | 8 ++-- test/code_size/hello_webgl_wasm.json | 8 ++-- test/code_size/hello_webgl_wasm2js.json | 8 ++-- test/code_size/math_wasm.json | 4 +- test/code_size/random_printf_wasm.json | 4 +- test/code_size/random_printf_wasm2js.json | 4 +- 9 files changed, 57 insertions(+), 44 deletions(-) diff --git a/src/library_webgl.js b/src/library_webgl.js index 2d32c24f1ff4d..6b35a6402fc03 100644 --- a/src/library_webgl.js +++ b/src/library_webgl.js @@ -2182,7 +2182,8 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; // maps integer locations back to uniform name strings, so that we can lazily fetch uniform array locations program.uniformArrayNamesById = {}; - for (i = 0; i < GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/); ++i) { + var numActiveUniforms = GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/); + for (i = 0; i < numActiveUniforms; ++i) { var u = GLctx.getActiveUniform(program, i); var nm = u.name; var sz = u.size; @@ -2499,8 +2500,9 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; #if GL_POOL_TEMP_BUFFERS if (count <= {{{ GL_POOL_TEMP_BUFFERS_SIZE / 2 }}}) { // avoid allocation when uploading few enough uniforms - var view = miniTempWebGLIntBuffers[2*count]; - for (var i = 0; i < 2*count; i += 2) { + count *= 2; + var view = miniTempWebGLIntBuffers[count]; + for (var i = 0; i < count; i += 2) { view[i] = {{{ makeGetValue('value', '4*i', 'i32') }}}; view[i+1] = {{{ makeGetValue('value', '4*i+4', 'i32') }}}; } @@ -2541,8 +2543,9 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; #if GL_POOL_TEMP_BUFFERS if (count <= {{{ GL_POOL_TEMP_BUFFERS_SIZE / 3 }}}) { // avoid allocation when uploading few enough uniforms - var view = miniTempWebGLIntBuffers[3*count]; - for (var i = 0; i < 3*count; i += 3) { + count *= 3; + var view = miniTempWebGLIntBuffers[count]; + for (var i = 0; i < count; i += 3) { view[i] = {{{ makeGetValue('value', '4*i', 'i32') }}}; view[i+1] = {{{ makeGetValue('value', '4*i+4', 'i32') }}}; view[i+2] = {{{ makeGetValue('value', '4*i+8', 'i32') }}}; @@ -2584,8 +2587,9 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; #if GL_POOL_TEMP_BUFFERS if (count <= {{{ GL_POOL_TEMP_BUFFERS_SIZE / 4 }}}) { // avoid allocation when uploading few enough uniforms - var view = miniTempWebGLIntBuffers[4*count]; - for (var i = 0; i < 4*count; i += 4) { + count *= 4; + var view = miniTempWebGLIntBuffers[count]; + for (var i = 0; i < count; i += 4) { view[i] = {{{ makeGetValue('value', '4*i', 'i32') }}}; view[i+1] = {{{ makeGetValue('value', '4*i+4', 'i32') }}}; view[i+2] = {{{ makeGetValue('value', '4*i+8', 'i32') }}}; @@ -2669,8 +2673,9 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; #if GL_POOL_TEMP_BUFFERS if (count <= {{{ GL_POOL_TEMP_BUFFERS_SIZE / 2 }}}) { // avoid allocation when uploading few enough uniforms - var view = miniTempWebGLFloatBuffers[2*count]; - for (var i = 0; i < 2*count; i += 2) { + count *= 2; + var view = miniTempWebGLFloatBuffers[count]; + for (var i = 0; i < count; i += 2) { view[i] = {{{ makeGetValue('value', '4*i', 'float') }}}; view[i+1] = {{{ makeGetValue('value', '4*i+4', 'float') }}}; } @@ -2711,8 +2716,9 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; #if GL_POOL_TEMP_BUFFERS if (count <= {{{ GL_POOL_TEMP_BUFFERS_SIZE / 3 }}}) { // avoid allocation when uploading few enough uniforms - var view = miniTempWebGLFloatBuffers[3*count]; - for (var i = 0; i < 3*count; i += 3) { + count *= 3; + var view = miniTempWebGLFloatBuffers[count]; + for (var i = 0; i < count; i += 3) { view[i] = {{{ makeGetValue('value', '4*i', 'float') }}}; view[i+1] = {{{ makeGetValue('value', '4*i+4', 'float') }}}; view[i+2] = {{{ makeGetValue('value', '4*i+8', 'float') }}}; @@ -2758,7 +2764,8 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; // hoist the heap out of the loop for size and for pthreads+growth. var heap = HEAPF32; value = {{{ getHeapOffset('value', 'float') }}}; - for (var i = 0; i < 4 * count; i += 4) { + count *= 4; + for (var i = 0; i < count; i += 4) { var dst = value + i; view[i] = heap[dst]; view[i + 1] = heap[dst + 1]; @@ -2802,8 +2809,9 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; #if GL_POOL_TEMP_BUFFERS if (count <= {{{ GL_POOL_TEMP_BUFFERS_SIZE / 4 }}}) { // avoid allocation when uploading few enough uniforms - var view = miniTempWebGLFloatBuffers[4*count]; - for (var i = 0; i < 4*count; i += 4) { + count *= 4; + var view = miniTempWebGLFloatBuffers[count]; + for (var i = 0; i < count; i += 4) { view[i] = {{{ makeGetValue('value', '4*i', 'float') }}}; view[i+1] = {{{ makeGetValue('value', '4*i+4', 'float') }}}; view[i+2] = {{{ makeGetValue('value', '4*i+8', 'float') }}}; @@ -2846,8 +2854,9 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; #if GL_POOL_TEMP_BUFFERS if (count <= {{{ GL_POOL_TEMP_BUFFERS_SIZE / 9 }}}) { // avoid allocation when uploading few enough uniforms - var view = miniTempWebGLFloatBuffers[9*count]; - for (var i = 0; i < 9*count; i += 9) { + count *= 9; + var view = miniTempWebGLFloatBuffers[count]; + for (var i = 0; i < count; i += 9) { view[i] = {{{ makeGetValue('value', '4*i', 'float') }}}; view[i+1] = {{{ makeGetValue('value', '4*i+4', 'float') }}}; view[i+2] = {{{ makeGetValue('value', '4*i+8', 'float') }}}; @@ -2899,7 +2908,8 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; // hoist the heap out of the loop for size and for pthreads+growth. var heap = HEAPF32; value = {{{ getHeapOffset('value', 'float') }}}; - for (var i = 0; i < 16 * count; i += 16) { + count *= 16; + for (var i = 0; i < count; i += 16) { var dst = value + i; view[i] = heap[dst]; view[i + 1] = heap[dst + 1]; @@ -3351,21 +3361,24 @@ for (/**@suppress{duplicate}*/var i = 0; i <= {{{ GL_POOL_TEMP_BUFFERS_SIZE }}}; {{{ makeSetValue('p', '0', 'log.length + 1', 'i32') }}}; } else if (pname == 0x8B87 /* GL_ACTIVE_UNIFORM_MAX_LENGTH */) { if (!program.maxUniformLength) { - for (var i = 0; i < GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/); ++i) { + var numActiveUniforms = GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/); + for (var i = 0; i < numActiveUniforms; ++i) { program.maxUniformLength = Math.max(program.maxUniformLength, GLctx.getActiveUniform(program, i).name.length+1); } } {{{ makeSetValue('p', '0', 'program.maxUniformLength', 'i32') }}}; } else if (pname == 0x8B8A /* GL_ACTIVE_ATTRIBUTE_MAX_LENGTH */) { if (!program.maxAttributeLength) { - for (var i = 0; i < GLctx.getProgramParameter(program, 0x8B89/*GL_ACTIVE_ATTRIBUTES*/); ++i) { + var numActiveAttributes = GLctx.getProgramParameter(program, 0x8B89/*GL_ACTIVE_ATTRIBUTES*/); + for (var i = 0; i < numActiveAttributes; ++i) { program.maxAttributeLength = Math.max(program.maxAttributeLength, GLctx.getActiveAttrib(program, i).name.length+1); } } {{{ makeSetValue('p', '0', 'program.maxAttributeLength', 'i32') }}}; } else if (pname == 0x8A35 /* GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH */) { if (!program.maxUniformBlockNameLength) { - for (var i = 0; i < GLctx.getProgramParameter(program, 0x8A36/*GL_ACTIVE_UNIFORM_BLOCKS*/); ++i) { + var numActiveUniformBlocks = GLctx.getProgramParameter(program, 0x8A36/*GL_ACTIVE_UNIFORM_BLOCKS*/); + for (var i = 0; i < numActiveUniformBlocks; ++i) { program.maxUniformBlockNameLength = Math.max(program.maxUniformBlockNameLength, GLctx.getActiveUniformBlockName(program, i).length+1); } } diff --git a/test/code_size/embind_hello_wasm.json b/test/code_size/embind_hello_wasm.json index c64f5a38f2551..c2e009eb3b271 100644 --- a/test/code_size/embind_hello_wasm.json +++ b/test/code_size/embind_hello_wasm.json @@ -4,7 +4,7 @@ "a.js": 9920, "a.js.gz": 4354, "a.wasm": 7715, - "a.wasm.gz": 3512, + "a.wasm.gz": 3508, "total": 18187, - "total_gz": 8246 + "total_gz": 8242 } diff --git a/test/code_size/hello_webgl2_wasm.json b/test/code_size/hello_webgl2_wasm.json index 416bb2efe4761..636c8cd9c491f 100644 --- a/test/code_size/hello_webgl2_wasm.json +++ b/test/code_size/hello_webgl2_wasm.json @@ -1,10 +1,10 @@ { "a.html": 454, "a.html.gz": 328, - "a.js": 4521, - "a.js.gz": 2308, + "a.js": 4531, + "a.js.gz": 2312, "a.wasm": 10399, "a.wasm.gz": 6695, - "total": 15374, - "total_gz": 9331 + "total": 15384, + "total_gz": 9335 } diff --git a/test/code_size/hello_webgl2_wasm2js.json b/test/code_size/hello_webgl2_wasm2js.json index 564f31e6457b3..b609a374c3632 100644 --- a/test/code_size/hello_webgl2_wasm2js.json +++ b/test/code_size/hello_webgl2_wasm2js.json @@ -1,8 +1,8 @@ { "a.html": 346, "a.html.gz": 262, - "a.js": 22193, - "a.js.gz": 11583, - "total": 22539, - "total_gz": 11845 + "a.js": 22203, + "a.js.gz": 11588, + "total": 22549, + "total_gz": 11850 } diff --git a/test/code_size/hello_webgl_wasm.json b/test/code_size/hello_webgl_wasm.json index c787832cff4e7..bba6cc02b6b02 100644 --- a/test/code_size/hello_webgl_wasm.json +++ b/test/code_size/hello_webgl_wasm.json @@ -1,10 +1,10 @@ { "a.html": 454, "a.html.gz": 328, - "a.js": 4059, - "a.js.gz": 2153, + "a.js": 4069, + "a.js.gz": 2158, "a.wasm": 10399, "a.wasm.gz": 6695, - "total": 14912, - "total_gz": 9176 + "total": 14922, + "total_gz": 9181 } diff --git a/test/code_size/hello_webgl_wasm2js.json b/test/code_size/hello_webgl_wasm2js.json index a3be5003962e8..62a2a23ab7664 100644 --- a/test/code_size/hello_webgl_wasm2js.json +++ b/test/code_size/hello_webgl_wasm2js.json @@ -1,8 +1,8 @@ { "a.html": 346, "a.html.gz": 262, - "a.js": 21719, - "a.js.gz": 11419, - "total": 22065, - "total_gz": 11681 + "a.js": 21729, + "a.js.gz": 11423, + "total": 22075, + "total_gz": 11685 } diff --git a/test/code_size/math_wasm.json b/test/code_size/math_wasm.json index cefd1f1d7477e..5328e0087d307 100644 --- a/test/code_size/math_wasm.json +++ b/test/code_size/math_wasm.json @@ -4,7 +4,7 @@ "a.js": 110, "a.js.gz": 125, "a.wasm": 2719, - "a.wasm.gz": 1674, + "a.wasm.gz": 1673, "total": 3381, - "total_gz": 2179 + "total_gz": 2178 } diff --git a/test/code_size/random_printf_wasm.json b/test/code_size/random_printf_wasm.json index bcfa0afce45db..36ba7bcb64a0a 100644 --- a/test/code_size/random_printf_wasm.json +++ b/test/code_size/random_printf_wasm.json @@ -1,6 +1,6 @@ { "a.html": 12690, - "a.html.gz": 6857, + "a.html.gz": 6855, "total": 12690, - "total_gz": 6857 + "total_gz": 6855 } diff --git a/test/code_size/random_printf_wasm2js.json b/test/code_size/random_printf_wasm2js.json index 1074f7b62fb46..09b30713f23f0 100644 --- a/test/code_size/random_printf_wasm2js.json +++ b/test/code_size/random_printf_wasm2js.json @@ -1,6 +1,6 @@ { "a.html": 17277, - "a.html.gz": 7489, + "a.html.gz": 7486, "total": 17277, - "total_gz": 7489 + "total_gz": 7486 }