From 6560077508ae3c79fbe9ba1e2f90ec6f5e610db8 Mon Sep 17 00:00:00 2001 From: Muhammad Haris <101793258+headlessNode@users.noreply.github.com> Date: Mon, 20 Jan 2025 04:02:15 +0500 Subject: [PATCH] feat: add C `ndarray` API and refactor `blas/ext/base/scusumpw` PR-URL: https://github.com/stdlib-js/stdlib/pull/4814 Co-authored-by: Athan Reines Reviewed-by: Athan Reines --- .../@stdlib/blas/ext/base/scusumpw/README.md | 152 ++++++++++++++++-- .../ext/base/scusumpw/benchmark/benchmark.js | 12 +- .../scusumpw/benchmark/benchmark.native.js | 12 +- .../scusumpw/benchmark/benchmark.ndarray.js | 12 +- .../benchmark/benchmark.ndarray.native.js | 12 +- .../scusumpw/benchmark/c/benchmark.length.c | 49 +++++- .../blas/ext/base/scusumpw/docs/repl.txt | 12 +- .../ext/base/scusumpw/docs/types/index.d.ts | 12 +- .../ext/base/scusumpw/examples/c/example.c | 16 +- .../blas/ext/base/scusumpw/examples/index.js | 14 +- .../include/stdlib/blas/ext/base/scusumpw.h | 9 +- .../blas/ext/base/scusumpw/lib/index.js | 7 +- .../blas/ext/base/scusumpw/lib/ndarray.js | 8 +- .../ext/base/scusumpw/lib/ndarray.native.js | 23 +-- .../blas/ext/base/scusumpw/lib/scusumpw.js | 28 +--- .../ext/base/scusumpw/lib/scusumpw.native.js | 7 +- .../blas/ext/base/scusumpw/manifest.json | 32 ++-- .../blas/ext/base/scusumpw/src/addon.c | 26 ++- .../@stdlib/blas/ext/base/scusumpw/src/main.c | 94 +++++++++++ .../blas/ext/base/scusumpw/src/scusumpw.c | 94 ----------- 20 files changed, 400 insertions(+), 231 deletions(-) create mode 100644 lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/main.c delete mode 100644 lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/scusumpw.c diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/README.md b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/README.md index 99de471245d3..58e25d7518a5 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/README.md +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/README.md @@ -61,11 +61,11 @@ The function has the 
following parameters: - **N**: number of indexed elements. - **sum**: initial sum. - **x**: input [`Float32Array`][@stdlib/array/float32]. -- **strideX**: index increment for `x`. +- **strideX**: stride length for `x`. - **y**: output [`Float32Array`][@stdlib/array/float32]. -- **strideY**: index increment for `y`. +- **strideY**: stride length for `y`. -The `N` and `stride` parameters determine which elements in the strided arrays are accessed at runtime. For example, to compute the cumulative sum of every other element in `x`, +The `N` and stride parameters determine which elements in the strided arrays are accessed at runtime. For example, to compute the cumulative sum of every other element: ```javascript var Float32Array = require( '@stdlib/array/float32' ); @@ -115,7 +115,7 @@ The function has the following additional parameters: - **offsetX**: starting index for `x`. - **offsetY**: starting index for `y`. -While [`typed array`][mdn-typed-array] views mandate a view offset based on the underlying `buffer`, `offsetX` and `offsetY` parameters support indexing semantics based on a starting indices. For example, to calculate the cumulative sum of every other value in `x` starting from the second value and to store in the last `N` elements of `y` starting from the last element +While [`typed array`][mdn-typed-array] views mandate a view offset based on the underlying buffer, the offset parameters support indexing semantics based on starting indices. 
For example, to calculate the cumulative sum of every other element starting from the second element and to store in the last `N` elements of `y` starting from the last element: ```javascript var Float32Array = require( '@stdlib/array/float32' ); @@ -149,15 +149,17 @@ scusumpw.ndarray( 4, 0.0, x, 2, 1, y, -1, y.length-1 ); ```javascript -var discreteUniform = require( '@stdlib/random/base/discrete-uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); -var Float32Array = require( '@stdlib/array/float32' ); +var discreteUniform = require( '@stdlib/random/array/discrete-uniform' ); var scusumpw = require( '@stdlib/blas/ext/base/scusumpw' ); -var x = filledarrayBy( 10, 'float32', discreteUniform( 0, 100 ) ); -var y = new Float32Array( x.length ); - +var x = discreteUniform( 10, -100, 100, { + 'dtype': 'float32' +}); console.log( x ); + +var y = discreteUniform( 10, -100, 100, { + 'dtype': 'float32' +}); console.log( y ); scusumpw( x.length, 0.0, x, 1, y, -1 ); @@ -168,8 +170,138 @@ console.log( y ); + + * * * +
+ +## C APIs + + + +
+ +
+ + + + + +
+ +### Usage + +```c +#include "stdlib/blas/ext/base/scusumpw.h" +``` + +#### stdlib_strided_scusumpw( N, sum, \*X, strideX, \*Y, strideY ) + +Computes the cumulative sum of single-precision floating-point strided array elements using pairwise summation. + +```c +const float x[] = { 1.0f, 2.0f, 3.0f, 4.0f }; +float y[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + +stdlib_strided_scusumpw( 4, 0.0f, x, 1, y, 1 ); +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **sum**: `[in] float` initial sum. +- **X**: `[in] float*` input array. +- **strideX**: `[in] CBLAS_INT` stride length for `X`. +- **Y**: `[out] float*` output array. +- **strideY**: `[in] CBLAS_INT` stride length for `Y`. + +```c +void stdlib_strided_scusumpw( const CBLAS_INT N, const float sum, const float *X, const CBLAS_INT strideX, float *Y, const CBLAS_INT strideY ); +``` + + + +#### stdlib_strided_scusumpw_ndarray( N, sum, \*X, strideX, offsetX, \*Y, strideY, offsetY ) + + + +Computes the cumulative sum of single-precision floating-point strided array elements using pairwise summation and alternative indexing semantics. + +```c +const float x[] = { 1.0f, 2.0f, 3.0f, 4.0f }; +float y[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + +stdlib_strided_scusumpw_ndarray( 4, 0.0f, x, 1, 0, y, 1, 0 ); +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **sum**: `[in] float` initial sum. +- **X**: `[in] float*` input array. +- **strideX**: `[in] CBLAS_INT` stride length for `X`. +- **offsetX**: `[in] CBLAS_INT` starting index for `X`. +- **Y**: `[out] float*` output array. +- **strideY**: `[in] CBLAS_INT` stride length for `Y`. +- **offsetY**: `[in] CBLAS_INT` starting index for `Y`. + +```c +void stdlib_strided_scusumpw_ndarray( const CBLAS_INT N, const float sum, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); +``` + +
+ + + + + +
+ +
+ + + + + +
+ +### Examples + +```c +#include "stdlib/blas/ext/base/scusumpw.h" +#include <stdio.h> + +int main( void ) { + // Create strided arrays: + const float x[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + float y[] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; + + // Specify the number of elements: + const int N = 4; + + // Specify stride lengths: + const int strideX = 2; + const int strideY = -2; + + // Compute the cumulative sum: + stdlib_strided_scusumpw( N, 0.0f, x, strideX, y, strideY ); + + // Print the result: + for ( int i = 0; i < 8; i++ ) { + printf( "y[ %d ] = %f\n", i, y[ i ] ); + } +} +``` + +
+ + + +
+ + +
## References diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.js index 9140d18b1123..5ab230748e21 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.js @@ -21,18 +21,18 @@ // MODULES // var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnanf = require( '@stdlib/math/base/assert/is-nanf' ); var pow = require( '@stdlib/math/base/special/pow' ); -var Float32Array = require( '@stdlib/array/float32' ); var pkg = require( './../package.json' ).name; var scusumpw = require( './../lib/scusumpw.js' ); // VARIABLES // -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float32' +}; // FUNCTIONS // @@ -45,8 +45,8 @@ var rand = uniform( -10.0, 10.0 ); * @returns {Function} benchmark function */ function createBenchmark( len ) { - var y = new Float32Array( len ); - var x = filledarrayBy( len, 'float32', rand ); + var x = uniform( len, -100, 100, options ); + var y = uniform( len, -100, 100, options ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.native.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.native.js index e63e3363000e..60d651aced41 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.native.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.native.js @@ -22,11 +22,9 @@ var resolve = require( 'path' ).resolve; var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnanf = 
require( '@stdlib/math/base/assert/is-nanf' ); var pow = require( '@stdlib/math/base/special/pow' ); -var Float32Array = require( '@stdlib/array/float32' ); var tryRequire = require( '@stdlib/utils/try-require' ); var pkg = require( './../package.json' ).name; @@ -37,7 +35,9 @@ var scusumpw = tryRequire( resolve( __dirname, './../lib/scusumpw.native.js' ) ) var opts = { 'skip': ( scusumpw instanceof Error ) }; -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float32' +}; // FUNCTIONS // @@ -50,8 +50,8 @@ var rand = uniform( -10.0, 10.0 ); * @returns {Function} benchmark function */ function createBenchmark( len ) { - var x = filledarrayBy( len, 'float32', rand ); - var y = new Float32Array( len ); + var x = uniform( len, -100, 100, options ); + var y = uniform( len, -100, 100, options ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.js index 0a6f1bc6c736..cf51c329ec9b 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.js @@ -21,18 +21,18 @@ // MODULES // var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnanf = require( '@stdlib/math/base/assert/is-nanf' ); var pow = require( '@stdlib/math/base/special/pow' ); -var Float32Array = require( '@stdlib/array/float32' ); var pkg = require( './../package.json' ).name; var scusumpw = require( './../lib/ndarray.js' ); // VARIABLES // -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float32' +}; // FUNCTIONS // @@ -45,8 +45,8 @@ var rand = uniform( -10.0, 10.0 ); * @returns {Function} benchmark function */ function createBenchmark( len ) { - var x = 
filledarrayBy( len, 'float32', rand ); - var y = new Float32Array( len ); + var x = uniform( len, -100, 100, options ); + var y = uniform( len, -100, 100, options ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.native.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.native.js index 0a386bb1269e..635f00179c15 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.native.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/benchmark.ndarray.native.js @@ -22,11 +22,9 @@ var resolve = require( 'path' ).resolve; var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnanf = require( '@stdlib/math/base/assert/is-nanf' ); var pow = require( '@stdlib/math/base/special/pow' ); -var Float32Array = require( '@stdlib/array/float32' ); var tryRequire = require( '@stdlib/utils/try-require' ); var pkg = require( './../package.json' ).name; @@ -37,7 +35,9 @@ var scusumpw = tryRequire( resolve( __dirname, './../lib/ndarray.native.js' ) ); var opts = { 'skip': ( scusumpw instanceof Error ) }; -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float32' +}; // FUNCTIONS // @@ -50,8 +50,8 @@ var rand = uniform( -10.0, 10.0 ); * @returns {Function} benchmark function */ function createBenchmark( len ) { - var x = filledarrayBy( len, 'float32', rand ); - var y = new Float32Array( len ); + var x = uniform( len, -100, 100, options ); + var y = uniform( len, -100, 100, options ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/c/benchmark.length.c b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/c/benchmark.length.c index 1ae836b58c1c..bd11818f8375 100644 --- 
a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/c/benchmark.length.c +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/benchmark/c/benchmark.length.c @@ -94,7 +94,7 @@ static float rand_float( void ) { * @param len array length * @return elapsed time in seconds */ -static double benchmark( int iterations, int len ) { +static double benchmark1( int iterations, int len ) { double elapsed; float x[ len ]; float y[ len ]; @@ -121,6 +121,40 @@ static double benchmark( int iterations, int len ) { return elapsed; } +/** +* Runs a benchmark. +* +* @param iterations number of iterations +* @param len array length +* @return elapsed time in seconds +*/ +static double benchmark2( int iterations, int len ) { + double elapsed; + float x[ len ]; + float y[ len ]; + double t; + int i; + + for ( i = 0; i < len; i++ ) { + x[ i ] = ( rand_float() * 20000.0f ) - 10000.0f; + y[ i ] = 0.0f; + } + t = tic(); + for ( i = 0; i < iterations; i++ ) { + x[ 0 ] += 1.0f; + stdlib_strided_scusumpw_ndarray( len, 0.0f, x, 1, 0, y, 1, 0 ); + if ( y[ 0 ] != y[ 0 ] ) { + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( y[ len-1 ] != y[ len-1 ] ) { + printf( "should not return NaN\n" ); + } + return elapsed; +} + /** * Main execution sequence. 
*/ @@ -143,7 +177,18 @@ int main( void ) { for ( j = 0; j < REPEATS; j++ ) { count += 1; printf( "# c::%s:len=%d\n", NAME, len ); - elapsed = benchmark( iter, len ); + elapsed = benchmark1( iter, len ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + } + for ( i = MIN; i <= MAX; i++ ) { + len = pow( 10, i ); + iter = ITERATIONS / pow( 10, i-1 ); + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:ndarray:len=%d\n", NAME, len ); + elapsed = benchmark2( iter, len ); print_results( iter, elapsed ); printf( "ok %d benchmark finished\n", count ); } diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/repl.txt b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/repl.txt index 70e9f3b55348..1088423ebe42 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/repl.txt +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/repl.txt @@ -23,13 +23,13 @@ Input array. strideX: integer - Index increment for `x`. + Stride length for `x`. y: Float32Array Output array. strideY: integer - Index increment for `y`. + Stride length for `y`. Returns ------- @@ -66,8 +66,8 @@ elements using pairwise summation and alternative indexing semantics. While typed array views mandate a view offset based on the underlying - buffer, the `offset` parameter supports indexing semantics based on a - starting index. + buffer, the offset parameter supports indexing semantics based on a starting + index. Parameters ---------- @@ -81,7 +81,7 @@ Input array. strideX: integer - Index increment for `x`. + Stride length for `x`. offsetX: integer Starting index for `x`. @@ -90,7 +90,7 @@ Output array. strideY: integer - Index increment for `y`. + Stride length for `y`. offsetY: integer Starting index for `y`. 
diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/types/index.d.ts b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/types/index.d.ts index ca8d5f1d11e6..d14a9db7b98b 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/types/index.d.ts +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/docs/types/index.d.ts @@ -28,9 +28,9 @@ interface Routine { * @param N - number of indexed elements * @param sum - initial sum * @param x - input array - * @param strideX - `x` stride length + * @param strideX - stride length for `x` * @param y - output array - * @param strideY - `y` stride length + * @param strideY - stride length for `y` * @returns output array * * @example @@ -50,10 +50,10 @@ interface Routine { * @param N - number of indexed elements * @param sum - initial sum * @param x - input array - * @param strideX - `x` stride length + * @param strideX - stride length for `x` * @param offsetX - starting index for `x` * @param y - output array - * @param strideY - `y` stride length + * @param strideY - stride length for `y` * @param offsetY - starting index for `y` * @returns output array * @@ -75,9 +75,9 @@ interface Routine { * @param N - number of indexed elements * @param sum - initial sum * @param x - input array -* @param strideX - `x` stride length +* @param strideX - stride length for `x` * @param y - output array -* @param strideY - `y` stride length +* @param strideY - stride length for `y` * @returns output array * * @example diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/c/example.c b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/c/example.c index de0dde11533f..69cf517ecd18 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/c/example.c +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/c/example.c @@ -17,27 +17,25 @@ */ #include "stdlib/blas/ext/base/scusumpw.h" -#include #include -#include int main( void ) { // Create strided arrays: - const float x[] = { 1.0, 
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 }; - float y[] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + const float x[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + float y[] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; // Specify the number of elements: - const int64_t N = 4; + const int N = 4; // Specify stride lengths: - const int64_t strideX = 2; - const int64_t strideY = -2; + const int strideX = 2; + const int strideY = -2; // Compute the cumulative sum: stdlib_strided_scusumpw( N, 0.0f, x, strideX, y, strideY ); // Print the result: - for ( int64_t i = 0; i < 8; i++ ) { - printf( "y[ %"PRId64" ] = %f\n", i, y[ i ] ); + for ( int i = 0; i < 8; i++ ) { + printf( "y[ %d ] = %f\n", i, y[ i ] ); } } diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/index.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/index.js index 64d24e25349a..7725d88e1b85 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/index.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/examples/index.js @@ -18,15 +18,17 @@ 'use strict'; -var discreteUniform = require( '@stdlib/random/base/discrete-uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); -var Float32Array = require( '@stdlib/array/float32' ); +var discreteUniform = require( '@stdlib/random/array/discrete-uniform' ); var scusumpw = require( './../lib' ); -var x = filledarrayBy( 10, 'float32', discreteUniform( 0, 100 ) ); -var y = new Float32Array( x.length ); - +var x = discreteUniform( 10, -100, 100, { + 'dtype': 'float32' +}); console.log( x ); + +var y = discreteUniform( 10, -100, 100, { + 'dtype': 'float32' +}); console.log( y ); scusumpw( x.length, 0.0, x, 1, y, -1 ); diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/include/stdlib/blas/ext/base/scusumpw.h b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/include/stdlib/blas/ext/base/scusumpw.h index acbf3964a90b..293e2f54115b 100644 --- 
a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/include/stdlib/blas/ext/base/scusumpw.h +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/include/stdlib/blas/ext/base/scusumpw.h @@ -19,7 +19,7 @@ #ifndef STDLIB_BLAS_EXT_BASE_SCUSUMPW_H #define STDLIB_BLAS_EXT_BASE_SCUSUMPW_H -#include <stdint.h> +#include "stdlib/blas/base/shared.h" /* * If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler. @@ -31,7 +31,12 @@ extern "C" { /** * Computes the cumulative sum of single-precision floating-point strided array elements using pairwise summation. */ -void stdlib_strided_scusumpw( const int64_t N, const float sum, const float *X, const int64_t strideX, float *Y, const int64_t strideY ); +void API_SUFFIX(stdlib_strided_scusumpw)( const CBLAS_INT N, const float sum, const float *X, const CBLAS_INT strideX, float *Y, const CBLAS_INT strideY ); + +/** +* Computes the cumulative sum of single-precision floating-point strided array elements using pairwise summation and alternative indexing semantics. 
+*/ +void API_SUFFIX(stdlib_strided_scusumpw_ndarray)( const CBLAS_INT N, const float sum, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); #ifdef __cplusplus } diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/index.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/index.js index 3658a03afb6e..fddfdca7af09 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/index.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/index.js @@ -29,21 +29,18 @@ * * var x = new Float32Array( [ 1.0, -2.0, 2.0 ] ); * var y = new Float32Array( x.length ); -* var N = x.length; * -* scusumpw( N, 0.0, x, 1, y, 1 ); +* scusumpw( x.length, 0.0, x, 1, y, 1 ); * // y => [ 1.0, -1.0, 1.0 ] * * @example * var Float32Array = require( '@stdlib/array/float32' ); -* var floor = require( '@stdlib/math/base/special/floor' ); * var scusumpw = require( '@stdlib/blas/ext/base/scusumpw' ); * * var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0 ] ); * var y = new Float32Array( x.length ); -* var N = floor( x.length / 2 ); * -* scusumpw.ndarray( N, 0.0, x, 2, 1, y, 1, 0 ); +* scusumpw.ndarray( 4, 0.0, x, 2, 1, y, 1, 0 ); * // y => [ 1.0, -1.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0 ] */ diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.js index e162d7d78f4b..d0eb097dd91d 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.js @@ -46,22 +46,20 @@ var BLOCKSIZE = 128; * @param {PositiveInteger} N - number of indexed elements * @param {number} sum - initial sum * @param {Float32Array} x - input array -* @param {integer} strideX - `x` stride length +* @param {integer} strideX - stride length for `x` * @param {NonNegativeInteger} offsetX - starting index for `x` * @param {Float32Array} y - output array -* 
@param {integer} strideY - `y` stride length +* @param {integer} strideY - stride length for `y` * @param {NonNegativeInteger} offsetY - starting index for `y` * @returns {Float32Array} output array * * @example * var Float32Array = require( '@stdlib/array/float32' ); -* var floor = require( '@stdlib/math/base/special/floor' ); * * var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0 ] ); * var y = new Float32Array( x.length ); -* var N = floor( x.length / 2 ); * -* var v = scusumpw( N, 0.0, x, 2, 1, y, 1, 0 ); +* var v = scusumpw( 4, 0.0, x, 2, 1, y, 1, 0 ); * // returns [ 1.0, -1.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0 ] */ function scusumpw( N, sum, x, strideX, offsetX, y, strideY, offsetY ) { diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.native.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.native.js index a5c83d5f57fe..11a4475dcbe0 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.native.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/ndarray.native.js @@ -20,9 +20,7 @@ // MODULES // -var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' ); -var offsetView = require( '@stdlib/strided/base/offset-view' ); -var addon = require( './scusumpw.native.js' ); +var addon = require( './../src/addon.node' ); // MAIN // @@ -33,35 +31,24 @@ var addon = require( './scusumpw.native.js' ); * @param {PositiveInteger} N - number of indexed elements * @param {number} sum - initial sum * @param {Float32Array} x - input array -* @param {integer} strideX - `x` stride length +* @param {integer} strideX - stride length for `x` * @param {NonNegativeInteger} offsetX - starting index for `x` * @param {Float32Array} y - output array -* @param {integer} strideY - `y` stride length +* @param {integer} strideY - stride length for `y` * @param {NonNegativeInteger} offsetY - starting index for `y` * @returns {Float32Array} output array * * @example * var Float32Array = require( 
'@stdlib/array/float32' ); -* var floor = require( '@stdlib/math/base/special/floor' ); * * var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0 ] ); * var y = new Float32Array( x.length ); -* var N = floor( x.length / 2 ); * -* var v = scusumpw( N, 0.0, x, 2, 1, y, 1, 0 ); +* var v = scusumpw( 4, 0.0, x, 2, 1, y, 1, 0 ); * // returns [ 1.0, -1.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0 ] */ function scusumpw( N, sum, x, strideX, offsetX, y, strideY, offsetY ) { - var viewX; - var viewY; - - offsetX = minViewBufferIndex( N, strideX, offsetX ); - offsetY = minViewBufferIndex( N, strideY, offsetY ); - - viewX = offsetView( x, offsetX ); - viewY = offsetView( y, offsetY ); - - addon( N, sum, viewX, strideX, viewY, strideY ); + addon.ndarray( N, sum, x, strideX, offsetX, y, strideY, offsetY ); return y; } diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.js index 5d08496a20f2..07fc0ce8bd9c 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.js @@ -20,7 +20,8 @@ // MODULES // -var cusum = require( './ndarray.js' ); +var stride2offset = require( '@stdlib/strided/base/stride2offset' ); +var ndarray = require( './ndarray.js' ); // MAIN // @@ -39,9 +40,9 @@ var cusum = require( './ndarray.js' ); * @param {PositiveInteger} N - number of indexed elements * @param {number} sum - initial sum * @param {Float32Array} x - input array -* @param {integer} strideX - `x` stride length +* @param {integer} strideX - stride length for `x` * @param {Float32Array} y - output array -* @param {integer} strideY - `y` stride length +* @param {integer} strideY - stride length for `y` * @returns {Float32Array} output array * * @example @@ -49,29 +50,12 @@ var cusum = require( './ndarray.js' ); * * var x = new Float32Array( [ 1.0, -2.0, 2.0 ] ); * var y = new Float32Array( x.length ); -* var N = x.length; * -* 
var v = scusumpw( N, 0.0, x, 1, y, 1 ); +* var v = scusumpw( x.length, 0.0, x, 1, y, 1 ); * // returns [ 1.0, -1.0, 1.0 ] */ function scusumpw( N, sum, x, strideX, y, strideY ) { - var ix; - var iy; - - if ( N <= 0 ) { - return y; - } - if ( strideX < 0 ) { - ix = (1-N) * strideX; - } else { - ix = 0; - } - if ( strideY < 0 ) { - iy = (1-N) * strideY; - } else { - iy = 0; - } - return cusum( N, sum, x, strideX, ix, y, strideY, iy ); + return ndarray( N, sum, x, strideX, stride2offset( N, strideX ), y, strideY, stride2offset( N, strideY ) ); // eslint-disable-line max-len } diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.native.js b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.native.js index e5d423a852a5..ce47e2ba9b93 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.native.js +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/lib/scusumpw.native.js @@ -31,9 +31,9 @@ var addon = require( './../src/addon.node' ); * @param {PositiveInteger} N - number of indexed elements * @param {number} sum - initial sum * @param {Float32Array} x - input array -* @param {integer} strideX - `x` stride length +* @param {integer} strideX - stride length for `x` * @param {Float32Array} y - output array -* @param {integer} strideY - `y` stride length +* @param {integer} strideY - stride length for `y` * @returns {Float32Array} output array * * @example @@ -41,9 +41,8 @@ var addon = require( './../src/addon.node' ); * * var x = new Float32Array( [ 1.0, -2.0, 2.0 ] ); * var y = new Float32Array( x.length ); -* var N = x.length; * -* var v = scusumpw( N, 0.0, x, 1, y, 1 ); +* var v = scusumpw( x.length, 0.0, x, 1, y, 1 ); * // returns [ 1.0, -1.0, 1.0 ] */ function scusumpw( N, sum, x, strideX, y, strideY ) { diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/manifest.json b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/manifest.json index 5b3db9e7966a..4188b4abb85e 100644 --- 
a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/manifest.json +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/manifest.json @@ -28,50 +28,52 @@ { "task": "build", "src": [ - "./src/scusumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], "dependencies": [ "@stdlib/napi/export", "@stdlib/napi/argv", "@stdlib/napi/argv-float", "@stdlib/napi/argv-int64", - "@stdlib/napi/argv-strided-float32array" + "@stdlib/napi/argv-strided-float32array", + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, { "task": "benchmark", "src": [ - "./src/scusumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" + ] }, { "task": "examples", "src": [ - "./src/scusumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" + ] } ] } diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/addon.c b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/addon.c index d9f6b05df005..7454b4c9b160 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/addon.c +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/addon.c @@ -17,6 +17,7 @@ */ #include "stdlib/blas/ext/base/scusumpw.h" +#include "stdlib/blas/base/shared.h" #include "stdlib/napi/export.h" #include "stdlib/napi/argv.h" #include "stdlib/napi/argv_float.h" @@ -40,10 +41,29 @@ static napi_value addon( napi_env env, napi_callback_info info ) { STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 5 ); STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 2 ); STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, Y, N, strideY, argv, 4 ); + API_SUFFIX(stdlib_strided_scusumpw)( N, sum, X, strideX, 
Y, strideY ); + return NULL; +} - stdlib_strided_scusumpw( N, sum, X, strideX, Y, strideY ); - +/** +* Receives JavaScript callback invocation data. +* +* @param env environment under which the function is invoked +* @param info callback data +* @return Node-API value +*/ +static napi_value addon_method( napi_env env, napi_callback_info info ) { + STDLIB_NAPI_ARGV( env, info, argv, argc, 8 ); + STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 ); + STDLIB_NAPI_ARGV_FLOAT( env, sum, argv, 1 ); + STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 3 ); + STDLIB_NAPI_ARGV_INT64( env, offsetX, argv, 4 ); + STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 6 ); + STDLIB_NAPI_ARGV_INT64( env, offsetY, argv, 7 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 2 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, Y, N, strideY, argv, 5 ); + API_SUFFIX(stdlib_strided_scusumpw_ndarray)( N, sum, X, strideX, offsetX, Y, strideY, offsetY ); return NULL; } -STDLIB_NAPI_MODULE_EXPORT_FCN( addon ) +STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method ); diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/main.c b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/main.c new file mode 100644 index 000000000000..5c1024dded94 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/main.c @@ -0,0 +1,94 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#include "stdlib/blas/ext/base/scusumpw.h" +#include "stdlib/strided/base/stride2offset.h" +#include "stdlib/blas/base/shared.h" + +/** +* Computes the cumulative sum of single-precision floating-point strided array elements using pairwise summation. +* +* ## Method +* +* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`. +* +* ## References +* +* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050). +* +* @param N number of indexed elements +* @param sum initial sum +* @param X input array +* @param strideX stride length for X +* @param Y output array +* @param strideY stride length for Y +*/ +void API_SUFFIX(stdlib_strided_scusumpw)( const CBLAS_INT N, const float sum, const float *X, const CBLAS_INT strideX, float *Y, const CBLAS_INT strideY ) { + const CBLAS_INT ox = stdlib_strided_stride2offset( N, strideX ); + const CBLAS_INT oy = stdlib_strided_stride2offset( N, strideY ); + API_SUFFIX(stdlib_strided_scusumpw_ndarray)( N, sum, X, strideX, ox, Y, strideY, oy ); +} + +/** +* Computes the cumulative sum of single-precision floating-point strided array elements using pairwise summation. +* +* ## Method +* +* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`. +* +* ## References +* +* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050). 
+* +* @param N number of indexed elements +* @param sum initial sum +* @param X input array +* @param strideX stride length for X +* @param Y output array +* @param strideY stride length for Y +*/ +void API_SUFFIX(stdlib_strided_scusumpw_ndarray)( const CBLAS_INT N, const float sum, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + CBLAS_INT ix; + CBLAS_INT iy; + CBLAS_INT i; + CBLAS_INT n; + float s; + + if ( N <= 0 ) { + return; + } + ix = offsetX; + iy = offsetY; + + // Blocksize for pairwise summation... + if ( N <= 128 ) { + s = 0.0f; + for ( i = 0; i < N; i++ ) { + s += X[ ix ]; + Y[ iy ] = sum + s; + ix += strideX; + iy += strideY; + } + return; + } + n = N / 2; + API_SUFFIX(stdlib_strided_scusumpw_ndarray)( n, sum, X, strideX, ix, Y, strideY, iy ); + iy += (n-1) * strideY; + API_SUFFIX(stdlib_strided_scusumpw_ndarray)( N-n, Y[ iy ], X, strideX, ix+(n*strideX), Y, strideY, iy+strideY ); + return; +} diff --git a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/scusumpw.c b/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/scusumpw.c deleted file mode 100644 index 2ccb45d1a101..000000000000 --- a/lib/node_modules/@stdlib/blas/ext/base/scusumpw/src/scusumpw.c +++ /dev/null @@ -1,94 +0,0 @@ -/** -* @license Apache-2.0 -* -* Copyright (c) 2020 The Stdlib Authors. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -#include "stdlib/blas/ext/base/scusumpw.h" -#include <stdint.h> - -/** -* Computes the cumulative sum of single-precision floating-point strided array elements using pairwise summation. -* -* ## Method -* -* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`. -* -* ## References -* -* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050). -* -* @param N number of indexed elements -* @param sum initial sum -* @param X input array -* @param strideX X stride length -* @param Y output array -* @param strideY Y stride length -*/ -void stdlib_strided_scusumpw( const int64_t N, const float sum, const float *X, const int64_t strideX, float *Y, const int64_t strideY ) { - float *xp1; - float *xp2; - float *yp1; - float *yp2; - int64_t ix; - int64_t iy; - int64_t i; - int64_t n; - float s; - - if ( N <= 0 ) { - return; - } - if ( strideX < 0 ) { - ix = (1-N) * strideX; - } else { - ix = 0; - } - if ( strideY < 0 ) { - iy = (1-N) * strideY; - } else { - iy = 0; - } - // Blocksize for pairwise summation... - if ( N <= 128 ) { - s = 0.0f; - for ( i = 0; i < N; i++ ) { - s += X[ ix ]; - Y[ iy ] = sum + s; - ix += strideX; - iy += strideY; - } - return; - } - n = N / 2; - if ( strideX < 0 ) { - xp1 = (float *)X + ( (n-N)*strideX ); - xp2 = (float *)X; - } else { - xp1 = (float *)X; - xp2 = (float *)X + ( n*strideX ); - } - if ( strideY < 0 ) { - yp1 = Y + ( (n-N)*strideY ); - yp2 = Y; - } else { - yp1 = Y; - yp2 = Y + ( n*strideY ); - } - stdlib_strided_scusumpw( n, sum, xp1, strideX, yp1, strideY ); - iy += (n-1) * strideY; - stdlib_strided_scusumpw( N-n, Y[ iy ], xp2, strideX, yp2, strideY ); - return; -}