diff --git a/RadixSort.js b/RadixSort.js index 6447fdf..8e42d7a 100644 --- a/RadixSort.js +++ b/RadixSort.js @@ -1,5 +1,4 @@ -// TODO: It may be faster to do shift-mask instead of the current mask-shift, since the mask would then be the same for all bytes (0xff). -// TODO: Remove extract digit function and just inline the core code. I've seem this improve performance in C#. Not sure how agressively JavaScript inlines small functions. +// TODO: Remove extract digit function and just inline the core code. I've seen this improve performance in C#. Not sure how aggressively JavaScript inlines small functions. // TODO: Compare performance versus TimSort for random, pre-sorted and constant, since TimSort is available in JavaScript thru npm // (https://stackoverflow.com/questions/40721767/what-is-the-fastest-way-to-sort-a-largeish-array-of-numbers-in-javascript) @@ -23,52 +22,6 @@ HpcAlgorithms.Sorting = (function() return digit; } - /** - * Radix Sort (Least Significant Digit - LSD) of unsigned integer array with values up to 32-bits (e.g. 0, 1, 2, ... 2_000_000_000, ...) - * This algorithm is not in-place - i.e. 
returns a sorted array - * @param {Array of numbers} inputArray Array of numbers, which must be unsigned integers of values within 32-bits - * @return {Array of numbers} Sorted array of numbers - */ - var RadixSortLsdUInt32_old = function(inputArray) - { - if (typeof inputArray.constructor === Array && typeof inputArray[0] === "number") throw new TypeError("Input argument must be an array of unsigned integers"); - var numberOfBins = 256; - var Log2ofPowerOfTwoRadix = 8; - var outputArray = new Array(inputArray.length); - var count = new Array(numberOfBins); - var outputArrayHasResult = false; - - var bitMask = 255; - var shiftRightAmount = 0; - - var startOfBin = new Array( numberOfBins ); - - while( bitMask != 0 ) // end processing digits when all the mask bits have been processed and shifted out, leaving no bits set in the bitMask - { - for (var i = 0; i < numberOfBins; i++ ) - count[ i ] = 0; - for (var current = 0; current < inputArray.length; current++ ) // Scan the array and count the number of times each digit value appears - i.e. 
size of each bin - count[ extractDigit( inputArray[ current ], bitMask, shiftRightAmount ) ]++; - - startOfBin[ 0 ] = 0; - for( var i = 1; i < numberOfBins; i++ ) - startOfBin[ i ] = startOfBin[ i - 1 ] + count[ i - 1 ]; - for ( var current = 0; current < inputArray.length; current++ ) - outputArray[ startOfBin[ extractDigit( inputArray[ current ], bitMask, shiftRightAmount ) ]++ ] = inputArray[ current ]; - - bitMask <<= Log2ofPowerOfTwoRadix; - shiftRightAmount += Log2ofPowerOfTwoRadix; - outputArrayHasResult = !outputArrayHasResult; - - var tmp = inputArray, inputArray = outputArray, outputArray = tmp; // swap input and output arrays - } - if ( outputArrayHasResult ) - for ( var current = 0; current < inputArray.length; current++ ) // copy from output array into the input array - inputArray[ current ] = outputArray[ current ]; - - return inputArray; - } - var HistogramByteComponents = function(inArray, l, r) { var numberOfDigits = 4; @@ -85,14 +38,43 @@ HpcAlgorithms.Sorting = (function() for (var current = l; current <= r; current++) // Scan the array and count the number of times each digit value appears - i.e. size of each bin { var value = inArray[current]; - count[0][ value & 0xff ]++; - count[1][(value & 0xff00) >> 8]++; - count[2][(value & 0xff0000) >> 16]++; - count[3][(value & 0xff000000) >> 24]++; + count[0][ value & 0xff]++; + count[1][(value >> 8) & 0xff]++; + count[2][(value >> 16) & 0xff]++; + count[3][(value >> 24) & 0xff]++; } return count; } + var HistogramByteComponentsAndKeyArray = function(inArray, l, r, getKey) + { + var numberOfDigits = 4; + var numberOfBins = 256; + var inKeys = new Array(inArray.length); + + var count = new Array(numberOfDigits); + for (var d = 0; d < numberOfDigits; d++) + { + count[d] = new Array(numberOfBins); + for (var b = 0; b < numberOfBins; b++) + count[d][b] = 0; + } + + for (var current = l; current <= r; current++) // Scan the array and count the number of times each digit value appears - i.e. 
size of each bin + { + var value = getKey(inArray[current]); + inKeys[current] = value; + count[0][ value & 0xff]++; + count[1][(value >> 8) & 0xff]++; + count[2][(value >> 16) & 0xff]++; + count[3][(value >> 24) & 0xff]++; + } + return { + count: count, + inKeys: inKeys + }; + } + /** * Radix Sort (Least Significant Digit - LSD) of unsigned integer array with values up to 32-bits (e.g. 0, 1, 2, ... 2_000_000_000, ...) * This algorithm is not in-place - i.e. returns a sorted array @@ -147,39 +129,56 @@ HpcAlgorithms.Sorting = (function() * @param {function} getKey function to extract and return a numeric key from the user defined type/class to sort on * @return {Array of numbers} Sorted array of a user defined type */ - var RadixSortLsdUdtUInt32 = function(inputArray, getKey) - { - var numberOfBins = 256; - var Log2ofPowerOfTwoRadix = 8; - var OutputArray = new Array(inputArray.length); - var count = new Array(numberOfBins); - var OutputArrayHasResult = false; - var bitMask = 255; + function RadixSortLsdUdtUInt32(inputArray, getKey) { + var numberOfBitsPerDigit = 8; + var numberOfBins = 1 << numberOfBitsPerDigit; + var numberOfDigits = 4; + var outputArray = new Array(inputArray.length); + var outSortedKeys = new Array(inputArray.length); // keys are permuted alongside their elements, so getKey is called only once per element (in the histogram pass) + var outputArrayHasResult = false; + var bitMask = numberOfBins - 1; var shiftRightAmount = 0; - var startOfBin = new Array( numberOfBins ); - + var d = 0; + + var retValue = HistogramByteComponentsAndKeyArray(inputArray, 0, inputArray.length - 1, getKey); + var count = retValue.count; + var inKeys = retValue.inKeys; + + var startOfBin = new Array(numberOfDigits); + for (d = 0; d < numberOfDigits; d++) + { + startOfBin[d] = new Array(numberOfBins); + startOfBin[d][0] = 0; + for (var b = 1; b < numberOfBins; b++ ) + startOfBin[d][b] = startOfBin[d][b - 1] + count[d][b - 1]; // exclusive prefix sum of the digit-d histogram: first output slot of each bin + } + + d = 0; while( bitMask != 0 ) // end processing digits when all the mask bits have been processed and shifted out, leaving no bits set in the bitMask { 
- for (var i = 0; i < numberOfBins; i++ ) - count[ i ] = 0; - for (var current = 0; current < inputArray.length; current++ ) // Scan the array and count the number of times each digit value appears - i.e. size of each bin - count[ extractDigit( getKey(inputArray[ current ]), bitMask, shiftRightAmount ) ]++; - startOfBin[ 0 ] = 0; - for( var i = 1; i < numberOfBins; i++ ) - startOfBin[ i ] = startOfBin[ i - 1 ] + count[ i - 1 ]; - for ( var current = 0; current < inputArray.length; current++ ) - OutputArray[ startOfBin[ extractDigit( getKey(inputArray[ current ]), bitMask, shiftRightAmount ) ]++ ] = inputArray[ current ]; - bitMask <<= Log2ofPowerOfTwoRadix; - shiftRightAmount += Log2ofPowerOfTwoRadix; - OutputArrayHasResult = !OutputArrayHasResult; - var tmp = inputArray, inputArray = OutputArray, OutputArray = tmp; // swap input and output arrays + var startOfBinLoc = startOfBin[d]; + + for (var current = 0; current < inputArray.length; current++) + { + var endOfBinIndex = (inKeys[current] >> shiftRightAmount) & (numberOfBins - 1); // shift-then-mask, same as the histogram: the previous (key & bitMask) >> shift went negative on the top byte (0xff000000 is a negative int32), so keys >= 2^31 indexed a nonexistent bin + var index = startOfBinLoc[endOfBinIndex]; + outputArray[ index] = inputArray[current]; + outSortedKeys[index] = inKeys[ current]; + startOfBinLoc[endOfBinIndex]++; + } + + bitMask <<= numberOfBitsPerDigit; + shiftRightAmount += numberOfBitsPerDigit; + outputArrayHasResult = !outputArrayHasResult; + d++; + + var tmp = inputArray, inputArray = outputArray, outputArray = tmp; // swap input and output arrays + var tmpKeys = inKeys; inKeys = outSortedKeys; outSortedKeys = tmpKeys; // swap input and output key arrays } - if ( OutputArrayHasResult ) - for ( var current = 0; current < inputArray.length; current++ ) // copy from output array into the input array - inputArray[ current ] = OutputArray[ current ]; - return inputArray; - } + return outputArrayHasResult ? 
outputArray : inputArray; + } + return { //someProperty: 'prop value', RadixSortLsdUInt32: RadixSortLsdUInt32, diff --git a/package.json b/package.json index 4cc524d..ff24e5a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "hpc-algorithms", - "version": "1.0.4", + "version": "1.0.5", "description": "High Performance Computing Algorithms", "main": "RadixSort.js", "scripts": { diff --git a/readme.md b/readme.md index 272499a..0e6df5b 100644 --- a/readme.md +++ b/readme.md @@ -8,11 +8,9 @@ Let us know what other algorithms could use acceleration or improvement. Faster and Better Algorithms, starting with high performance sorting: - LSD Radix Sort for unsigned integer arrays. 20-30X faster than JavaScript's built-in array sort for arrays less than 35 Million. 5-10X faster for arrays greater than 35 Million. This sort algorithm is not in-place, returning a new sorted array. -Discussion, benchmarks and example in https://duvanenko.tech.blog/2017/06/15/faster-sorting-in-javascript/ -- LSD Radix Sort for arrays of user defined classes by an unsigned integer key. This is a stable sort, -while JavaScript built-in is not stable. -Discussion, benchmarks and example in https://duvanenko.tech.blog/2017/07/10/sorting-arrays-of-objects-in-javascript-with-radix-sort/ - -Even higher performance coming soon for both of these algorithms, with support for more data types. +Discussion, benchmarks and usage in https://duvanenko.tech.blog/2017/06/15/faster-sorting-in-javascript/ +- LSD Radix Sort for arrays of objects by an unsigned integer key. 15X faster than JavaScript's built-in .sort(). +This is a stable sort, while JavaScript built-in is not stable. This sort algorithm is not in-place, returning a new sorted array. +Discussion, benchmarks and usage in https://duvanenko.tech.blog/2017/07/10/sorting-arrays-of-objects-in-javascript-with-radix-sort/ If you have a specific needs for higher performance algorithms, let us know.