-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a10b5bd
commit 8f57cf3
Showing
2 changed files
with
158 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
/**
 * Polynomial hash, also known as a polynomial rolling hash function.
 *
 * A polynomial rolling hash is a hash function that uses only
 * multiplications and additions, which makes it possible to derive the hash
 * of a shifted window from the hash of the previous window.
 *
 * NOTE: If two strings are equal, their hash values are also equal.
 * The inverse need not be true (different strings may collide).
 *
 * Wikipedia: https://en.wikipedia.org/wiki/Rolling_hash
 */
const DEFAULT_BASE = 37;
const DEFAULT_MODULUS = 101;

export default class PolynomialHash {
  /**
   * @param {Object} [options]
   * @param {number} [options.base] - Base number that is used to create the polynomial.
   * @param {number} [options.modulus] - Modulus number that keeps the hash from overflowing.
   */
  constructor({ base = DEFAULT_BASE, modulus = DEFAULT_MODULUS } = {}) {
    this.base = base;
    this.modulus = modulus;
  }

  /**
   * Creates the hash representation of the word.
   *
   * The word is processed code point by code point (not UTF-16 code unit by
   * code unit), so a surrogate pair counts as a single character.
   *
   * Time complexity: O(word.length).
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number} Hash value in the range [0, modulus).
   */
  hash(word) {
    let hash = 0;

    // Horner's scheme: hash = (((c0 * base + c1) * base + c2) ...) mod modulus.
    for (const char of Array.from(word)) {
      hash *= this.base;
      hash += this.charToNumber(char);
      hash %= this.modulus;
    }

    return hash;
  }

  /**
   * Creates the hash representation of the word based on the hash value of
   * the previous word (the same window shifted by one character to the left).
   *
   * Both words are expected to contain the same number of characters
   * (code points), with newWord being prevWord without its first character
   * and with one new character appended.
   *
   * Characters are taken by code point, exactly as in hash(), so the result
   * stays consistent with hash(newWord) for text with surrogate pairs.
   *
   * Time complexity: O(prevWord.length), because the multiplier of the
   * dropped character (base^(length - 1) mod modulus) is recomputed per call.
   *
   * @param {number} prevHash - Hash of prevWord, as returned by hash() or roll().
   * @param {string} prevWord - Previous window.
   * @param {string} newWord - Current window.
   * @return {number} Hash of newWord.
   */
  roll(prevHash, prevWord, newWord) {
    // Split by code points so that the first/last "character" and the word
    // length mean the same thing here as they do in hash().
    const prevChars = Array.from(prevWord);
    const newChars = Array.from(newWord);

    // Nothing to roll from/to: fall back to the direct calculation instead
    // of failing on a missing first/last character.
    if (prevChars.length === 0 || newChars.length === 0) {
      return this.hash(newWord);
    }

    const prevValue = this.charToNumber(prevChars[0]);
    const newValue = this.charToNumber(newChars[newChars.length - 1]);

    // Weight of the leading character inside prevHash: base^(length - 1) mod modulus.
    let prevValueMultiplier = 1;
    for (let i = 1; i < prevChars.length; i += 1) {
      prevValueMultiplier *= this.base;
      prevValueMultiplier %= this.modulus;
    }

    // Adding the modulus first keeps the intermediate value non-negative
    // after the contribution of the dropped character is subtracted.
    let hash = prevHash + this.modulus;
    hash -= (prevValue * prevValueMultiplier) % this.modulus;

    // Shift the remaining characters one position and append the new one.
    hash *= this.base;
    hash += newValue;
    hash %= this.modulus;

    return hash;
  }

  /**
   * Converts a single character to a number.
   *
   * For a character that fits into one UTF-16 code unit this is its code
   * point. For a character stored as a surrogate pair the value found at
   * index 1 (the trailing surrogate) is additionally mixed in, multiplied
   * by 2^16. This formula is kept as is so that existing hash values do not change.
   *
   * @param {string} char - String holding one character (one code point).
   * @return {number}
   */
  charToNumber(char) {
    let charCode = char.codePointAt(0);

    // A second code unit exists only when the character is a surrogate pair.
    const surrogate = char.codePointAt(1);
    if (surrogate !== undefined) {
      const surrogateShift = 2 ** 16;
      charCode += surrogate * surrogateShift;
    }

    return charCode;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import PolynomialHash from '../PolynomialHash'; | ||
|
||
describe('PolynomialHash', () => {
  // Verifies that roll() yields the same value as hashing every frame from scratch.
  it('should calculate new hash based on previous one', () => {
    const bases = [3, 79, 101, 3251, 13229, 122743, 3583213];
    const mods = [79, 101];
    const frameSizes = [5, 20];

    // @TODO: Provide Unicode support.
    // The line with astral characters (\u{20005}, \u{20000}) stays disabled
    // because frames below are cut by UTF-16 code units.
    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      // + 'popularised in the \u{20005} \u{20000}1960s with the release of Letraset sheets '
      + 'publishing software like Aldus PageMaker 耀 including versions of Lorem.';

    // Check hashing for different prime base.
    bases.forEach((base) => {
      mods.forEach((modulus) => {
        const polynomialHash = new PolynomialHash({ base, modulus });

        // Check hashing for different word lengths.
        frameSizes.forEach((frameSize) => {
          let previousWord = text.slice(0, frameSize);
          let previousHash = polynomialHash.hash(previousWord);

          // Shift frame through the whole text, including the very last
          // full-size frame that ends at the end of the text.
          for (let frameShift = 1; frameShift <= (text.length - frameSize); frameShift += 1) {
            const currentWord = text.slice(frameShift, frameShift + frameSize);
            const currentHash = polynomialHash.hash(currentWord);
            const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

            // Check that rolling hash is the same as directly calculated hash.
            expect(currentRollingHash).toBe(currentHash);

            previousWord = currentWord;
            previousHash = currentHash;
          }
        });
      });
    });
  });

  // Pins concrete hash values for the default base and modulus 100.
  it('should generate numeric hashes less than 100', () => {
    const polynomialHash = new PolynomialHash({ modulus: 100 });

    expect(polynomialHash.hash('Some long text that is used as a key')).toBe(41);
    expect(polynomialHash.hash('Test')).toBe(92);
    expect(polynomialHash.hash('a')).toBe(97);
    expect(polynomialHash.hash('b')).toBe(98);
    expect(polynomialHash.hash('c')).toBe(99);
    expect(polynomialHash.hash('d')).toBe(0);
    expect(polynomialHash.hash('e')).toBe(1);
    expect(polynomialHash.hash('ab')).toBe(87);

    // @TODO: Provide Unicode support.
    expect(polynomialHash.hash('\u{20000}')).toBe(92);
  });
});