Skip to content

Commit 8f57cf3

Browse files
Polynomial Hash
1 parent a10b5bd commit 8f57cf3

File tree

2 files changed

+158
-0
lines changed

2 files changed

+158
-0
lines changed

Hashes/PolynomialHash.js

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/**
2+
* Polynomial Hash also known as Polynomial rolling hash function.
3+
*
4+
* Polynomial rolling hash function is a hash function that uses only multiplications and additions.
5+
*
6+
*
7+
* NOTE: If two strings are equal, their hash values are also equal. The converse need not be true: different strings may share the same hash value (a collision).
8+
*
9+
* Wikipedia: https://en.wikipedia.org/wiki/Rolling_hash
10+
*/
11+
// Defaults used when the constructor is called without explicit options.
const DEFAULT_BASE = 37;
const DEFAULT_MODULUS = 101;

/**
 * Polynomial rolling hash.
 *
 * A word is hashed as a polynomial in `base` whose coefficients are the
 * numeric values of the word's characters (iterated by Unicode code point),
 * reduced modulo `modulus`.
 *
 * All arithmetic uses plain JS numbers, so `base * modulus` has to stay
 * well below Number.MAX_SAFE_INTEGER for results to be exact.
 */
export default class PolynomialHash {
  /**
   * @param {Object} [options]
   * @param {number} [options.base] - Base number that is used to create the polynomial.
   * @param {number} [options.modulus] - Modulus number that keeps the hash from overflowing.
   */
  constructor({ base = DEFAULT_BASE, modulus = DEFAULT_MODULUS } = {}) {
    this.base = base;
    this.modulus = modulus;
  }

  /**
   * Creates the hash representation of the word.
   *
   * Time complexity: O(word.length).
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number} Value in the range [0, modulus).
   */
  hash(word) {
    // Array.from splits by code point, so a surrogate pair is one element.
    const charCodes = Array.from(word).map((char) => this.charToNumber(char));

    let hash = 0;
    for (const charCode of charCodes) {
      // Horner's scheme, reduced at every step to keep the value small.
      hash = (hash * this.base + charCode) % this.modulus;
    }

    return hash;
  }

  /**
   * Creates the hash of `newWord` from the hash of `prevWord`, where
   * `newWord` is `prevWord` shifted left by exactly one character
   * (one Unicode code point): the first character of `prevWord` is dropped
   * and the last character of `newWord` is appended.
   *
   * Characters are counted by code point, the same way `hash()` does, so
   * the rolled value matches `hash(newWord)` even when the window contains
   * characters outside the Basic Multilingual Plane.
   *
   * If either word is empty there is nothing to roll and the hash of
   * `newWord` is computed directly.
   *
   * Time complexity: O(prevWord.length) - the words are split into code
   * points and base^(length - 1) is recomputed on every call.
   *
   * @param {number} prevHash - Result of `hash(prevWord)`.
   * @param {string} prevWord
   * @param {string} newWord
   * @return {number} Value in the range [0, modulus).
   */
  roll(prevHash, prevWord, newWord) {
    const prevChars = Array.from(prevWord);
    const newChars = Array.from(newWord);

    if (prevChars.length === 0 || newChars.length === 0) {
      return this.hash(newWord);
    }

    const prevValue = this.charToNumber(prevChars[0]);
    const newValue = this.charToNumber(newChars[newChars.length - 1]);

    // base^(windowLength - 1) mod modulus: the weight of the leading character.
    let prevValueMultiplier = 1;
    for (let i = 1; i < prevChars.length; i += 1) {
      prevValueMultiplier = (prevValueMultiplier * this.base) % this.modulus;
    }

    // Adding the modulus first keeps the intermediate value non-negative
    // after the leading character's contribution is subtracted.
    let hash = prevHash + this.modulus;
    hash -= (prevValue * prevValueMultiplier) % this.modulus;

    // Shift the remaining characters one position up and append the new one.
    hash *= this.base;
    hash += newValue;
    hash %= this.modulus;

    return hash;
  }

  /**
   * Converts a single character (one code point, i.e. one or two UTF-16
   * code units) to a number.
   *
   * @param {string} char
   * @return {number}
   */
  charToNumber(char) {
    let charCode = char.codePointAt(0);

    // A second UTF-16 unit is present only for surrogate pairs; its value is
    // folded in above the low 16 bits. Existing hash values depend on this
    // exact mapping, so it is intentionally left as is.
    const surrogate = char.codePointAt(1);
    if (surrogate !== undefined) {
      const surrogateShift = 2 ** 16;
      charCode += surrogate * surrogateShift;
    }

    return charCode;
  }
}

Hashes/test/PolynomialHash.test.js

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import PolynomialHash from '../PolynomialHash';
2+
3+
describe('PolynomialHash', () => {
  // Slides a fixed-size frame over a text and verifies that roll() yields the
  // same value as hashing each frame from scratch.
  it('should calculate new hash based on previous one', () => {
    const bases = [3, 79, 101, 3251, 13229, 122743, 3583213];
    const mods = [79, 101];
    const frameSizes = [5, 20];

    // @TODO: Provide Unicode support.
    // NOTE(review): the commented-out fragment below contains characters
    // outside the Basic Multilingual Plane; presumably it stays disabled
    // until the TODO above is resolved - confirm before enabling.
    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      // + 'popularised in the \u{20005} \u{20000}1960s with the release of Letraset sheets '
      + 'publishing software like Aldus PageMaker 耀 including versions of Lorem.';

    // Check hashing for different prime base.
    bases.forEach((base) => {
      mods.forEach((modulus) => {
        const polynomialHash = new PolynomialHash({ base, modulus });

        // Check hashing for different word lengths.
        frameSizes.forEach((frameSize) => {
          let previousWord = text.slice(0, frameSize);
          let previousHash = polynomialHash.hash(previousWord);

          // Shift frame through the whole text. The bound is inclusive so
          // that the final full-size frame at the end of the text is checked.
          const lastFrameShift = text.length - frameSize;
          for (let frameShift = 1; frameShift <= lastFrameShift; frameShift += 1) {
            const currentWord = text.slice(frameShift, frameShift + frameSize);
            const currentHash = polynomialHash.hash(currentWord);
            const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

            // Check that rolling hash is the same as directly calculated hash.
            expect(currentRollingHash).toBe(currentHash);

            previousWord = currentWord;
            previousHash = currentHash;
          }
        });
      });
    });
  });

  // Pins concrete hash values for the default base with modulus 100.
  it('should generate numeric hashed less than 100', () => {
    const polynomialHash = new PolynomialHash({ modulus: 100 });

    expect(polynomialHash.hash('Some long text that is used as a key')).toBe(41);
    expect(polynomialHash.hash('Test')).toBe(92);
    expect(polynomialHash.hash('a')).toBe(97);
    expect(polynomialHash.hash('b')).toBe(98);
    expect(polynomialHash.hash('c')).toBe(99);
    expect(polynomialHash.hash('d')).toBe(0);
    expect(polynomialHash.hash('e')).toBe(1);
    expect(polynomialHash.hash('ab')).toBe(87);

    // @TODO: Provide Unicode support.
    expect(polynomialHash.hash('\u{20000}')).toBe(92);
  });
});

0 commit comments

Comments
 (0)