Skip to content

Commit

Permalink
Polynomial Hash
Browse files Browse the repository at this point in the history
  • Loading branch information
prasad-chavan1 committed Sep 25, 2023
1 parent a10b5bd commit 8f57cf3
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 0 deletions.
99 changes: 99 additions & 0 deletions Hashes/PolynomialHash.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/**
* Polynomial Hash also known as Polynomial rolling hash function.
*
* Polynomial rolling hash function is a hash function that uses only multiplications and additions.
*
*
* NOTE: If two strings are equal, their hash values are also equal. The converse need not hold: two different strings may share the same hash value (a collision).
*
* Wikipedia: https://en.wikipedia.org/wiki/Rolling_hash
*/
// Default polynomial base, used when the constructor is not given a `base` option.
const DEFAULT_BASE = 37;
// Default modulus, used when the constructor is not given a `modulus` option.
const DEFAULT_MODULUS = 101;

export default class PolynomialHash {
  /**
   * Creates a polynomial (rolling) hash calculator.
   *
   * NOTE: all arithmetic is done with plain JS numbers and is not guarded against
   * precision loss, so intermediate products such as `2 * modulus * base` should stay
   * below Number.MAX_SAFE_INTEGER for the results to be exact.
   *
   * @param {Object} [options]
   * @param {number} [options.base] - Base number that is used to create the polynomial.
   * @param {number} [options.modulus] - Modulus number that keeps the hash from overflowing.
   */
  constructor({ base = DEFAULT_BASE, modulus = DEFAULT_MODULUS } = {}) {
    this.base = base;
    this.modulus = modulus;
  }

  /**
   * Function that creates hash representation of the word.
   *
   * The word is processed code point by code point (a surrogate pair counts as a
   * single character). An empty word hashes to 0.
   *
   * Time complexity: O(word.length).
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number} Hash value produced by reducing modulo `this.modulus` after every step.
   */
  hash(word) {
    let hash = 0;

    // Array.from() splits the string by code points, not by UTF-16 code units.
    for (const char of Array.from(word)) {
      hash *= this.base;
      hash += this.charToNumber(char);
      hash %= this.modulus;
    }

    return hash;
  }

  /**
   * Function that creates hash representation of the word
   * based on previous word (shifted by one character left) hash value.
   *
   * The first character of `prevWord` is removed from the hash and the last character
   * of `newWord` is appended to it. "Character" means a code point, exactly as in
   * hash(), so both words must be windows aligned on code point boundaries; only then
   * is the result equal to `this.hash(newWord)`.
   *
   * Time complexity: O(prevWord.length) - the words are not re-hashed, but the
   * multiplier of the removed character is recomputed on every call.
   *
   * @param {number} prevHash - Hash of `prevWord` as returned by hash() or roll().
   * @param {string} prevWord - Previous window.
   * @param {string} newWord - Window shifted by one character to the right.
   * @return {number} Hash of `newWord`.
   */
  roll(prevHash, prevWord, newWord) {
    // Split by code points so that positions and lengths agree with hash().
    const prevChars = Array.from(prevWord);
    const newChars = Array.from(newWord);

    const prevValue = this.charToNumber(prevChars[0]);
    const newValue = this.charToNumber(newChars[newChars.length - 1]);

    // Weight of the leading character inside prevHash: base^(length - 1) mod modulus.
    let prevValueMultiplier = 1;
    for (let i = 1; i < prevChars.length; i += 1) {
      prevValueMultiplier *= this.base;
      prevValueMultiplier %= this.modulus;
    }

    // Add the modulus before subtracting so the intermediate value never goes negative.
    let hash = prevHash + this.modulus;
    hash -= (prevValue * prevValueMultiplier) % this.modulus;

    // Shift the remaining characters one position and append the new character.
    hash *= this.base;
    hash += newValue;
    hash %= this.modulus;

    return hash;
  }

  /**
   * Converts char to number.
   *
   * For a character made of a single UTF-16 unit this is its code point. For a
   * surrogate pair, `codePointAt(0)` already yields the full code point; the trailing
   * surrogate unit is nevertheless mixed in as well (shifted by 2^16). That double
   * counting is kept on purpose so that existing hash values do not change.
   *
   * @param {string} char - A single character (one code point).
   * @return {number}
   */
  charToNumber(char) {
    let charCode = char.codePointAt(0);

    // Defined only when `char` has a second UTF-16 unit, i.e. it is a surrogate pair.
    const surrogate = char.codePointAt(1);
    if (surrogate !== undefined) {
      const surrogateShift = 2 ** 16;
      charCode += surrogate * surrogateShift;
    }

    return charCode;
  }
}
59 changes: 59 additions & 0 deletions Hashes/test/PolynomialHash.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import PolynomialHash from '../PolynomialHash';

// Test suite for PolynomialHash: rolling-hash consistency and pinned hash values.
describe('PolynomialHash', () => {
  it('should calculate new hash based on previous one', () => {
    const bases = [3, 79, 101, 3251, 13229, 122743, 3583213];
    const mods = [79, 101];
    const frameSizes = [5, 20];

    // @TODO: Provide Unicode support.
    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      // + 'popularised in the \u{20005} \u{20000}1960s with the release of Letraset sheets '
      + 'publishing software like Aldus PageMaker 耀 including versions of Lorem.';

    // Check hashing for different prime base.
    bases.forEach((base) => {
      mods.forEach((modulus) => {
        const polynomialHash = new PolynomialHash({ base, modulus });

        // Check hashing for different word lengths.
        frameSizes.forEach((frameSize) => {
          // slice() replaces the deprecated substr(start, length).
          let previousWord = text.slice(0, frameSize);
          let previousHash = polynomialHash.hash(previousWord);

          // Shift frame through the whole text, including the last full frame
          // that starts at text.length - frameSize.
          for (let frameShift = 1; frameShift <= (text.length - frameSize); frameShift += 1) {
            const currentWord = text.slice(frameShift, frameShift + frameSize);
            const currentHash = polynomialHash.hash(currentWord);
            const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

            // Check that rolling hash is the same as directly calculated hash.
            expect(currentRollingHash).toBe(currentHash);

            previousWord = currentWord;
            previousHash = currentHash;
          }
        });
      });
    });
  });

  it('should generate numeric hashed less than 100', () => {
    const polynomialHash = new PolynomialHash({ modulus: 100 });

    expect(polynomialHash.hash('Some long text that is used as a key')).toBe(41);
    expect(polynomialHash.hash('Test')).toBe(92);
    expect(polynomialHash.hash('a')).toBe(97);
    expect(polynomialHash.hash('b')).toBe(98);
    expect(polynomialHash.hash('c')).toBe(99);
    expect(polynomialHash.hash('d')).toBe(0);
    expect(polynomialHash.hash('e')).toBe(1);
    expect(polynomialHash.hash('ab')).toBe(87);

    // @TODO: Provide Unicode support.
    expect(polynomialHash.hash('\u{20000}')).toBe(92);
  });
});

0 comments on commit 8f57cf3

Please sign in to comment.