class MyHashSet() {
private val table = IntArray(1000000 + 1) { -1 }
fun add(key: Int) {
table[key] = key
}
fun remove(key: Int) {
table[key] = -1
}
fun contains(key: Int): Boolean {
return table[key] != -1
}
}
We can use dynamic array or linked list to handle collision.
The first approach is to use dynamic array to store the keys:
- We use a fixed-size array (
buckets
) to store the keys. - Each bucket will store a list of keys to handle collision.
- We use a simple hash function to find the bucket.
class MyHashSet {
private val size = 769 // A prime number reduces collisions
private val buckets = Array<MutableList<Int>>(size) { mutableListOf() }
fun add(key: Int) {
val index = hash(key)
if (key !in buckets[index]) {
buckets[index].add(key)
}
}
fun remove(key: Int) {
val index = hash(key)
buckets[index].remove(key)
}
fun contains(key: Int): Boolean {
val index = hash(key)
return key in buckets[index]
}
private fun hash(key: Int): Int = key % size
}
Or we can use linked list to handle collision:
add(key)
: We hash the key and append it to the end of linked list. Make sure to check if the key already exists, if yes, just skip itremove(key)
: We hash the key and remove it from the linked list.contains(key)
: We hash the key and search it from the linked list.
// Use built-in linked list
class MyHashSet {
private val capacity = 769 // A prime number reduces collisions
private val buckets = Array<LinkedList<Int>>(capacity) { LinkedList() }
private fun hash(key: Int): Int = key % capacity
fun add(key: Int) {
val hashIndex = hash(key)
val bucket = buckets[hashIndex]
if (key !in bucket) {
bucket.add(key)
}
}
fun remove(key: Int) {
val hashIndex = hash(key)
buckets[hashIndex].remove(key)
}
fun contains(key: Int): Boolean {
val hashIndex = hash(key)
return key in buckets[hashIndex]
}
}
// Or equivalent with linked list implmented by ourselves
data class SetNode(
val key: Int,
var next: SetNode? = null
)
class MyHashSet() {
private val size = 769
private val multipler = 12582917L
private val array = Array<SetNode>(size) { SetNode(-1) }
fun add(key: Int) {
val hash = hash(key)
var previous: SetNode? = array[hash]
var current: SetNode? = array[hash]?.next
// Pay attention: we don't use general way to find the last node.
// because we have to check if the key already exists, so we iterate
// every node to check if the key already exists.
//
// The following code doesn't work because we don't check if key already exists.
// var current: SetNode? = head
// while (current?.next != null) {
// current = current.next
// }
// current?.next = SetNode(key)
// We iterate every node to check if the key already exists.
while (current != null) {
if (current.key == key) return
previous = current
current = current.next
}
previous?.next = SetNode(key)
}
fun remove(key: Int) {
val hash = hash(key)
var previous: SetNode? = array[hash]
var current: SetNode? = array[hash]?.next
while (current != null) {
if (current.key == key) {
previous?.next = current.next
return
}
previous = current
current = current.next
}
}
fun contains(key: Int): Boolean {
val hash = hash(key)
var current: SetNode? = array[hash]?.next
while (current != null) {
if (current.key == key) return true
current = current.next
}
return false
}
private fun hash(key: Int): Int {
return (key.toLong() * multipler % size).toInt()
}
}
- Time Complexity:
O(n / k)
wheren
is the number of all possible values andk
is the number of buckets. - Space Complexity:
O(n + k)
wheren
is the number of all possible values andk
is the number of buckets.
private const val EMPTY = -1
private const val REMOVED = -2
// From ChatGPT, it's correct but I don't fully understand, and we have to use -2 to mark as deleted.
class MyHashSet {
private val size = 10007 // A prime number for better distribution
private val buckets = IntArray(size) { EMPTY }
private fun hash(key: Int): Int = key % size
fun add(key: Int) {
var index = hash(key)
while (buckets[index] != EMPTY && buckets[index] != key) {
index = (index + 1) % size
}
buckets[index] = key
}
fun remove(key: Int) {
var index = hash(key)
while (buckets[index] != EMPTY) {
if (buckets[index] == key) {
buckets[index] = REMOVED
return
}
index = (index + 1) % size
}
}
fun contains(key: Int): Boolean {
var index = hash(key)
while (buckets[index] != EMPTY) {
if (buckets[index] == key) return true
index = (index + 1) % size
}
return false
}
}
class MyHashSet() {
private val size = 10007
private val multipler = 12582917L
// We initialize the table with -1, which means empty.
private val table = IntArray(size) { EMPTY }
fun add(key: Int) {
var probe = 0
var hash = hash(key, probe)
while (table[hash] != EMPTY) {
if (table[hash] == key) return
probe++
hash = hash(key, probe)
}
table[hash] = key
}
fun remove(key: Int) {
var probe = 0
var hash = hash(key, probe)
while (table[hash] != EMPTY) {
if (table[hash] == key) {
table[hash] = REMOVED
return
}
probe++
hash = hash(key, probe)
}
}
fun contains(key: Int): Boolean {
var probe = 0
var hash = hash(key, probe)
while (table[hash] != EMPTY) {
if (table[hash] == key) return true
probe++
hash = hash(key, probe)
}
return false
}
// We use linear probing to find the next empty slot.
private fun hash(key: Int, probe: Int): Int {
return (hash(key) + probe) % size
}
private fun hash(key: Int): Int {
return (key.toLong() * multipler % size).toInt()
}
}
- Time Complexity:
O(1)
on average if hash function is good and low load factor,O(n)
wheren
is the number of all possible values for worst case. - Space Complexity:
O(n)
wheren
is the number of all possible values.
When we remove an element, we have to mark it as deleted instead of setting it to EMPTY
. This is because we have to ensure that the probing sequence is not broken. If we set the slot to EMPTY
, the probing sequence will be broken and we can't find the element that is inserted after the deleted element.
For example, we have the following sequence of operations:
size = 10
hash(x) = x % 10
add(5)
add(15)
remove(5)
contains(15)
If we set the slot to EMPTY
when we remove the element, we see
add(5) // index 5 = 5
add(15) // index 5 is occupied -> linear probing -> index 6 = 15
remove(5) // index 5 = EMPTY
contains(15) // index 5 = EMPTY, break the while loop, return false (incorrect)
If we set the slot to REMOVED
when we remove the element, we see
add(5) // index 5 = 5
add(15) // index 5 is occupied -> linear probing -> index 6 = 15
remove(5) // index 5 = REMOVED
contains(15) // index 5 = REMOVED, continue probing -> index 6 = 15, return true (correct)
Probing should continue until we find the key or reach an empty slot. If we use EMPTY
for removed elements, the search will stop too early, failing to find valid keys that were probed forward.
But we have to modify our add()
to reuse the index marked as REMOVED
, when inserting a new key, we should reuse REMOVED
slots if they appear before any EMPTY
slot. And contains()
should skip REMOVED
and continue probling. This ensure that the probing sequence is not broken and we can reuse the slot that is removed before. (Not implemented in the above code)