Skip to content
This repository was archived by the owner on Feb 18, 2025. It is now read-only.

Commit a66295f

Browse files
committed
stdlib: implement HashSet directly instead of forwarding to HashMap
1 parent 0b6600a commit a66295f

File tree

1 file changed

+184
-15
lines changed

1 file changed

+184
-15
lines changed

dora/stdlib/collections.dora

+184-15
Original file line numberDiff line numberDiff line change
@@ -805,43 +805,212 @@ impl[K: Hash + Identity + Equals, V] HashMapIterator[K, V] {
805805
}
806806

807807
// Hash set that stores its keys directly in a flat slot array and resolves
// collisions by stepping to the next slot (see insert/contains/remove).
@pub class HashSet[K: Hash + Identity + Equals] {
808-
map: HashMap[K, ()],
808+
// Two status bits per slot: bit 2*i is the "inserted" flag and bit 2*i+1 is
// the "deleted" flag of slot i (this is how isLive/isDeleted read it).
insertedAndDeleted: BitSet,
809+
// Key storage, indexed by slot number.
keys: Array[K],
810+
// Number of live keys; insert increments it, remove decrements it.
entries: Int64,
811+
// Number of slots. Used as the mask `cap - 1` when computing slot indices,
// so it is presumably always 0 or a power of two (rehash is only called
// with 4, a doubling or a halving).
cap: Int64,
809812
}
810813

811814
impl[K: Hash + Identity + Equals] HashSet[K] {
812815
// Creates a set containing the given keys.
// Starts from an empty table (zero-sized BitSet, empty key array, entries = 0,
// cap = 0) and adds each variadic argument through insert.
@pub @static fun new(keys: K...): HashSet[K] {
813-
let map: HashMap[K, ()] = HashMap[K, ()]::new();
816+
// BitSet.size == capacity * 2
817+
// [bit 0: inserted; bit 1: deleted] * capacity
818+
let set = HashSet[K](
819+
BitSet::new(0),
820+
Array[K]::new(),
821+
0,
822+
0,
823+
);
814824

815825
for key in keys {
816-
map.insert(key, ());
826+
// insert grows the table on demand via ensureCapacity.
set.insert(key);
817827
}
818828

819-
HashSet[K](map)
829+
set
820830
}
821831

822-
@pub fun insert(key: K): Unit {
823-
self.map.insert(key, ());
832+
// Number of keys currently stored in the set.
@pub fun size(): Int64 {
    self.entries
}
833+
834+
// True when the set holds no keys.
@pub fun isEmpty(): Bool {
    self.entries == 0
}
835+
836+
// Number of slots currently allocated in the table.
@pub fun capacity(): Int64 {
    self.cap
}
837+
838+
// Adds `key` to the set.
//
// Returns Some(previous) when an equal key was already stored (the stored key
// is replaced by `key`), and None when `key` is new.
//
// The probe is limited to `cap` steps. Tombstones are never cleared except by
// a rehash, so a table can end up with no never-used slot at all (for example
// at cap == 4, where shrink never rehashes). An unbounded probe would then
// spin forever; after a full sweep the key is known to be absent and the
// first tombstone seen is reused instead.
@pub fun insert(key: K): Option[K] {
    self.ensureCapacity(1);
    assert(self.entries < self.cap);

    let hash = key.hash();
    var idx = hash.toInt64() & (self.cap - 1);
    var insertIdx = None[Int64];
    var probes = 0;

    while probes < self.cap {
        if self.isLive(idx) {
            let currentKey = self.keys.get(idx);

            if currentKey.hash() == hash && (currentKey.identicalTo(key) || currentKey.equals(key)) {
                self.keys.set(idx, key);
                return Some[K](currentKey);
            }
        } else if self.isDeleted(idx) {
            // There might be live entries after a deleted one, so keep
            // probing but remember the first reusable slot.
            if insertIdx.isNone() {
                insertIdx = Some[Int64](idx);
            }
        } else {
            // Never-used slot: the key is not in the set.
            self.occupySlot(insertIdx.unwrapOr(idx), key);
            return None[K];
        }

        idx = (idx + 1) & (self.cap - 1);
        probes = probes + 1;
    }

    // Every slot was inspected and none was empty. Because entries < cap,
    // at least one slot is a tombstone, so insertIdx must be set.
    assert(insertIdx.isSome());
    self.occupySlot(insertIdx.unwrapOr(idx), key);
    None[K]
}

// Marks `slot` as inserted and not deleted, stores `key` there and bumps the
// live-entry count.
fun occupySlot(slot: Int64, key: K): Unit {
    self.insertedAndDeleted.insert(2 * slot);
    self.insertedAndDeleted.remove(2 * slot + 1);
    self.keys.set(slot, key);
    self.entries = self.entries + 1;
}
825880

826881
// Reports whether an equal key is stored in the set.
//
// Probing starts at the slot selected by the key's hash and stops at the
// first never-used slot. It is also limited to `cap` steps: a table whose
// slots are all live or tombstoned has no never-used slot, and an unbounded
// probe for an absent key would never terminate.
@pub fun contains(key: K): Bool {
    assert(self.entries <= self.cap);

    if self.entries == 0 {
        return false;
    }

    let hash = key.hash();
    var idx = hash.toInt64() & (self.cap - 1);
    var probes = 0;

    while probes < self.cap {
        if self.isLive(idx) {
            let currentKey = self.keys.get(idx);

            if currentKey.hash() == hash && (currentKey.identicalTo(key) || currentKey.equals(key)) {
                return true;
            }
        } else if self.isDeleted(idx).not() {
            // Never-used slot: the probe sequence ends here.
            return false;
        }

        // Live non-matching slot or tombstone: there might be live entries
        // further along, so keep going.
        idx = (idx + 1) & (self.cap - 1);
        probes = probes + 1;
    }

    false
}
829912

830-
@pub fun remove(key: K): Bool {
831-
self.map.remove(key).isSome()
913+
// Removes `key` from the set.
//
// Returns Some(stored key) when an equal key was present and None otherwise.
// The slot is only flagged as deleted (a tombstone) so that probe sequences
// running through it stay intact.
//
// Fixes over the previous version:
//  * the "never-used slot" branch now actually returns; before, it evaluated
//    `Option[K]::None;` as a statement and looped forever for absent keys;
//  * an empty set returns early, so the slot mask `cap - 1` is never computed
//    for cap == 0 (contains already guards the same way);
//  * the probe is limited to `cap` steps for tables without any never-used
//    slot.
@pub fun remove(key: K): Option[K] {
    self.shrink();

    if self.entries == 0 {
        return Option[K]::None;
    }

    let hash = key.hash();
    var idx = hash.toInt64() & (self.cap - 1);
    var probes = 0;

    while probes < self.cap {
        if self.isLive(idx) {
            let currentKey = self.keys.get(idx);

            if currentKey.hash() == hash && (currentKey.identicalTo(key) || currentKey.equals(key)) {
                // Set the deleted bit; the inserted bit stays set.
                self.insertedAndDeleted.insert(2 * idx + 1);

                // Clear the slot in the key array (presumably so the removed
                // key is not kept reachable through it).
                unsafeKillRefs[K](self.keys, idx);

                self.entries = self.entries - 1;
                return Option[K]::Some(currentKey);
            }
        } else if self.isDeleted(idx).not() {
            // Never-used slot: the key is not in the set.
            return Option[K]::None;
        }

        // Live non-matching slot or tombstone: there might be live entries
        // further along, so keep going.
        idx = (idx + 1) & (self.cap - 1);
        probes = probes + 1;
    }

    Option[K]::None
}
833945

834-
@pub fun size(): Int64 {
835-
self.map.size()
946+
// Grows the table when adding `elementsToAdd` keys would not fit or when the
// table is already more than three quarters full. Growth doubles the
// capacity; an unallocated table starts at 4 slots.
fun ensureCapacity(elementsToAdd: Int64): Unit {
    let needed = self.entries + elementsToAdd;
    let threshold = self.cap - (self.cap / 4);

    if needed < self.cap && self.entries <= threshold {
        return;
    }

    var grown = self.cap * 2;

    if self.cap <= 0 {
        grown = 4;
    }

    assert(needed < grown);

    self.rehash(grown);
}
837964

838-
@pub fun isEmpty(): Bool {
839-
self.map.isEmpty()
965+
// Halves the table when at most a quarter of the slots hold live keys.
// The table is never shrunk below 4 slots.
fun shrink(): Unit {
    let halved = self.cap / 2;

    if self.entries > (self.cap / 4) || halved < 4 {
        return;
    }

    assert(self.entries < halved);

    self.rehash(halved);
}
841980

842-
@pub fun capacity(): Int64 {
843-
self.map.capacity()
981+
// Rebuilds the table with `newCapacity` slots.
// A scratch set with fresh storage receives every live key of this set; its
// storage is then taken over. Tombstones are not copied.
fun rehash(newCapacity: Int64): Unit {
    let slotCount = self.cap;

    let scratch = HashSet[K]::new();
    scratch.insertedAndDeleted = BitSet::new(2 * newCapacity);
    scratch.keys = Array[K]::unsafeNew(newCapacity);
    scratch.entries = 0;
    scratch.cap = newCapacity;

    var slot = 0;

    while slot < slotCount {
        if self.isLive(slot) {
            scratch.insert(self.keys.get(slot));
        }

        slot = slot + 1;
    }

    self.insertedAndDeleted = scratch.insertedAndDeleted;
    self.keys = scratch.keys;
    self.entries = scratch.entries;
    self.cap = newCapacity;
}
1007+
1008+
// A slot is live when its inserted bit (2 * idx) is set and its deleted bit
// (2 * idx + 1) is clear.
fun isLive(idx: Int64): Bool {
    if self.insertedAndDeleted.contains(2 * idx).not() {
        return false;
    }

    self.insertedAndDeleted.contains(2 * idx + 1).not()
}
1011+
1012+
// A slot is a tombstone when its deleted bit (2 * idx + 1) is set.
fun isDeleted(idx: Int64): Bool {
    self.insertedAndDeleted.contains(2 * idx + 1)
}
8451014
}
8461015

8471016
@pub class List[T] {

0 commit comments

Comments
 (0)