14 Apr
2025
14 Apr
'25
12:36 a.m.
https://github.com/karl3/rep
i _almost_ implemented dict. it works barebones but uses an
exponential growth ratio of 256.
i've almost reduced the exponential growth ratio to the smallest
needed power of 2, there's an arithmetic bug right now
i'll paste it
if place != self._sentinel:
collision = self._key(place)
if collision != keyhash:
assert idx ==
int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift
#& self._hashmask
spread = 0
while True:
spread += 1
hashbits = self._hashbits + spread
expansion = 1 << spread
#hashmask = capacity - 1
hashbytes = (hashbits+7) >> 3
hashshift = (hashbytes << 3) - hashbits
idx = int.from_bytes(keyhash[:hashbytes], 'big')
>> hashshift #& hashmask
if idx != int.from_bytes(collision[:hashbytes],
'big') >> hashshift: #& hashmask:
break
capacity = self._capacity * expansion
assert 1 << hashbits == capacity
expansionmask = spread - 1
def content_generator():
for superidx, item in
enumerate(tqdm.tqdm(self.array, desc='growing hashtable',
leave=False)):
if item == self._sentinel:
yield from [self._sentinel] * expansion
else:
keyhash = self._key(item)
if place != self._sentinel:
collision = self._key(place)
if collision != keyhash:
assert idx ==
int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift
#& self._hashmask
spread = 0
while True:
spread += 1
hashbits = self._hashbits + spread
expansion = 1 << spread
#hashmask = capacity - 1
hashbytes = (hashbits+7) >> 3
hashshift = (hashbytes << 3) - hashbits
idx = int.from_bytes(keyhash[:hashbytes], 'big')
>> hashshift #& hashmask
if idx != int.from_bytes(collision[:hashbytes],
'big') >> hashshift: #& hashmask:
break
capacity = self._capacity * expansion
assert 1 << hashbits == capacity
expansionmask = spread - 1
def content_generator():
for superidx, item in
enumerate(tqdm.tqdm(self.array, desc='growing hashtable',
leave=False)):
if item == self._sentinel:
yield from [self._sentinel] * expansion
else:
keyhash = self._key(item)
wholeidx =
int.from_bytes(keyhash[:hashbytes], 'big') #>> hashshift #& hashmask
assert superidx == wholeidx >> (hashbytes
* 8 - self._hashbits)
subidx = (wholeidx >> hashshift) & expansionmask
assert superidx * expansion + subidx ==
wholeidx >> hashshift
yield from [self._sentinel] * subidx
yield item
yield from [self._sentinel] * (expansion -
subidx - 1)
self.array[:] =
IterableWithLength(content_generator(), self._capacity * expansion)
yield from [self._sentinel] * subidx
yield item
yield from [self._sentinel] * (expansion -
subidx - 1)
self.array[:] =
IterableWithLength(content_generator(), self._capacity * expansion)
The arithmetic mistake is in this section:
wholeidx =
int.from_bytes(keyhash[:hashbytes], 'big') # misleading comment
removed
assert superidx == wholeidx >> (hashbytes
* 8 - self._hashbits)
subidx = (wholeidx >> hashshift) & expansionmask
assert superidx * expansion + subidx ==
wholeidx >> hashshift
The second assertion is raising. One of my bitwise arithmetics is wrong.
After reducing the exponential growth ratio I was going to implement
.update() for fast bulk updates of many items.
It's so useful to have basic data structures!