
14 Apr
2025
14 Apr
'25
12:36 a.m.
https://github.com/karl3/rep i _almost_ implemented dict. it works barebones but uses an exponential growth ratio of 256. i've almost reduced the exponential growth ratio to the smallest needed power of 2, there's an arithmetic bug right now i'll paste it if place != self._sentinel: collision = self._key(place) if collision != keyhash: assert idx == int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift #& self._hashmask spread = 0 while True: spread += 1 hashbits = self._hashbits + spread expansion = 1 << spread #hashmask = capacity - 1 hashbytes = (hashbits+7) >> 3 hashshift = (hashbytes << 3) - hashbits idx = int.from_bytes(keyhash[:hashbytes], 'big') >> hashshift #& hashmask if idx != int.from_bytes(collision[:hashbytes], 'big') >> hashshift: #& hashmask: break capacity = self._capacity * expansion assert 1 << hashbits == capacity expansionmask = spread - 1 def content_generator(): for superidx, item in enumerate(tqdm.tqdm(self.array, desc='growing hashtable', leave=False)): if item == self._sentinel: yield from [self._sentinel] * expansion else: keyhash = self._key(item) if place != self._sentinel: collision = self._key(place) if collision != keyhash: assert idx == int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift #& self._hashmask spread = 0 while True: spread += 1 hashbits = self._hashbits + spread expansion = 1 << spread #hashmask = capacity - 1 hashbytes = (hashbits+7) >> 3 hashshift = (hashbytes << 3) - hashbits idx = int.from_bytes(keyhash[:hashbytes], 'big') >> hashshift #& hashmask if idx != int.from_bytes(collision[:hashbytes], 'big') >> hashshift: #& hashmask: break capacity = self._capacity * expansion assert 1 << hashbits == capacity expansionmask = spread - 1 def content_generator(): for superidx, item in enumerate(tqdm.tqdm(self.array, desc='growing hashtable', leave=False)): if item == self._sentinel: yield from [self._sentinel] * expansion else: keyhash = self._key(item) wholeidx = int.from_bytes(keyhash[:hashbytes], 'big') #>> hashshift #& hashmask assert superidx == wholeidx >> (hashbytes * 8 - self._hashbits) subidx = (wholeidx >> hashshift) & expansionmask assert superidx * expansion + subidx == wholeidx >> hashshift yield from [self._sentinel] * subidx yield item yield from [self._sentinel] * (expansion - subidx - 1) self.array[:] = IterableWithLength(content_generator(), self._capacity * expansion) yield from [self._sentinel] * subidx yield item yield from [self._sentinel] * (expansion - subidx - 1) self.array[:] = IterableWithLength(content_generator(), self._capacity * expansion) The arithmetic mistake is in this section: wholeidx = int.from_bytes(keyhash[:hashbytes], 'big') # misleading comment removed assert superidx == wholeidx >> (hashbytes * 8 - self._hashbits) subidx = (wholeidx >> hashshift) & expansionmask assert superidx * expansion + subidx == wholeidx >> hashshift The second assertion is raising. One of my bitwise arithmetics is wrong. After reducing the exponential growth ratio I was going to implement .update() for fast bulk updates of many items. It's so useful to have basic data structures!