
14 Apr
2025
14 Apr
'25
12:39 a.m.
> https://github.com/karl3/rep oops https://github.com/karl3wm/rep the storage backend is replaceable. it's just basic data structures. hard for me now, useful. > i _almost_ implemented dict. it works barebones but uses an > exponential growth ratio of 256. > i've almost reduced the exponential growth ratio to the smallest > needed power of 2, there's an arithmetic bug right now > > i'll paste it > > if place != self._sentinel: > collision = self._key(place) > if collision != keyhash: > assert idx == > int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift > #& self._hashmask > spread = 0 > while True: > spread += 1 > hashbits = self._hashbits + spread > expansion = 1 << spread > #hashmask = capacity - 1 > hashbytes = (hashbits+7) >> 3 > hashshift = (hashbytes << 3) - hashbits > idx = int.from_bytes(keyhash[:hashbytes], 'big') > >> hashshift #& hashmask > if idx != int.from_bytes(collision[:hashbytes], > 'big') >> hashshift: #& hashmask: > break > capacity = self._capacity * expansion > assert 1 << hashbits == capacity > expansionmask = spread - 1 > def content_generator(): > for superidx, item in > enumerate(tqdm.tqdm(self.array, desc='growing hashtable', > leave=False)): > if item == self._sentinel: > yield from [self._sentinel] * expansion > else: > keyhash = self._key(item) > if place != self._sentinel: > collision = self._key(place) > if collision != keyhash: > assert idx == > int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift > #& self._hashmask > spread = 0 > while True: > spread += 1 > hashbits = self._hashbits + spread > expansion = 1 << spread > #hashmask = capacity - 1 > hashbytes = (hashbits+7) >> 3 > hashshift = (hashbytes << 3) - hashbits > idx = int.from_bytes(keyhash[:hashbytes], 'big') > >> hashshift #& hashmask > if idx != int.from_bytes(collision[:hashbytes], > 'big') >> hashshift: #& hashmask: > break > capacity = self._capacity * expansion > assert 1 << hashbits == capacity > expansionmask = spread - 1 > def content_generator(): > for superidx, item in > enumerate(tqdm.tqdm(self.array, desc='growing hashtable', > leave=False)): > if item == self._sentinel: > yield from [self._sentinel] * expansion > else: > keyhash = self._key(item) > wholeidx = > int.from_bytes(keyhash[:hashbytes], 'big') #>> hashshift #& hashmask > assert superidx == wholeidx >> (hashbytes > * 8 - self._hashbits) > subidx = (wholeidx >> hashshift) & expansionmask > assert superidx * expansion + subidx == > wholeidx >> hashshift > yield from [self._sentinel] * subidx > yield item > yield from [self._sentinel] * (expansion - > subidx - 1) > self.array[:] = > IterableWithLength(content_generator(), self._capacity * expansion) > yield from [self._sentinel] * subidx > yield item > yield from [self._sentinel] * (expansion - > subidx - 1) > self.array[:] = > IterableWithLength(content_generator(), self._capacity * expansion) > > The arithmetic mistake is in this section: > wholeidx = > int.from_bytes(keyhash[:hashbytes], 'big') # misleading comment > removed > assert superidx == wholeidx >> (hashbytes > * 8 - self._hashbits) > subidx = (wholeidx >> hashshift) & expansionmask > assert superidx * expansion + subidx == > wholeidx >> hashshift > The second assertion is raising. One of my bitwise arithmetics is wrong. > > After reducing the exponential growth ratio I was going to implement > .update() for fast bulk updates of many items. > > It's so useful to have basic data structures!