213 lines
6.5 KiB
Cython
213 lines
6.5 KiB
Cython
|
# cython: infer_types=True
|
||
|
# cython: cdivision=True
|
||
|
#
|
||
|
cimport cython
|
||
|
|
||
|
|
||
|
# Reserved key values. Because cells use them as markers, entries whose real
# key is 0 or 1 are kept in dedicated MapStruct fields rather than in a cell.

# Key found in an unused cell; probing in _find_cell stops when it sees it.
DEF EMPTY_KEY = 0
# Key written into a cell by map_clear to mark a removed entry.
DEF DELETED_KEY = 1
|
||
|
|
||
|
|
||
|
cdef class PreshMap:
    """Hash map that assumes keys come pre-hashed. Maps uint64_t --> uint64_t.

    Uses open addressing with linear probing.

    Usage
        map = PreshMap() # Create a table
        map = PreshMap(initial_size=1024) # Create with initial size (efficiency)
        map[key] = value # Set a value to a key
        value = map[key] # Get a value given a key
        for key, value in map.items(): # Iterate over items
        len(map) # Get number of inserted keys

    Notes
        Values are stored as pointers, and a NULL pointer is what lookups use
        to signal "no value". A stored value of 0 therefore reads back as
        None and is reported as absent by ``in``.
    """
    def __init__(self, size_t initial_size=8):
        """Allocate the underlying table.

        initial_size: requested number of cells. 0 selects 8, and a value
            that is not a power of two is rounded up to the next one.
        """
        # Size must be power of two
        if initial_size == 0:
            initial_size = 8
        if (initial_size & (initial_size - 1)) != 0:
            power = 1
            while power < initial_size:
                power *= 2
            initial_size = power
        self.mem = Pool()
        self.c_map = <MapStruct*>self.mem.alloc(1, sizeof(MapStruct))
        map_init(self.mem, self.c_map, initial_size)

    property capacity:
        def __get__(self):
            """Current number of cells in the table."""
            return self.c_map.length

    def items(self):
        """Yield (key, value) pairs for every entry, values as integers."""
        cdef key_t key
        cdef void* value
        # Cursor owned by this generator; map_iter advances it on each call.
        cdef int i = 0
        while map_iter(self.c_map, &i, &key, &value):
            yield key, <size_t>value

    def keys(self):
        """Yield every key in the map."""
        for key, _ in self.items():
            yield key

    def values(self):
        """Yield every value in the map."""
        for _, value in self.items():
            yield value

    def pop(self, key_t key, default=None):
        """Remove `key` and return its value, or `default` if it had none."""
        # map_clear hands back the value it removed (NULL when nothing was
        # set), so one call both deletes and fetches. Reading through map_get
        # first could report a stale value for the reserved keys 0 and 1
        # after they had already been deleted.
        cdef void* value = map_clear(self.c_map, key)
        return <size_t>value if value != NULL else default

    def __getitem__(self, key_t key):
        """Return the value stored for `key`, or None when there is none."""
        cdef void* value = map_get(self.c_map, key)
        return <size_t>value if value != NULL else None

    def __setitem__(self, key_t key, size_t value):
        """Store `value` under `key`; the integer is kept as a pointer."""
        map_set(self.mem, self.c_map, key, <void*>value)

    def __delitem__(self, key_t key):
        """Remove `key` from the map."""
        map_clear(self.c_map, key)

    def __len__(self):
        """Return the table's `filled` counter."""
        return self.c_map.filled

    def __contains__(self, key_t key):
        """Return True when a non-NULL value is stored for `key`."""
        cdef void* value = map_get(self.c_map, key)
        return value != NULL

    def __iter__(self):
        """Iterate over the keys."""
        for key in self.keys():
            yield key

    cdef inline void* get(self, key_t key) nogil:
        # C-level lookup: raw pointer result, NULL when nothing is stored.
        return map_get(self.c_map, key)

    cdef void set(self, key_t key, void* value) except *:
        # C-level insert/overwrite; may allocate when the table grows.
        map_set(self.mem, self.c_map, key, value)
|
||
|
|
||
|
|
||
|
cdef class PreshMapArray:
    """An array of hash tables that assume keys come pre-hashed. Each table
    uses open addressing with linear probing.
    """
    def __init__(self, size_t length, size_t initial_size=8):
        """Allocate `length` tables, each starting with `initial_size` cells.

        length: number of independent tables in the array.
        initial_size: requested cell count per table. 0 selects 8, and a
            value that is not a power of two is rounded up to the next one.
        """
        # Cell lookup masks the key with (size - 1), which only addresses
        # every cell when the size is a power of two, so normalise the
        # request the same way PreshMap.__init__ does.
        if initial_size == 0:
            initial_size = 8
        if (initial_size & (initial_size - 1)) != 0:
            power = 1
            while power < initial_size:
                power *= 2
            initial_size = power
        self.mem = Pool()
        self.length = length
        self.maps = <MapStruct*>self.mem.alloc(length, sizeof(MapStruct))
        for i in range(length):
            map_init(self.mem, &self.maps[i], initial_size)

    cdef inline void* get(self, size_t i, key_t key) nogil:
        # Lookup in table `i`. No bounds check is performed on `i`.
        return map_get(&self.maps[i], key)

    cdef void set(self, size_t i, key_t key, void* value) except *:
        # Insert/overwrite in table `i`. No bounds check is performed on `i`.
        map_set(self.mem, &self.maps[i], key, value)
|
||
|
|
||
|
|
||
|
cdef void map_init(Pool mem, MapStruct* map_, size_t length) except *:
    """Prepare `map_` as an empty table with `length` cells taken from `mem`.

    Only `length`, `filled` and `cells` are written here; the reserved-key
    fields of the struct are left as they were.
    """
    # NOTE(review): callers are expected to pass a power-of-two length,
    # since cell lookup masks with (length - 1); confirm at call sites.
    map_.length = length
    map_.filled = 0
    # An empty cell is recognised by key 0, so this presumably relies on
    # the pool returning zeroed memory; TODO confirm.
    map_.cells = <Cell*>mem.alloc(length, sizeof(Cell))
|
||
|
|
||
|
|
||
|
cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except *:
    """Insert `key` -> `value` into `map_`, overwriting any existing value.

    mem: pool used for allocation if the table has to grow.
    map_: table to modify.
    key: pre-hashed key. The reserved keys EMPTY_KEY and DELETED_KEY are
        stored in dedicated struct fields and never occupy a cell.
    value: pointer-sized payload to store.
    """
    cdef Cell* cell
    # Reserved keys double as cell markers, so they must not reach the cell
    # path. Probing for them would land on an unused cell, bump `filled`,
    # and leave a value (or a tombstone marker) in a cell no key owns.
    if key == EMPTY_KEY:
        map_.value_for_empty_key = value
        map_.is_empty_key_set = True
        return
    if key == DELETED_KEY:
        map_.value_for_del_key = value
        map_.is_del_key_set = True
        return
    cell = _find_cell(map_.cells, map_.length, key)
    if cell.key == EMPTY_KEY:
        # Probing ended on an unused cell: this is a new entry.
        cell.key = key
        map_.filled += 1
    cell.value = value
    # Grow once the table is about 60% full so probing always finds an
    # empty cell to stop at.
    if (map_.filled + 1) * 5 >= (map_.length * 3):
        _resize(mem, map_)
|
||
|
|
||
|
|
||
|
cdef void* map_get(const MapStruct* map_, const key_t key) nogil:
    """Return the value stored for `key` in `map_`, or NULL if there is none.

    The reserved keys EMPTY_KEY and DELETED_KEY are answered from their
    dedicated struct fields; every other key is looked up in the cells.
    """
    cdef Cell* cell
    # Honour the "is set" flags: map_clear only resets the flag and leaves
    # the old pointer in place, so returning the field unconditionally
    # would resurrect a deleted entry.
    # NOTE(review): assumes the flags start out False, i.e. the struct comes
    # from a zero-initialising allocator; confirm.
    if key == EMPTY_KEY:
        return map_.value_for_empty_key if map_.is_empty_key_set else NULL
    if key == DELETED_KEY:
        return map_.value_for_del_key if map_.is_del_key_set else NULL
    # For an absent key probing stops on an unused cell, and that cell's
    # value is what gets returned.
    cell = _find_cell(map_.cells, map_.length, key)
    return cell.value
|
||
|
|
||
|
|
||
|
cdef void* map_clear(MapStruct* map_, const key_t key) nogil:
    """Remove `key` from `map_` and return the value it held.

    Returns NULL when the key was not present. The table is left untouched
    in that case.
    """
    cdef void* value
    cdef Cell* cell
    if key == EMPTY_KEY:
        value = map_.value_for_empty_key if map_.is_empty_key_set else NULL
        map_.is_empty_key_set = False
        return value
    elif key == DELETED_KEY:
        value = map_.value_for_del_key if map_.is_del_key_set else NULL
        map_.is_del_key_set = False
        return value
    cell = _find_cell(map_.cells, map_.length, key)
    if cell.key != key:
        # Probing stopped on an unused cell, so the key is absent. Marking
        # that cell as deleted would waste a slot, and decrementing `filled`
        # for an entry that never existed could underflow the counter.
        return NULL
    # Leave a tombstone rather than emptying the cell, so probe chains that
    # pass through this slot stay intact.
    cell.key = DELETED_KEY
    map_.filled -= 1
    # NOTE(review): tombstones are not counted in `filled`, and the insert
    # path only claims EMPTY cells. Heavy insert/delete churn could
    # therefore use up every empty cell without triggering a resize; verify.
    return cell.value
|
||
|
|
||
|
|
||
|
cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values,
                        int n) nogil:
    """Look up `n` keys at once, writing each result into `values`.

    values[k] receives map_get(map_, keys[k]) for k in 0..n-1. Both arrays
    must have room for at least `n` elements.
    """
    # NOTE(review): the signature declares a void* result but the body has
    # no return statement; callers should not rely on the returned pointer.
    cdef int idx = 0
    while idx < n:
        values[idx] = map_get(map_, keys[idx])
        idx += 1
|
||
|
|
||
|
|
||
|
cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil:
    '''Advance an iteration over the entries of `map_`.

    `i` is the caller-owned cursor and should start at 0. On each call the
    next entry is written to `key` and `value` and True is returned. False
    is returned once every entry has been visited.
    '''
    cdef const Cell* cell
    # Phase 1: walk the cell array, skipping unused and deleted cells.
    while i[0] < map_.length:
        cell = &map_.cells[i[0]]
        i[0] += 1
        if cell.key == EMPTY_KEY or cell.key == DELETED_KEY:
            continue
        key[0] = cell.key
        value[0] = cell.value
        return True
    # Phase 2: the reserved keys live outside the cell array. Cursor
    # positions `length` and `length + 1` stand for them, so each is
    # reported at most once.
    if i[0] == map_.length:
        i[0] += 1
        if map_.is_empty_key_set:
            key[0] = EMPTY_KEY
            value[0] = map_.value_for_empty_key
            return True
    if i[0] == map_.length + 1:
        i[0] += 1
        if map_.is_del_key_set:
            key[0] = DELETED_KEY
            value[0] = map_.value_for_del_key
            return True
    return False
|
||
|
|
||
|
|
||
|
@cython.cdivision
cdef inline Cell* _find_cell(Cell* cells, const key_t size, const key_t key) nogil:
    """Return the cell holding `key`, or the unused cell where probing stops.

    cells: cell array of the table.
    size: number of cells; must be a power of two for the masking to work.
    key: pre-hashed key to locate.

    The loop only ends on a matching or an unused cell, so the table must
    always contain at least one unused cell.
    """
    # Modulo for powers-of-two via bitwise &
    cdef key_t mask = size - 1
    cdef key_t slot = key & mask
    # Linear probing: step to the next cell, wrapping at the end.
    while cells[slot].key != EMPTY_KEY and cells[slot].key != key:
        slot = (slot + 1) & mask
    return &cells[slot]
|
||
|
|
||
|
|
||
|
cdef void _resize(Pool mem, MapStruct* map_) except *:
    """Double the capacity of `map_` and re-insert its live entries.

    A fresh cell array of twice the size is allocated from `mem`. Every cell
    that is neither unused nor deleted is inserted again through map_set,
    and the old array is then returned to the pool.
    """
    cdef Cell* previous_cells = map_.cells
    cdef size_t previous_length = map_.length
    cdef size_t doubled_length = previous_length * 2
    cdef size_t idx

    # Switch the struct over to the new, empty array first. map_set rebuilds
    # `filled` as the entries are inserted again.
    map_.length = doubled_length
    map_.filled = 0
    map_.cells = <Cell*>mem.alloc(doubled_length, sizeof(Cell))

    for idx in range(previous_length):
        # Unused cells and tombstones carry no entry; dropping them here is
        # what reclaims deleted slots.
        if previous_cells[idx].key == EMPTY_KEY or previous_cells[idx].key == DELETED_KEY:
            continue
        map_set(mem, map_, previous_cells[idx].key, previous_cells[idx].value)
    mem.free(previous_cells)
|