You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

212 lines
6.5 KiB

4 years ago
  1. # cython: infer_types=True
  2. # cython: cdivision=True
  3. #
  4. cimport cython
  # Key value that marks a table cell as unclaimed: map_set() only takes over a
  # cell whose key equals this, and _find_cell() stops probing when it sees it.
  # Because 0 is reserved, map_set()/map_get() route entries keyed by 0 through
  # MapStruct.value_for_empty_key and MapStruct.is_empty_key_set.
  5. DEF EMPTY_KEY = 0
  # Key value that map_clear() writes over a removed cell's key (a tombstone).
  # Because 1 is reserved, map_set()/map_get() route entries keyed by 1 through
  # MapStruct.value_for_del_key and MapStruct.is_del_key_set.
  6. DEF DELETED_KEY = 1
  cdef class PreshMap:
      """Hash map that assumes keys come pre-hashed. Maps uint64_t --> uint64_t.

      Uses open addressing with linear probing.

      Values are stored as pointer-sized integers. A stored value of 0 cannot
      be told apart from a missing key: ``map[key]`` returns None and
      ``key in map`` is False in both cases.

      Usage
          map = PreshMap() # Create a table
          map = PreshMap(initial_size=1024) # Create with initial size (efficiency)
          map[key] = value # Set a value to a key
          value = map[key] # Get a value given a key
          for key, value in map.items(): # Iterate over items
          len(map) # Get number of inserted keys
      """
      def __init__(self, size_t initial_size=8):
          """Create the table.

          initial_size is the starting number of cells. 0 is replaced by 8 and
          any other value is rounded up to the next power of two, because the
          probe sequence masks with ``size - 1``.
          """
          cdef size_t power = 1
          if initial_size == 0:
              initial_size = 8
          if (initial_size & (initial_size - 1)) != 0:
              while power < initial_size:
                  power *= 2
              initial_size = power
          self.mem = Pool()
          self.c_map = <MapStruct*>self.mem.alloc(1, sizeof(MapStruct))
          map_init(self.mem, self.c_map, initial_size)

      property capacity:
          # Number of cells currently allocated for the table.
          def __get__(self):
              return self.c_map.length

      def items(self):
          """Yield (key, value) pairs in the order map_iter() produces them."""
          cdef key_t key
          cdef void* value
          cdef int i = 0
          while map_iter(self.c_map, &i, &key, &value):
              yield key, <size_t>value

      def keys(self):
          """Yield every key produced by items()."""
          for key, _ in self.items():
              yield key

      def values(self):
          """Yield every value produced by items()."""
          for _, value in self.items():
              yield value

      def pop(self, key_t key, default=None):
          """Remove key and return its value, or default if none was stored.

          The value comes straight from map_clear(), so the table is probed
          once, and a special key (0 or 1) that was already removed yields
          default instead of its stale stored value.
          """
          cdef void* value = map_clear(self.c_map, key)
          return <size_t>value if value != NULL else default

      def __getitem__(self, key_t key):
          """Return the value stored for key, or None if the lookup gives NULL."""
          cdef void* value = map_get(self.c_map, key)
          return <size_t>value if value != NULL else None

      def __setitem__(self, key_t key, size_t value):
          """Store value under key, replacing any previous value."""
          map_set(self.mem, self.c_map, key, <void*>value)

      def __delitem__(self, key_t key):
          """Remove key from the map; the removed value is discarded."""
          map_clear(self.c_map, key)

      def __len__(self):
          """Return the table's ``filled`` counter."""
          return self.c_map.filled

      def __contains__(self, key_t key):
          """Return True if the lookup for key gives a non-NULL value."""
          cdef void* value = map_get(self.c_map, key)
          return value != NULL

      def __iter__(self):
          """Iterate over the keys."""
          for key in self.keys():
              yield key

      cdef inline void* get(self, key_t key) nogil:
          # C-level lookup; returns the raw stored pointer (NULL when absent).
          return map_get(self.c_map, key)

      cdef void set(self, key_t key, void* value) except *:
          # C-level insert/overwrite of a raw pointer value.
          map_set(self.mem, self.c_map, key, <void*>value)
  cdef class PreshMapArray:
      """An array of hash tables that assume keys come pre-hashed. Each table
      uses open addressing with linear probing.
      """
      def __init__(self, size_t length, size_t initial_size=8):
          """Allocate ``length`` tables, each starting with ``initial_size`` cells.

          initial_size is normalised the same way PreshMap does it: 0 becomes 8
          and any other value is rounded up to the next power of two. The probe
          sequence masks with ``size - 1``, so a size that is not a power of
          two would leave some cells unreachable.
          """
          cdef size_t power = 1
          cdef size_t i
          if initial_size == 0:
              initial_size = 8
          if (initial_size & (initial_size - 1)) != 0:
              while power < initial_size:
                  power *= 2
              initial_size = power
          self.mem = Pool()
          self.length = length
          self.maps = <MapStruct*>self.mem.alloc(length, sizeof(MapStruct))
          for i in range(length):
              map_init(self.mem, &self.maps[i], initial_size)

      cdef inline void* get(self, size_t i, key_t key) nogil:
          # Look key up in the i-th table. No bounds check is made on i.
          return map_get(&self.maps[i], key)

      cdef void set(self, size_t i, key_t key, void* value) except *:
          # Insert/overwrite key in the i-th table. No bounds check is made on i.
          map_set(self.mem, &self.maps[i], key, <void*>value)
  cdef void map_init(Pool mem, MapStruct* map_, size_t length) except *:
      """Prepare map_ for use with a freshly allocated table of ``length`` cells.

      The cell array is obtained from ``mem``. The lookup code masks with
      ``length - 1``, so callers are expected to pass a power of two.
      """
      cdef Cell* fresh_cells
      map_.length = length
      map_.filled = 0
      # NOTE(review): the new cells are presumably zero-filled by Pool.alloc,
      # which would make every key EMPTY_KEY - confirm against cymem.
      fresh_cells = <Cell*>mem.alloc(length, sizeof(Cell))
      map_.cells = fresh_cells
  cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except *:
      """Insert key -> value into map_, overwriting any existing value.

      Keys equal to EMPTY_KEY or DELETED_KEY are reserved as cell markers, so
      they are kept in dedicated fields of the struct and never touch the cell
      table (and therefore never change ``filled``). Any other key is placed by
      linear probing, and the table is grown once it would be roughly 60% full.
      """
      cdef Cell* cell
      if key == EMPTY_KEY:
          map_.value_for_empty_key = value
          map_.is_empty_key_set = True
          # Return here: falling through would claim an empty cell for the
          # sentinel key, bump ``filled`` on every call, and leave a value in
          # a cell that later failed lookups would return.
          return
      if key == DELETED_KEY:
          map_.value_for_del_key = value
          map_.is_del_key_set = True
          return
      cell = _find_cell(map_.cells, map_.length, key)
      if cell.key == EMPTY_KEY:
          # First time this key is stored: claim the cell.
          cell.key = key
          map_.filled += 1
      cell.value = value
      # Grow when (filled + 1) / length >= 3/5.
      if (map_.filled + 1) * 5 >= (map_.length * 3):
          _resize(mem, map_)
  cdef void* map_get(const MapStruct* map_, const key_t key) nogil:
      """Return the value stored for key, or NULL if there is none.

      The reserved keys EMPTY_KEY and DELETED_KEY are answered from their
      dedicated fields, and only while the matching is-set flag is true.
      map_clear() resets that flag, so without the check a removed special key
      would still report its old value.
      """
      cdef Cell* cell
      if key == EMPTY_KEY:
          return map_.value_for_empty_key if map_.is_empty_key_set else NULL
      if key == DELETED_KEY:
          return map_.value_for_del_key if map_.is_del_key_set else NULL
      # For an absent key the probe ends on an unclaimed cell; its value field
      # is what gets returned (expected to be NULL).
      cell = _find_cell(map_.cells, map_.length, key)
      return cell.value
  cdef void* map_clear(MapStruct* map_, const key_t key) nogil:
      """Remove key from map_ and return the value it held, or NULL if absent.

      Reserved keys (EMPTY_KEY / DELETED_KEY) are cleared by resetting their
      is-set flag and stored value. Any other key has its cell turned into a
      tombstone (key = DELETED_KEY) so probe chains running through it stay
      intact.
      """
      cdef void* value
      cdef Cell* cell
      if key == EMPTY_KEY:
          value = map_.value_for_empty_key if map_.is_empty_key_set else NULL
          map_.is_empty_key_set = False
          # Drop the stored value too, so no reader can see it after removal.
          map_.value_for_empty_key = NULL
          return value
      if key == DELETED_KEY:
          value = map_.value_for_del_key if map_.is_del_key_set else NULL
          map_.is_del_key_set = False
          map_.value_for_del_key = NULL
          return value
      cell = _find_cell(map_.cells, map_.length, key)
      if cell.key != key:
          # The probe stopped on an unclaimed cell, so the key is not in the
          # table. Tombstoning that cell and decrementing ``filled`` would
          # corrupt the table.
          return NULL
      cell.key = DELETED_KEY
      # NOTE(review): tombstones are not counted in ``filled`` and map_set()
      # does not reuse them, so heavy insert/delete churn can use up the
      # unclaimed cells without triggering a resize - worth confirming whether
      # callers can hit this.
      map_.filled -= 1
      return cell.value
  cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values,
                          int n) nogil:
      """Look up ``n`` keys, writing the result for keys[k] into values[k].

      Each lookup is a plain map_get() call. The function is declared to
      return ``void*`` but has no return statement; the results are delivered
      through ``values``.
      """
      cdef int idx = 0
      while idx < n:
          values[idx] = map_get(map_, keys[idx])
          idx += 1
  cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil:
      """Advance an iteration over map_ by one item.

      ``i`` is the cursor owned by the caller; start it at 0 and pass it back
      unchanged between calls. On success the next key and value are written
      to ``key`` and ``value`` and True is returned. False means the iteration
      is finished.

      Cursor positions below ``length`` walk the cell table; positions
      ``length`` and ``length + 1`` stand for the entries keyed by EMPTY_KEY
      and DELETED_KEY, which are stored outside the table.
      """
      cdef const Cell* slot
      # Phase 1: scan the table, skipping unclaimed cells and tombstones.
      while i[0] < map_.length:
          slot = &map_.cells[i[0]]
          i[0] += 1
          if slot.key == EMPTY_KEY or slot.key == DELETED_KEY:
              continue
          key[0] = slot.key
          value[0] = slot.value
          return True
      # Phase 2: the entry keyed by EMPTY_KEY, if one is set.
      if i[0] == map_.length:
          i[0] += 1
          if map_.is_empty_key_set:
              key[0] = EMPTY_KEY
              value[0] = map_.value_for_empty_key
              return True
      # Phase 3: the entry keyed by DELETED_KEY, if one is set.
      if i[0] == map_.length + 1:
          i[0] += 1
          if map_.is_del_key_set:
              key[0] = DELETED_KEY
              value[0] = map_.value_for_del_key
              return True
      return False
  @cython.cdivision
  cdef inline Cell* _find_cell(Cell* cells, const key_t size, const key_t key) nogil:
      """Return the cell holding ``key``, or the first unclaimed cell on its
      probe path.

      ``size`` must be a power of two: the start position and the wrap-around
      are both computed with a bit mask instead of a modulo. Tombstoned cells
      are stepped over. The loop only ends on a matching or unclaimed cell, so
      the table must always contain one of the two.
      """
      cdef key_t mask = size - 1
      cdef key_t slot = key & mask
      while not (cells[slot].key == EMPTY_KEY or cells[slot].key == key):
          slot = (slot + 1) & mask
      return &cells[slot]
  cdef void _resize(Pool mem, MapStruct* map_) except *:
      """Double the table size of map_ and re-insert every live entry.

      Unclaimed cells and tombstones are not carried over. The old cell array
      is returned to ``mem`` once everything has been copied.
      """
      cdef size_t old_size = map_.length
      cdef size_t new_size = old_size * 2
      cdef Cell* old_cells = map_.cells
      cdef Cell* new_cells
      cdef size_t i
      # Allocate before touching map_: if the allocation raises, the map keeps
      # its old, mutually consistent length/cells pair.
      new_cells = <Cell*>mem.alloc(new_size, sizeof(Cell))
      map_.cells = new_cells
      map_.length = new_size
      # map_set() below counts the entries back in.
      map_.filled = 0
      for i in range(old_size):
          if old_cells[i].key != EMPTY_KEY and old_cells[i].key != DELETED_KEY:
              map_set(mem, map_, old_cells[i].key, old_cells[i].value)
      mem.free(old_cells)