You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

555 lines
14 KiB

4 years ago
from cpython.dict cimport (PyDict_Check, PyDict_CheckExact, PyDict_GetItem,
                           PyDict_Merge, PyDict_New, PyDict_Next,
                           PyDict_SetItem, PyDict_Update, PyDict_DelItem)
from cpython.list cimport PyList_Append, PyList_New
from cpython.object cimport PyObject_SetItem
from cpython.ref cimport PyObject, Py_DECREF, Py_INCREF, Py_XDECREF

# Locally defined bindings that differ from `cython.cpython` bindings
from cytoolz.cpython cimport PyDict_Next_Compat, PtrIter_Next

from copy import copy

try:
    from collections.abc import Mapping
except ImportError:  # Python 2 keeps the ABCs directly in ``collections``
    from collections import Mapping
  10. __all__ = ['merge', 'merge_with', 'valmap', 'keymap', 'itemmap', 'valfilter',
  11. 'keyfilter', 'itemfilter', 'assoc', 'dissoc', 'assoc_in', 'get_in',
  12. 'update_in']
  13. cdef int PyMapping_Next(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pval) except -1:
  14. """Mimic "PyDict_Next" interface, but for any mapping"""
  15. cdef PyObject *obj
  16. obj = PtrIter_Next(p)
  17. if obj is NULL:
  18. return 0
  19. pkey[0] = <PyObject*>(<object>obj)[0]
  20. pval[0] = <PyObject*>(<object>obj)[1]
  21. Py_XDECREF(obj)
  22. return 1
  23. cdef f_map_next get_map_iter(object d, PyObject* *ptr) except NULL:
  24. """Return function pointer to perform iteration over object returned in ptr.
  25. The returned function signature matches "PyDict_Next". If ``d`` is a dict,
  26. then the returned function *is* PyDict_Next, so iteration wil be very fast.
  27. The object returned through ``ptr`` needs to have its reference count
  28. reduced by one once the caller "owns" the object.
  29. This function lets us control exactly how iteration should be performed
  30. over a given mapping. The current rules are:
  31. 1) If ``d`` is exactly a dict, use PyDict_Next
  32. 2) If ``d`` is subtype of dict, use PyMapping_Next. This lets the user
  33. control the order iteration, such as for ordereddict.
  34. 3) If using PyMapping_Next, iterate using ``iteritems`` if possible,
  35. otherwise iterate using ``items``.
  36. """
  37. cdef object val
  38. cdef f_map_next rv
  39. if PyDict_CheckExact(d):
  40. val = d
  41. rv = &PyDict_Next_Compat
  42. elif hasattr(d, 'iteritems'):
  43. val = iter(d.iteritems())
  44. rv = &PyMapping_Next
  45. else:
  46. val = iter(d.items())
  47. rv = &PyMapping_Next
  48. Py_INCREF(val)
  49. ptr[0] = <PyObject*>val
  50. return rv
  51. cdef get_factory(name, kwargs):
  52. factory = kwargs.pop('factory', dict)
  53. if kwargs:
  54. raise TypeError("{0}() got an unexpected keyword argument "
  55. "'{1}'".format(name, kwargs.popitem()[0]))
  56. return factory
  57. cdef object c_merge(object dicts, object factory=dict):
  58. cdef object rv
  59. rv = factory()
  60. if PyDict_CheckExact(rv):
  61. for d in dicts:
  62. PyDict_Update(rv, d)
  63. else:
  64. for d in dicts:
  65. rv.update(d)
  66. return rv
  67. def merge(*dicts, **kwargs):
  68. """
  69. Merge a collection of dictionaries
  70. >>> merge({1: 'one'}, {2: 'two'})
  71. {1: 'one', 2: 'two'}
  72. Later dictionaries have precedence
  73. >>> merge({1: 2, 3: 4}, {3: 3, 4: 4})
  74. {1: 2, 3: 3, 4: 4}
  75. See Also:
  76. merge_with
  77. """
  78. if len(dicts) == 1 and not PyDict_Check(dicts[0]):
  79. dicts = dicts[0]
  80. factory = get_factory('merge', kwargs)
  81. return c_merge(dicts, factory)
  82. cdef object c_merge_with(object func, object dicts, object factory=dict):
  83. cdef:
  84. dict result
  85. object rv, d
  86. list seq
  87. f_map_next f
  88. PyObject *obj
  89. PyObject *pkey
  90. PyObject *pval
  91. Py_ssize_t pos
  92. result = PyDict_New()
  93. rv = factory()
  94. for d in dicts:
  95. f = get_map_iter(d, &obj)
  96. d = <object>obj
  97. Py_DECREF(d)
  98. pos = 0
  99. while f(d, &pos, &pkey, &pval):
  100. obj = PyDict_GetItem(result, <object>pkey)
  101. if obj is NULL:
  102. seq = PyList_New(0)
  103. PyList_Append(seq, <object>pval)
  104. PyDict_SetItem(result, <object>pkey, seq)
  105. else:
  106. PyList_Append(<object>obj, <object>pval)
  107. f = get_map_iter(result, &obj)
  108. d = <object>obj
  109. Py_DECREF(d)
  110. pos = 0
  111. while f(d, &pos, &pkey, &pval):
  112. PyObject_SetItem(rv, <object>pkey, func(<object>pval))
  113. return rv
  114. def merge_with(func, *dicts, **kwargs):
  115. """
  116. Merge dictionaries and apply function to combined values
  117. A key may occur in more than one dict, and all values mapped from the key
  118. will be passed to the function as a list, such as func([val1, val2, ...]).
  119. >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20})
  120. {1: 11, 2: 22}
  121. >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30}) # doctest: +SKIP
  122. {1: 1, 2: 2, 3: 30}
  123. See Also:
  124. merge
  125. """
  126. if len(dicts) == 1 and not PyDict_Check(dicts[0]):
  127. dicts = dicts[0]
  128. factory = get_factory('merge_with', kwargs)
  129. return c_merge_with(func, dicts, factory)
  130. cpdef object valmap(object func, object d, object factory=dict):
  131. """
  132. Apply function to values of dictionary
  133. >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
  134. >>> valmap(sum, bills) # doctest: +SKIP
  135. {'Alice': 65, 'Bob': 45}
  136. See Also:
  137. keymap
  138. itemmap
  139. """
  140. cdef:
  141. object rv
  142. f_map_next f
  143. PyObject *obj
  144. PyObject *pkey
  145. PyObject *pval
  146. Py_ssize_t pos = 0
  147. rv = factory()
  148. f = get_map_iter(d, &obj)
  149. d = <object>obj
  150. Py_DECREF(d)
  151. while f(d, &pos, &pkey, &pval):
  152. rv[<object>pkey] = func(<object>pval)
  153. return rv
  154. cpdef object keymap(object func, object d, object factory=dict):
  155. """
  156. Apply function to keys of dictionary
  157. >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
  158. >>> keymap(str.lower, bills) # doctest: +SKIP
  159. {'alice': [20, 15, 30], 'bob': [10, 35]}
  160. See Also:
  161. valmap
  162. itemmap
  163. """
  164. cdef:
  165. object rv
  166. f_map_next f
  167. PyObject *obj
  168. PyObject *pkey
  169. PyObject *pval
  170. Py_ssize_t pos = 0
  171. rv = factory()
  172. f = get_map_iter(d, &obj)
  173. d = <object>obj
  174. Py_DECREF(d)
  175. while f(d, &pos, &pkey, &pval):
  176. rv[func(<object>pkey)] = <object>pval
  177. return rv
  178. cpdef object itemmap(object func, object d, object factory=dict):
  179. """
  180. Apply function to items of dictionary
  181. >>> accountids = {"Alice": 10, "Bob": 20}
  182. >>> itemmap(reversed, accountids) # doctest: +SKIP
  183. {10: "Alice", 20: "Bob"}
  184. See Also:
  185. keymap
  186. valmap
  187. """
  188. cdef:
  189. object rv, k, v
  190. f_map_next f
  191. PyObject *obj
  192. PyObject *pkey
  193. PyObject *pval
  194. Py_ssize_t pos = 0
  195. rv = factory()
  196. f = get_map_iter(d, &obj)
  197. d = <object>obj
  198. Py_DECREF(d)
  199. while f(d, &pos, &pkey, &pval):
  200. k, v = func((<object>pkey, <object>pval))
  201. rv[k] = v
  202. return rv
  203. cpdef object valfilter(object predicate, object d, object factory=dict):
  204. """
  205. Filter items in dictionary by value
  206. >>> iseven = lambda x: x % 2 == 0
  207. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  208. >>> valfilter(iseven, d)
  209. {1: 2, 3: 4}
  210. See Also:
  211. keyfilter
  212. itemfilter
  213. valmap
  214. """
  215. cdef:
  216. object rv
  217. f_map_next f
  218. PyObject *obj
  219. PyObject *pkey
  220. PyObject *pval
  221. Py_ssize_t pos = 0
  222. rv = factory()
  223. f = get_map_iter(d, &obj)
  224. d = <object>obj
  225. Py_DECREF(d)
  226. while f(d, &pos, &pkey, &pval):
  227. if predicate(<object>pval):
  228. rv[<object>pkey] = <object>pval
  229. return rv
  230. cpdef object keyfilter(object predicate, object d, object factory=dict):
  231. """
  232. Filter items in dictionary by key
  233. >>> iseven = lambda x: x % 2 == 0
  234. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  235. >>> keyfilter(iseven, d)
  236. {2: 3, 4: 5}
  237. See Also:
  238. valfilter
  239. itemfilter
  240. keymap
  241. """
  242. cdef:
  243. object rv
  244. f_map_next f
  245. PyObject *obj
  246. PyObject *pkey
  247. PyObject *pval
  248. Py_ssize_t pos = 0
  249. rv = factory()
  250. f = get_map_iter(d, &obj)
  251. d = <object>obj
  252. Py_DECREF(d)
  253. while f(d, &pos, &pkey, &pval):
  254. if predicate(<object>pkey):
  255. rv[<object>pkey] = <object>pval
  256. return rv
  257. cpdef object itemfilter(object predicate, object d, object factory=dict):
  258. """
  259. Filter items in dictionary by item
  260. >>> def isvalid(item):
  261. ... k, v = item
  262. ... return k % 2 == 0 and v < 4
  263. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  264. >>> itemfilter(isvalid, d)
  265. {2: 3}
  266. See Also:
  267. keyfilter
  268. valfilter
  269. itemmap
  270. """
  271. cdef:
  272. object rv, k, v
  273. f_map_next f
  274. PyObject *obj
  275. PyObject *pkey
  276. PyObject *pval
  277. Py_ssize_t pos = 0
  278. rv = factory()
  279. f = get_map_iter(d, &obj)
  280. d = <object>obj
  281. Py_DECREF(d)
  282. while f(d, &pos, &pkey, &pval):
  283. k = <object>pkey
  284. v = <object>pval
  285. if predicate((k, v)):
  286. rv[k] = v
  287. return rv
  288. cpdef object assoc(object d, object key, object value, object factory=dict):
  289. """
  290. Return a new dict with new key value pair
  291. New dict has d[key] set to value. Does not modify the initial dictionary.
  292. >>> assoc({'x': 1}, 'x', 2)
  293. {'x': 2}
  294. >>> assoc({'x': 1}, 'y', 3) # doctest: +SKIP
  295. {'x': 1, 'y': 3}
  296. """
  297. cdef object rv
  298. rv = factory()
  299. if PyDict_CheckExact(rv):
  300. PyDict_Update(rv, d)
  301. else:
  302. rv.update(d)
  303. rv[key] = value
  304. return rv
  305. cpdef object assoc_in(object d, object keys, object value, object factory=dict):
  306. """
  307. Return a new dict with new, potentially nested, key value pair
  308. >>> purchase = {'name': 'Alice',
  309. ... 'order': {'items': ['Apple', 'Orange'],
  310. ... 'costs': [0.50, 1.25]},
  311. ... 'credit card': '5555-1234-1234-1234'}
  312. >>> assoc_in(purchase, ['order', 'costs'], [0.25, 1.00]) # doctest: +SKIP
  313. {'credit card': '5555-1234-1234-1234',
  314. 'name': 'Alice',
  315. 'order': {'costs': [0.25, 1.00], 'items': ['Apple', 'Orange']}}
  316. """
  317. cdef object prevkey, key
  318. cdef object rv, inner, dtemp
  319. prevkey, keys = keys[0], keys[1:]
  320. rv = factory()
  321. if PyDict_CheckExact(rv):
  322. PyDict_Update(rv, d)
  323. else:
  324. rv.update(d)
  325. inner = rv
  326. for key in keys:
  327. if prevkey in d:
  328. d = d[prevkey]
  329. dtemp = factory()
  330. if PyDict_CheckExact(dtemp):
  331. PyDict_Update(dtemp, d)
  332. else:
  333. dtemp.update(d)
  334. else:
  335. d = factory()
  336. dtemp = d
  337. inner[prevkey] = dtemp
  338. prevkey = key
  339. inner = dtemp
  340. inner[prevkey] = value
  341. return rv
  342. cdef object c_dissoc(object d, object keys):
  343. cdef object rv, key
  344. rv = copy(d)
  345. for key in keys:
  346. if key in rv:
  347. del rv[key]
  348. return rv
  349. def dissoc(d, *keys):
  350. """
  351. Return a new dict with the given key(s) removed.
  352. New dict has d[key] deleted for each supplied key.
  353. Does not modify the initial dictionary.
  354. >>> dissoc({'x': 1, 'y': 2}, 'y')
  355. {'x': 1}
  356. >>> dissoc({'x': 1, 'y': 2}, 'y', 'x')
  357. {}
  358. >>> dissoc({'x': 1}, 'y') # Ignores missing keys
  359. {'x': 1}
  360. """
  361. return c_dissoc(d, keys)
  362. cpdef object update_in(object d, object keys, object func, object default=None, object factory=dict):
  363. """
  364. Update value in a (potentially) nested dictionary
  365. inputs:
  366. d - dictionary on which to operate
  367. keys - list or tuple giving the location of the value to be changed in d
  368. func - function to operate on that value
  369. If keys == [k0,..,kX] and d[k0]..[kX] == v, update_in returns a copy of the
  370. original dictionary with v replaced by func(v), but does not mutate the
  371. original dictionary.
  372. If k0 is not a key in d, update_in creates nested dictionaries to the depth
  373. specified by the keys, with the innermost value set to func(default).
  374. >>> inc = lambda x: x + 1
  375. >>> update_in({'a': 0}, ['a'], inc)
  376. {'a': 1}
  377. >>> transaction = {'name': 'Alice',
  378. ... 'purchase': {'items': ['Apple', 'Orange'],
  379. ... 'costs': [0.50, 1.25]},
  380. ... 'credit card': '5555-1234-1234-1234'}
  381. >>> update_in(transaction, ['purchase', 'costs'], sum) # doctest: +SKIP
  382. {'credit card': '5555-1234-1234-1234',
  383. 'name': 'Alice',
  384. 'purchase': {'costs': 1.75, 'items': ['Apple', 'Orange']}}
  385. >>> # updating a value when k0 is not in d
  386. >>> update_in({}, [1, 2, 3], str, default="bar")
  387. {1: {2: {3: 'bar'}}}
  388. >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0)
  389. {1: 'foo', 2: {3: {4: 1}}}
  390. """
  391. cdef object prevkey, key
  392. cdef object rv, inner, dtemp
  393. prevkey, keys = keys[0], keys[1:]
  394. rv = factory()
  395. if PyDict_CheckExact(rv):
  396. PyDict_Update(rv, d)
  397. else:
  398. rv.update(d)
  399. inner = rv
  400. for key in keys:
  401. if prevkey in d:
  402. d = d[prevkey]
  403. dtemp = factory()
  404. if PyDict_CheckExact(dtemp):
  405. PyDict_Update(dtemp, d)
  406. else:
  407. dtemp.update(d)
  408. else:
  409. d = factory()
  410. dtemp = d
  411. inner[prevkey] = dtemp
  412. prevkey = key
  413. inner = dtemp
  414. if prevkey in d:
  415. key = func(d[prevkey])
  416. else:
  417. key = func(default)
  418. inner[prevkey] = key
  419. return rv
  420. cdef tuple _get_in_exceptions = (KeyError, IndexError, TypeError)
  421. cpdef object get_in(object keys, object coll, object default=None, object no_default=False):
  422. """
  423. Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys.
  424. If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless
  425. ``no_default`` is specified, then it raises KeyError or IndexError.
  426. ``get_in`` is a generalization of ``operator.getitem`` for nested data
  427. structures such as dictionaries and lists.
  428. >>> transaction = {'name': 'Alice',
  429. ... 'purchase': {'items': ['Apple', 'Orange'],
  430. ... 'costs': [0.50, 1.25]},
  431. ... 'credit card': '5555-1234-1234-1234'}
  432. >>> get_in(['purchase', 'items', 0], transaction)
  433. 'Apple'
  434. >>> get_in(['name'], transaction)
  435. 'Alice'
  436. >>> get_in(['purchase', 'total'], transaction)
  437. >>> get_in(['purchase', 'items', 'apple'], transaction)
  438. >>> get_in(['purchase', 'items', 10], transaction)
  439. >>> get_in(['purchase', 'total'], transaction, 0)
  440. 0
  441. >>> get_in(['y'], {}, no_default=True)
  442. Traceback (most recent call last):
  443. ...
  444. KeyError: 'y'
  445. See Also:
  446. itertoolz.get
  447. operator.getitem
  448. """
  449. cdef object item
  450. try:
  451. for item in keys:
  452. coll = coll[item]
  453. return coll
  454. except _get_in_exceptions:
  455. if no_default:
  456. raise
  457. return default