import copy
import numpy
from .neural._classes.model import Model
from . import check
from .check import equal_axis
from . import describe


class FunctionLayer(Model):
    '''Wrap functions into weightless Model instances, for use as network
    components.'''
    def __init__(self, begin_update, predict=None, predict_one=None,
                 nI=None, nO=None, *args, **kwargs):
        self.begin_update = begin_update
        if predict is not None:
            self.predict = predict
        if predict_one is not None:
            self.predict_one = predict_one
        self.nI = nI
        self.nO = nO
        Model.__init__(self)


def _run_child_hooks(model, X, y):
    # Run each child layer's data hooks, then push the data through that
    # layer so the next child sees the transformed input.
    for layer in model._layers:
        for hook in layer.on_data_hooks:
            hook(layer, X, y)
        X = layer(X)


@describe.on_data(_run_child_hooks)
class FeedForward(Model):
    '''A feed-forward network that chains multiple Model instances together.'''
    name = 'feed-forward'

    def __init__(self, layers, **kwargs):
        self._layers = []
        for layer in layers:
            if isinstance(layer, FeedForward):
                # Flatten nested FeedForward instances into one layer list.
                self._layers.extend(layer._layers)
            else:
                self._layers.append(layer)
        Model.__init__(self, **kwargs)

    @property
    def input_shape(self):
        return self._layers[0].input_shape

    @property
    def output_shape(self):
        return self._layers[-1].output_shape

    def predict(self, X):
        for layer in self._layers:
            X = layer(X)
        return X

    def begin_update(self, X, drop=0.):
        callbacks = []
        for layer in self._layers:
            X, inc_layer_grad = layer.begin_update(X, drop=drop)
            callbacks.append(inc_layer_grad)

        def continue_update(gradient, sgd=None):
            # Backpropagate through the layers in reverse order.
            for callback in reversed(callbacks):
                if gradient is None or callback is None:
                    break
                gradient = callback(gradient, sgd)
            return gradient

        return X, continue_update


def layerize(begin_update=None, predict=None, *args, **kwargs):
    '''Wrap a function into a layer.'''
    if begin_update is not None:
        return FunctionLayer(begin_update, predict=predict, *args, **kwargs)

    def wrapper(begin_update):
        return FunctionLayer(begin_update, *args, **kwargs)

    return wrapper


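# A minimal usage sketch for `layerize` (the `double` function below is
# hypothetical, not part of this module). Used as a decorator, it turns a
# plain forward function into a weightless layer:
#
#     @layerize
#     def double(X, drop=0.):
#         return X * 2, lambda dY, sgd=None: dY * 2
#
#     Y, backprop = double.begin_update(X)

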
def metalayerize(user_func):
    '''Wrap a function over a sequence of layers and an input into a layer.'''
    def returned(layers, *args, **kwargs):
        def begin_update(X, *args, **kwargs):
            return user_func(layers, X, *args, **kwargs)
        return FunctionLayer(begin_update, *args, **kwargs)
    return returned


@layerize
def flatten_add_lengths(seqs, pad=0, drop=0.):
    ops = Model.ops
    lengths = ops.asarray([len(seq) for seq in seqs], dtype='i')

    def finish_update(d_X, sgd=None):
        return ops.unflatten(d_X, lengths, pad=pad)

    X = ops.flatten(seqs, pad=pad)
    return (X, lengths), finish_update


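# Sketch of how `flatten_add_lengths` behaves (assuming `seqs` is a list of
# arrays): the forward pass returns `(X, lengths)`, where `X` concatenates
# all sequences, and the callback restores the list-of-arrays structure.
#
#     (X, lengths), backprop = flatten_add_lengths.begin_update(seqs)
#     d_seqs = backprop(d_X)   # list of per-sequence gradients

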
def remap_ids(ops=None, column=0):
    # id_map persists in the closure, so ids seen across batches are mapped
    # to consistent internal values.
    id_map = {0: 0}

    def remap_ids_fwd(ids, drop=0.):
        ids = ids[:, column]
        if not isinstance(ids, numpy.ndarray):
            ids = ids.get()
        n_vector = len(id_map)
        for i, id_ in enumerate(ids):
            id_ = int(id_)
            if id_ not in id_map:
                id_map[id_] = n_vector
                n_vector += 1
            ids[i] = id_map[id_]
        return ops.asarray(ids), None

    model = layerize(remap_ids_fwd)
    if ops is None:
        ops = model.ops
    return model


def with_getitem(idx, layer):
    def begin_update(items, drop=0.):
        X, finish = layer.begin_update(items[idx], drop=drop)
        return items[:idx] + (X,) + items[idx+1:], finish

    model = layerize(begin_update)
    model._layers.append(layer)

    def on_data(self, items, y):
        for hook in layer.on_data_hooks:
            hook(layer, items[idx], y)

    model.on_data_hooks.append(on_data)
    return model


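# Usage sketch for `with_getitem` (with a hypothetical `some_layer`): apply
# `layer` to element `idx` of a tuple input, passing the other elements
# through untouched.
#
#     model = with_getitem(0, some_layer)
#     (Y, lengths), backprop = model.begin_update((X, lengths))

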
def noop(*layers):
    '''Transform a sequence of layers into a null operation.'''
    def begin_update(X, drop=0.):
        return X, lambda D, *a, **k: D
    return begin_update


def chain(*layers):
    '''Compose two models `f` and `g` such that they become layers of a single
    feed-forward model that computes `g(f(x))`.
    Raises an exception if their dimensions don't match.
    '''
    if len(layers) == 0:
        return FeedForward([])
    elif len(layers) == 1:
        return layers[0]
    else:
        return FeedForward(layers)


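# Usage sketch for `chain` (with hypothetical layers `embed` and `softmax`):
#
#     model = chain(embed, softmax)   # computes softmax(embed(x))
#
# Because FeedForward.__init__ flattens nested FeedForward instances,
# chain(chain(f, g), h) and chain(f, g, h) build the same flat layer list.

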
def clone(orig, n):
    '''Construct `n` copies of a layer, with distinct weights.
    i.e. `clone(f, 3)(x)` computes `f(f'(f''(x)))`.
    '''
    if n == 0:
        return layerize(noop())
    layers = [orig]
    for i in range(n-1):
        layers.append(copy.deepcopy(orig))
        # Give each copy a fresh id, so it gets its own weights.
        layers[-1].set_id()
    return FeedForward(layers)


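# Usage sketch for `clone` (an `Affine`-like layer is assumed, not defined
# here): each copy is a deep copy with a fresh id, so the layers train
# independent weights.
#
#     deep = clone(Affine(64, 64), 3)
#     assert len(deep._layers) == 3

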
def concatenate(*layers):  # pragma: no cover
    '''Compose two or more models `f`, `g`, etc., such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`.
    '''
    if not layers:
        return noop()
    ops = layers[0].ops

    def begin_update(X, *a, **k):
        forward, backward = split_backward(layers)
        values = [fwd(X, *a, **k) for fwd in forward]
        output = ops.xp.hstack(values)
        shapes = [val.shape for val in values]

        def finish_update(gradient, *args, **kwargs):
            # Slice the concatenated gradient back into per-layer pieces,
            # and sum each layer's input gradient into a single array.
            layer_grads = []
            start = 0
            for bwd, shape in zip(backward, shapes):
                end = start + shape[1]
                if bwd is not None:
                    d = bwd(ops.xp.ascontiguousarray(gradient[:, start : end]),
                            *args, **kwargs)
                    if d is not None and hasattr(X, 'shape'):
                        if not layer_grads:
                            layer_grads.append(d)
                        else:
                            layer_grads[-1] += d
                start = end
            if layer_grads:
                return ops.asarray(layer_grads[-1])
            else:
                return None

        return output, finish_update

    layer = FunctionLayer(begin_update)
    layer._layers = list(layers)

    def on_data(self, X, y=None):
        for child in self._layers:
            for hook in child.on_data_hooks:
                hook(child, X, y)

    layer.on_data_hooks.append(on_data)
    return layer


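# Usage sketch for `concatenate` (hypothetical layers `f` and `g` with output
# widths 64 and 32): outputs are stacked along the feature axis.
#
#     model = concatenate(f, g)
#     Y, backprop = model.begin_update(X)   # Y.shape[1] == 64 + 32

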
def add(*layers):
    if not layers:
        return noop()
    ops = layers[0].ops

    def forward(X, drop=0.):
        outs, callbacks = zip(*[lyr.begin_update(X, drop=drop) for lyr in layers])
        # Sum the outputs elementwise.
        out = outs[0]
        for o in outs[1:]:
            out += o

        def backward(d_out, sgd=None):
            grads = [bp(d_out, sgd=sgd) for bp in callbacks if bp is not None]
            grads = [g for g in grads if g is not None]
            if grads:
                total = grads[0]
                for g in grads[1:]:
                    total += g
                return total
            else:
                return None

        return out, backward

    model = layerize(forward)
    model._layers = list(layers)

    def on_data(self, X, y):
        for layer in layers:
            for hook in layer.on_data_hooks:
                hook(layer, X, y)

    model.on_data_hooks.append(on_data)
    return model


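# Usage sketch for `add` (hypothetical layers `f` and `g` with equal output
# shapes): outputs are summed elementwise, and the backward pass sums the
# gradients returned by each layer's callback.
#
#     model = add(f, g)
#     Y, backprop = model.begin_update(X)   # Y == f(X) + g(X)

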
def split_backward(layers):  # pragma: no cover
    '''Separate a sequence of layers' `begin_update` methods into two lists of
    functions: one that computes the forward values, and the other that
    completes the backward pass. The backward sequence is only populated after
    the forward functions have been applied.
    '''
    backward = []
    forward = [sink_return(op.begin_update, backward.append)
               for op in layers]
    return forward, backward


def sink_return(func, sink, splitter=None):  # pragma: no cover
    '''Transform a function `func` that returns tuples into a function that
    returns single values. Call a function `sink` on the unused values.
    '''
    def wrap(*args, **kwargs):
        output = func(*args, **kwargs)
        if splitter is None:
            to_keep, to_sink = output
        else:
            to_keep, to_sink = splitter(*output)
        sink(to_sink)
        return to_keep
    return wrap


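# Sketch of how `split_backward` and `sink_return` cooperate (`f` and `g`
# are hypothetical layers): each wrapped forward function returns only the
# output and appends its backprop callback to the shared `backward` list as
# a side effect.
#
#     forward, backward = split_backward([f, g])
#     values = [fwd(X) for fwd in forward]
#     # backward now holds [f_backprop, g_backprop], in forward order

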
def Arg(i):
    @layerize
    def begin_update(batched_inputs, drop=0.):
        inputs = list(zip(*batched_inputs))
        return inputs[i], None
    return begin_update


def with_flatten(layer, pad=0, ndim=4):
    def begin_update(seqs_in, drop=0.):
        lengths = layer.ops.asarray([len(seq) for seq in seqs_in])
        X, bp_layer = layer.begin_update(layer.ops.flatten(seqs_in, pad=pad),
                                         drop=drop)
        if bp_layer is None:
            return layer.ops.unflatten(X, lengths, pad=pad), None

        def finish_update(d_seqs_out, sgd=None):
            d_X = bp_layer(layer.ops.flatten(d_seqs_out, pad=pad), sgd=sgd)
            if d_X is None:
                return None
            else:
                return layer.ops.unflatten(d_X, lengths, pad=pad)

        return layer.ops.unflatten(X, lengths, pad=pad), finish_update

    def predict(seqs_in):
        lengths = layer.ops.asarray([len(seq) for seq in seqs_in])
        X = layer(layer.ops.flatten(seqs_in, pad=pad))
        return layer.ops.unflatten(X, lengths, pad=pad)

    model = layerize(begin_update, predict=predict)
    model._layers.append(layer)
    model.on_data_hooks.append(_with_flatten_on_data)
    model.name = 'flatten'
    return model


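# Usage sketch for `with_flatten` (a hypothetical per-token `token_layer` is
# assumed): lets a layer that operates on a single concatenated array accept
# and return a list of variable-length sequences.
#
#     model = with_flatten(token_layer)
#     seqs_out, backprop = model.begin_update(seqs_in)

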
def _with_flatten_on_data(model, X, y):
    X = model.ops.flatten(X)
    for layer in model._layers:
        for hook in layer.on_data_hooks:
            hook(layer, X, y)
        X = layer(X)


def get_word_ids(ops, pad=1, token_drop=0., ignore=None):
    # TODO: Is this made obsolete by the FeatureExtracter?
    def forward(docs, drop=0.):
        '''Get word forms.'''
        seqs = []
        ops = Model.ops
        for doc in docs:
            if ignore is not None:
                doc = [token for token in doc if not ignore(token)]
            # seq = [0] * pad
            seq = [(token.lex_id or token.orth) for token in doc]
            # seq += [0] * pad
            seqs.append(ops.asarray(seq, dtype='uint64'))
        return seqs, None
    return layerize(forward)


def FeatureExtracter(attrs, ops=None):
    if ops is None:
        ops = Model.ops

    def feature_extracter_fwd(docs, drop=0.):
        # Handle spans as well as docs.
        def get_feats(doc):
            if hasattr(doc, 'to_array'):
                return doc.to_array(attrs)
            else:
                return doc.doc.to_array(attrs)[doc.start : doc.end]

        features = [ops.asarray(get_feats(doc), dtype='uint64') for doc in docs]

        def feature_extracter_bwd(d_features, sgd=None):
            return d_features

        return features, feature_extracter_bwd

    return layerize(feature_extracter_fwd)


def wrap(func, *child_layers):
    model = layerize(func)
    model._layers.extend(child_layers)

    def on_data(self, X, y):
        for child in self._layers:
            for hook in child.on_data_hooks:
                hook(child, X, y)

    model.on_data_hooks.append(on_data)
    return model


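# `wrap` is the common helper used by the combinators below (see `uniqued`
# and `foreach`): it registers `child_layers` so their on_data hooks fire,
# while `func` defines the combined forward/backward behaviour.

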
def uniqued(layer, column=0):
    '''Group inputs to a layer, so that the layer only has to compute for the
    unique values. The data is transformed back before output, and the same
    transformation is applied for the gradient. Effectively, this is a cache
    local to each minibatch.

    The uniqued wrapper is useful for word inputs, because common words are
    seen often, but we may want to compute complicated features for the
    words, using e.g. a character LSTM.
    '''
    def uniqued_fwd(X, drop=0.):
        keys = X[:, column]
        keys = layer.ops.xp.ascontiguousarray(keys)
        if not isinstance(keys, numpy.ndarray):
            keys = keys.get()
        uniq_keys, ind, inv, counts = numpy.unique(keys, return_index=True,
                                                   return_inverse=True,
                                                   return_counts=True)
        X_uniq = layer.ops.xp.ascontiguousarray(X[ind])
        Y_uniq, bp_Y_uniq = layer.begin_update(X_uniq, drop=drop)
        # Scatter the unique outputs back to the original row order.
        Y = Y_uniq[inv].reshape((X.shape[0],) + Y_uniq.shape[1:])

        def uniqued_bwd(dY, sgd=None):
            # Accumulate the gradient for each unique row, backprop once per
            # unique value, then scatter back to the original order.
            dY_uniq = layer.ops.allocate(Y_uniq.shape, dtype='f')
            layer.ops.scatter_add(dY_uniq, layer.ops.asarray(inv, dtype='i'), dY)
            d_uniques = bp_Y_uniq(dY_uniq, sgd=sgd)
            if d_uniques is not None:
                dX = (d_uniques / counts)[inv]
                return dX
            else:
                return None

        return Y, uniqued_bwd

    model = wrap(uniqued_fwd, layer)
    return model


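# Usage sketch for `uniqued` (a hypothetical `embed` layer over a column of
# ids): for a batch with many repeated keys, the wrapped layer computes each
# unique row once, then scatters results back to the original order.
#
#     model = uniqued(embed, column=0)
#     Y, backprop = model.begin_update(ids)   # ids: 2d array of uint64 keys

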
def foreach(layer, drop_factor=1.0):
    '''Map a layer across list items.'''
    def foreach_fwd(docs, drop=0.):
        sents = []
        lengths = []
        for doc in docs:
            doc_sents = [sent for sent in doc if len(sent)]
            subset = [s for s in doc_sents
                      if numpy.random.random() >= drop * drop_factor]
            if subset:
                sents.extend(subset)
                lengths.append(len(subset))
            else:
                # Keep at least one item per document.
                numpy.random.shuffle(doc_sents)
                sents.append(doc_sents[0])
                lengths.append(1)
        flat, bp_flat = layer.begin_update(sents, drop=0.)
        output = layer.ops.unflatten(flat, lengths)

        def foreach_bwd(d_output, sgd=None):
            d_flat = layer.ops.flatten(d_output)
            d_sents = bp_flat(d_flat, sgd=sgd)
            if d_sents is None:
                return d_sents
            else:
                return layer.ops.unflatten(d_sents, lengths)

        return output, foreach_bwd

    model = wrap(foreach_fwd, layer)

    def _run_foreach_child_hooks(model, X, y):
        for layer in model._layers:
            for hook in layer.on_data_hooks:
                hook(layer, X[0], y[0])

    model.on_data_hooks = [_run_foreach_child_hooks]
    return model


def foreach_sentence(layer, drop_factor=1.0):
    '''Map a layer across sentences (assumes a spaCy-esque `.sents` interface).'''
    def sentence_fwd(docs, drop=0.):
        sents = []
        lengths = []
        for doc in docs:
            doc_sents = [sent for sent in doc.sents if len(sent)]
            subset = [s for s in doc_sents
                      if numpy.random.random() >= drop * drop_factor]
            if subset:
                sents.extend(subset)
                lengths.append(len(subset))
            else:
                # Keep at least one sentence per document.
                numpy.random.shuffle(doc_sents)
                sents.append(doc_sents[0])
                lengths.append(1)
        flat, bp_flat = layer.begin_update(sents, drop=0.)
        output = layer.ops.unflatten(flat, lengths)

        def sentence_bwd(d_output, sgd=None):
            d_flat = layer.ops.flatten(d_output)
            d_sents = bp_flat(d_flat, sgd=sgd)
            if d_sents is None:
                return d_sents
            else:
                return layer.ops.unflatten(d_sents, lengths)

        return output, sentence_bwd

    model = wrap(sentence_fwd, layer)
    return model
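

# Usage sketch for `foreach_sentence` (assumes spaCy `Doc` objects with a
# `.sents` iterator, and a hypothetical `sent_encoder` layer): the wrapped
# layer sees a flat list of sentences, and outputs are regrouped per document.
#
#     model = foreach_sentence(sent_encoder)
#     per_doc_outputs, backprop = model.begin_update(docs)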