- import copy
- import numpy
-
- from .neural._classes.model import Model
- from . import check
- from .check import equal_axis
- from . import describe
-
-
- class FunctionLayer(Model):
- '''Wrap functions into weightless Model instances, for use as network
- components.'''
- def __init__(self, begin_update, predict=None, predict_one=None,
- nI=None, nO=None, *args, **kwargs):
- self.begin_update = begin_update
- if predict is not None:
- self.predict = predict
- if predict_one is not None:
- self.predict_one = predict_one
- self.nI = nI
- self.nO = nO
- Model.__init__(self)
-
-
- def _run_child_hooks(model, X, y):
- for layer in model._layers:
- for hook in layer.on_data_hooks:
- hook(layer, X, y)
- X = layer(X)
-
-
- @describe.on_data(_run_child_hooks)
- class FeedForward(Model):
- '''A feed-forward network that chains multiple Model instances together.'''
- name = 'feed-forward'
- def __init__(self, layers, **kwargs):
- self._layers = []
- for layer in layers:
- if isinstance(layer, FeedForward):
- self._layers.extend(layer._layers)
- else:
- self._layers.append(layer)
- Model.__init__(self, **kwargs)
-
- @property
- def input_shape(self):
- return self._layers[0].input_shape
-
- @property
- def output_shape(self):
- return self._layers[-1].output_shape
-
- def predict(self, X):
- for layer in self._layers:
- X = layer(X)
- return X
-
- def begin_update(self, X, drop=0.):
- callbacks = []
- for layer in self._layers:
- X, inc_layer_grad = layer.begin_update(X, drop=drop)
- callbacks.append(inc_layer_grad)
- def continue_update(gradient, sgd=None):
- for callback in reversed(callbacks):
- if gradient is None or callback is None:
- break
- gradient = callback(gradient, sgd)
- return gradient
- return X, continue_update
-
-
- def layerize(begin_update=None, predict=None, *args, **kwargs):
- '''Wrap a function into a layer'''
- if begin_update is not None:
- return FunctionLayer(begin_update, predict=predict, *args, **kwargs)
- def wrapper(begin_update):
- return FunctionLayer(begin_update, *args, **kwargs)
- return wrapper
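-
-
- # Illustrative usage sketch (the `double` function below is a hypothetical
- # example, not part of the module): `layerize` can be used as a decorator to
- # turn a forward function returning (output, finish_update) into a weightless
- # layer.
- def _example_layerize():  # pragma: no cover
-     @layerize
-     def double(X, drop=0.):
-         def finish_update(dY, sgd=None):
-             return dY * 2.
-         return X * 2., finish_update
-     return double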
-
-
- def metalayerize(user_func):
- '''Wrap a function over a sequence of layers and an input into a layer factory.'''
- def returned(layers, *args, **kwargs):
- def begin_update(X, *args, **kwargs):
- return user_func(layers, X, *args, **kwargs)
- return FunctionLayer(begin_update, *args, **kwargs)
- return returned
-
-
- @layerize
- def flatten_add_lengths(seqs, pad=0, drop=0.):
- ops = Model.ops
- lengths = ops.asarray([len(seq) for seq in seqs], dtype='i')
- def finish_update(d_X, sgd=None):
- return ops.unflatten(d_X, lengths, pad=pad)
- X = ops.flatten(seqs, pad=pad)
- return (X, lengths), finish_update
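-
-
- # Illustrative usage sketch (assumes `seqs` is a list of 2d arrays): the layer
- # concatenates the sequences into one array and also returns their lengths,
- # while the backward pass restores the per-sequence structure of the gradient.
- def _example_flatten_add_lengths(seqs):  # pragma: no cover
-     (X, lengths), backprop = flatten_add_lengths.begin_update(seqs)
-     # X stacks the rows of all sequences; lengths has one entry per sequence.
-     d_seqs = backprop(X, sgd=None)
-     return X, lengths, d_seqs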
-
-
- def remap_ids(ops=None, column=0):
- id_map = {0: 0}
- def remap_ids_fwd(ids, drop=0.):
- ids = ids[:, column]
- if not isinstance(ids, numpy.ndarray):
- ids = ids.get()
- n_vector = len(id_map)
- for i, id_ in enumerate(ids):
- id_ = int(id_)
- if id_ not in id_map:
- id_map[id_] = n_vector
- n_vector += 1
- ids[i] = id_map[id_]
- return ops.asarray(ids), None
- model = layerize(remap_ids_fwd)
- if ops is None:
- ops = model.ops
- return model
-
-
- def with_getitem(idx, layer):
- def begin_update(items, drop=0.):
- X, finish = layer.begin_update(items[idx], drop=drop)
- return items[:idx] + (X,) + items[idx+1:], finish
- model = layerize(begin_update)
- model._layers.append(layer)
- def on_data(self, items, y):
- for hook in layer.on_data_hooks:
- hook(layer, items[idx], y)
- model.on_data_hooks.append(on_data)
- return model
-
-
- def noop(*layers):
- '''Transform a sequence of layers into a null operation.'''
- def begin_update(X, drop=0.):
- return X, lambda D, *a, **k: D
- return begin_update
-
-
- def chain(*layers):
- '''Compose a sequence of models `f`, `g`, etc., into a single feed-forward
- model that computes `g(f(x))`.
- '''
- if len(layers) == 0:
- return FeedForward([])
- elif len(layers) == 1:
- return layers[0]
- else:
- return FeedForward(layers)
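-
-
- # Illustrative usage sketch (the helper functions are hypothetical, not part
- # of the module): chaining two weightless layers produces a FeedForward model
- # that applies them left to right, i.e. computes double(add_one(X)).
- def _example_chain():  # pragma: no cover
-     def add_one(X, drop=0.):
-         return X + 1., lambda dY, sgd=None: dY
-     def double(X, drop=0.):
-         return X * 2., lambda dY, sgd=None: dY * 2.
-     return chain(layerize(add_one), layerize(double))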
-
-
- def clone(orig, n):
- '''Construct `n` copies of a layer, with distinct weights.
-
- i.e. `clone(f, 3)(x)` computes `f(f'(f''(x)))`.
- '''
- if n == 0:
- return layerize(noop())
- layers = [orig]
- for i in range(n-1):
- layers.append(copy.deepcopy(orig))
- layers[-1].set_id()
- return FeedForward(layers)
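-
-
- # Illustrative usage sketch (assumes `layer` is not itself a FeedForward):
- # cloning keeps the original layer and adds n-1 deep copies, each with its
- # own weights, composed into a single feed-forward model.
- def _example_clone(layer):  # pragma: no cover
-     model = clone(layer, 3)
-     assert len(model._layers) == 3
-     return model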
-
-
- def concatenate(*layers): # pragma: no cover
- '''Compose two or more models `f`, `g`, etc., such that their outputs are
- concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`.
- '''
- if not layers:
- return noop()
- ops = layers[0].ops
- def begin_update(X, *a, **k):
- forward, backward = split_backward(layers)
- values = [fwd(X, *a, **k) for fwd in forward]
-
- output = ops.xp.hstack(values)
- shapes = [val.shape for val in values]
-
- def finish_update(gradient, *args, **kwargs):
- layer_grads = []
- start = 0
- for bwd, shape in zip(backward, shapes):
- end = start + shape[1]
- if bwd is not None:
- d = bwd(ops.xp.ascontiguousarray(gradient[:, start : end]),
- *args, **kwargs)
- if d is not None and hasattr(X, 'shape'):
- if not layer_grads:
- layer_grads.append(d)
- else:
- layer_grads[-1] += d
- start = end
- if layer_grads:
- return ops.asarray(layer_grads[-1])
- else:
- return None
- return output, finish_update
- layer = FunctionLayer(begin_update)
- layer._layers = list(layers)
- def on_data(self, X, y=None):
- for layer in self._layers:
- for hook in layer.on_data_hooks:
- hook(layer, X, y)
- layer.on_data_hooks.append(on_data)
- return layer
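-
-
- # Illustrative usage sketch (the inner layers are hypothetical): concatenation
- # stacks the layers' outputs along the second axis, so for a (n, d) input the
- # combined output has shape (n, 2 * d).
- def _example_concatenate():  # pragma: no cover
-     identity = layerize(lambda X, drop=0.: (X, None))
-     negate = layerize(lambda X, drop=0.: (X * -1., None))
-     return concatenate(identity, negate)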
-
-
- def add(*layers):
- if not layers:
- return noop()
- ops = layers[0].ops
- def forward(X, drop=0.):
- outs, callbacks = zip(*[lyr.begin_update(X, drop=drop) for lyr in layers])
- out = outs[0]
- for o in outs[1:]:
- out += o
-
- def backward(d_out, sgd=None):
- grads = [bp(d_out, sgd=sgd) for bp in callbacks if bp is not None]
- grads = [g for g in grads if g is not None]
- if grads:
- total = grads[0]
- for g in grads[1:]:
- total += g
- return total
- else:
- return None
- return out, backward
- model = layerize(forward)
- model._layers = list(layers)
- def on_data(self, X, y):
- for layer in layers:
- for hook in layer.on_data_hooks:
- hook(layer, X, y)
- model.on_data_hooks.append(on_data)
- return model
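-
-
- # Illustrative usage sketch (the inner layers are hypothetical): `add` runs
- # each layer on the same input and sums the outputs elementwise, so the model
- # below computes (X + 1) + (X * 2).
- def _example_add():  # pragma: no cover
-     plus_one = layerize(lambda X, drop=0.: (X + 1., None))
-     times_two = layerize(lambda X, drop=0.: (X * 2., None))
-     return add(plus_one, times_two)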
-
-
- def split_backward(layers): # pragma: no cover
- '''Separate a sequence of layers' `begin_update` methods into two lists of
- functions: one that computes the forward values, and the other that completes
- the backward pass. The backward sequence is only populated after the forward
- functions have been applied.
- '''
- backward = []
- forward = [sink_return(op.begin_update, backward.append)
- for op in layers]
- return forward, backward
-
-
- def sink_return(func, sink, splitter=None): # pragma: no cover
- '''Transform a function `func` that returns tuples into a function that returns
- single values. Call a function `sink` on the unused values.
- '''
- def wrap(*args, **kwargs):
- output = func(*args, **kwargs)
- if splitter is None:
- to_keep, to_sink = output
- else:
- to_keep, to_sink = splitter(*output)
- sink(to_sink)
- return to_keep
- return wrap
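-
-
- # Illustrative usage sketch: `sink_return` lets a tuple-returning function be
- # called as if it returned a single value, sending the second element to a
- # callback (here, appending it to a list).
- def _example_sink_return():  # pragma: no cover
-     collected = []
-     def forward_with_callback(X):
-         return X * 2., 'placeholder-callback'
-     forward = sink_return(forward_with_callback, collected.append)
-     Y = forward(3.)
-     # Y == 6.0; collected == ['placeholder-callback']
-     return Y, collected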
-
-
- def Arg(i):
- @layerize
- def begin_update(batched_inputs, drop=0.):
- inputs = list(zip(*batched_inputs))
- return inputs[i], None
- return begin_update
-
-
- def with_flatten(layer, pad=0, ndim=4):
- def begin_update(seqs_in, drop=0.):
- lengths = layer.ops.asarray([len(seq) for seq in seqs_in])
- X, bp_layer = layer.begin_update(layer.ops.flatten(seqs_in, pad=pad),
- drop=drop)
- if bp_layer is None:
- return layer.ops.unflatten(X, lengths, pad=pad), None
- def finish_update(d_seqs_out, sgd=None):
- d_X = bp_layer(layer.ops.flatten(d_seqs_out, pad=pad), sgd=sgd)
- if d_X is None:
- return None
- else:
- return layer.ops.unflatten(d_X, lengths, pad=pad)
- return layer.ops.unflatten(X, lengths, pad=pad), finish_update
-
- def predict(seqs_in):
- lengths = layer.ops.asarray([len(seq) for seq in seqs_in])
- X = layer(layer.ops.flatten(seqs_in, pad=pad))
- return layer.ops.unflatten(X, lengths, pad=pad)
-
- model = layerize(begin_update, predict=predict)
- model._layers.append(layer)
- model.on_data_hooks.append(_with_flatten_on_data)
- model.name = 'flatten'
- return model
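-
-
- # Illustrative usage sketch (the inner layer is hypothetical): `with_flatten`
- # lets a layer that expects a single array operate on a list of
- # variable-length sequences, flattening on the way in and unflattening on the
- # way out.
- def _example_with_flatten():  # pragma: no cover
-     double = layerize(
-         lambda X, drop=0.: (X * 2., lambda dY, sgd=None: dY * 2.))
-     return with_flatten(double)
-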
-
- def _with_flatten_on_data(model, X, y):
- X = model.ops.flatten(X)
- for layer in model._layers:
- for hook in layer.on_data_hooks:
- hook(layer, X, y)
- X = layer(X)
-
-
- def get_word_ids(ops, pad=1, token_drop=0., ignore=None):
- # TODO: Is this made obsolete by the FeatureExtractor?
- def forward(docs, drop=0.):
- '''Get word forms.'''
- seqs = []
- for doc in docs:
- if ignore is not None:
- doc = [token for token in doc if not ignore(token)]
- #seq = [0] * pad
- seq = [(token.lex_id or token.orth) for token in doc]
- #seq += [0] * pad
- seqs.append(ops.asarray(seq, dtype='uint64'))
- return seqs, None
- return layerize(forward)
-
-
- def FeatureExtracter(attrs, ops=None):
- if ops is None:
- ops = Model.ops
- def feature_extracter_fwd(docs, drop=0.):
- # Handle spans
- def get_feats(doc):
- if hasattr(doc, 'to_array'):
- return doc.to_array(attrs)
- else:
- return doc.doc.to_array(attrs)[doc.start:doc.end]
- features = [ops.asarray(get_feats(doc), dtype='uint64') for doc in docs]
- def feature_extracter_bwd(d_features, sgd=None):
- return d_features
- return features, feature_extracter_bwd
- return layerize(feature_extracter_fwd)
-
-
- def wrap(func, *child_layers):
- model = layerize(func)
- model._layers.extend(child_layers)
- def on_data(self, X, y):
- for child in self._layers:
- for hook in child.on_data_hooks:
- hook(child, X, y)
- model.on_data_hooks.append(on_data)
- return model
-
-
- def uniqued(layer, column=0):
- '''Group inputs to a layer, so that the layer only has to compute
- for the unique values. The data is transformed back before output, and the same
- transformation is applied to the gradient. Effectively, this is a cache
- local to each minibatch.
-
- The uniqued wrapper is useful for word inputs, because common words are
- seen often, but we may want to compute complicated features for the words,
- using e.g. a character LSTM.
- '''
- def uniqued_fwd(X, drop=0.):
- keys = X[:, column]
- keys = layer.ops.xp.ascontiguousarray(keys)
- if not isinstance(keys, numpy.ndarray):
- keys = keys.get()
- uniq_keys, ind, inv, counts = numpy.unique(keys, return_index=True,
- return_inverse=True,
- return_counts=True)
- X_uniq = layer.ops.xp.ascontiguousarray(X[ind])
- Y_uniq, bp_Y_uniq = layer.begin_update(X_uniq, drop=drop)
- Y = Y_uniq[inv].reshape((X.shape[0],) + Y_uniq.shape[1:])
- def uniqued_bwd(dY, sgd=None):
- dY_uniq = layer.ops.allocate(Y_uniq.shape, dtype='f')
- layer.ops.scatter_add(dY_uniq, layer.ops.asarray(inv, dtype='i'), dY)
- d_uniques = bp_Y_uniq(dY_uniq, sgd=sgd)
- if d_uniques is not None:
- dX = (d_uniques / counts)[inv]
- return dX
- else:
- return None
- return Y, uniqued_bwd
- model = wrap(uniqued_fwd, layer)
- return model
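-
-
- # Illustrative usage sketch (the inner layer is a hypothetical stand-in for an
- # expensive per-id encoder): the wrapped layer is computed once per distinct
- # key in the batch, and the results are scattered back to the original rows.
- def _example_uniqued():  # pragma: no cover
-     to_float = layerize(lambda ids, drop=0.: (ids.astype('f'),
-                                               lambda d, sgd=None: None))
-     return uniqued(to_float, column=0)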
-
-
- def foreach(layer, drop_factor=1.0):
- '''Map a layer across list items'''
- def foreach_fwd(docs, drop=0.):
- sents = []
- lengths = []
- for doc in docs:
- doc_sents = [sent for sent in doc if len(sent)]
- subset = [s for s in doc_sents if numpy.random.random() >= drop * drop_factor]
- if subset:
- sents.extend(subset)
- lengths.append(len(subset))
- else:
- numpy.random.shuffle(doc_sents)
- sents.append(doc_sents[0])
- lengths.append(1)
- flat, bp_flat = layer.begin_update(sents, drop=0.)
- output = layer.ops.unflatten(flat, lengths)
- def foreach_bwd(d_output, sgd=None):
- d_flat = layer.ops.flatten(d_output)
- d_sents = bp_flat(d_flat, sgd=sgd)
- if d_sents is None:
- return d_sents
- else:
- return layer.ops.unflatten(d_sents, lengths)
- return output, foreach_bwd
- model = wrap(foreach_fwd, layer)
-
- def _run_foreach_child_hooks(model, X, y):
- for layer in model._layers:
- for hook in layer.on_data_hooks:
- hook(layer, X[0], y[0])
- model.on_data_hooks = [_run_foreach_child_hooks]
-
- return model
-
-
- def foreach_sentence(layer, drop_factor=1.0):
- '''Map a layer across sentences (assumes spaCy-esque .sents interface)'''
- def sentence_fwd(docs, drop=0.):
- sents = []
- lengths = []
- for doc in docs:
- doc_sents = [sent for sent in doc.sents if len(sent)]
- subset = [s for s in doc_sents if numpy.random.random() >= drop * drop_factor]
- if subset:
- sents.extend(subset)
- lengths.append(len(subset))
- else:
- numpy.random.shuffle(doc_sents)
- sents.append(doc_sents[0])
- lengths.append(1)
- flat, bp_flat = layer.begin_update(sents, drop=0.)
- output = layer.ops.unflatten(flat, lengths)
- def sentence_bwd(d_output, sgd=None):
- d_flat = layer.ops.flatten(d_output)
- d_sents = bp_flat(d_flat, sgd=sgd)
- if d_sents is None:
- return d_sents
- else:
- return layer.ops.unflatten(d_sents, lengths)
- return output, sentence_bwd
- model = wrap(sentence_fwd, layer)
- return model