# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for preprocessing sequence data."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random

import numpy as np
from six.moves import range  # pylint: disable=redefined-builtin

from tensorflow.python.keras.utils.data_utils import Sequence
from tensorflow.python.util.tf_export import tf_export


@tf_export('keras.preprocessing.sequence.pad_sequences')
def pad_sequences(sequences,
                  maxlen=None,
                  dtype='int32',
                  padding='pre',
                  truncating='pre',
                  value=0.):
  """Pads sequences to the same length.

  This function transforms a list of `num_samples` sequences
  (lists of integers) into a 2D Numpy array of shape
  `(num_samples, num_timesteps)`. `num_timesteps` is either the `maxlen`
  argument if provided, or the length of the longest sequence otherwise.

  Sequences that are shorter than `num_timesteps` are padded with `value`
  until they are `num_timesteps` long.

  Sequences longer than `num_timesteps` are truncated so that they fit
  the desired length. The position where padding or truncation happens
  is determined by the arguments `padding` and `truncating`, respectively.

  Pre-padding is the default.

  Arguments:
      sequences: List of lists, where each element is a sequence.
      maxlen: Int, maximum length of all sequences.
      dtype: Type of the output sequences.
      padding: String, 'pre' or 'post':
          pad either before or after each sequence.
      truncating: String, 'pre' or 'post':
          remove values from sequences larger than
          `maxlen`, either at the beginning or at the end of the sequences.
      value: Float, padding value.

  Returns:
      x: Numpy array with shape `(len(sequences), maxlen)`

  Raises:
      ValueError: In case of invalid values for `truncating` or `padding`,
          or in case of invalid shape for a `sequences` entry.
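
  Example (an illustrative sketch of the default pre-padding and
  pre-truncation behavior; expected outputs are shown as comments):

  ```python
  sequences = [[1, 2], [3, 4, 5], [6]]
  pad_sequences(sequences)
  # array([[0, 1, 2],
  #        [3, 4, 5],
  #        [0, 0, 6]], dtype=int32)
  pad_sequences(sequences, maxlen=2, padding='post')
  # array([[1, 2],
  #        [4, 5],
  #        [6, 0]], dtype=int32)
  ```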
  """
  if not hasattr(sequences, '__len__'):
    raise ValueError('`sequences` must be iterable.')
  lengths = []
  for x in sequences:
    if not hasattr(x, '__len__'):
      raise ValueError('`sequences` must be a list of iterables. '
                       'Found non-iterable: ' + str(x))
    lengths.append(len(x))

  num_samples = len(sequences)
  if maxlen is None:
    maxlen = np.max(lengths)

  # Take the sample shape from the first non-empty sequence;
  # consistency with this shape is checked in the main loop below.
  sample_shape = tuple()
  for s in sequences:
    if len(s) > 0:  # pylint: disable=g-explicit-length-test
      sample_shape = np.asarray(s).shape[1:]
      break

  x = (np.ones((num_samples, maxlen) + sample_shape) * value).astype(dtype)
  for idx, s in enumerate(sequences):
    if not len(s):  # pylint: disable=g-explicit-length-test
      continue  # empty list/array was found
    if truncating == 'pre':
      trunc = s[-maxlen:]  # pylint: disable=invalid-unary-operand-type
    elif truncating == 'post':
      trunc = s[:maxlen]
    else:
      raise ValueError('Truncating type "%s" not understood' % truncating)

    # check `trunc` has expected shape
    trunc = np.asarray(trunc, dtype=dtype)
    if trunc.shape[1:] != sample_shape:
      raise ValueError('Shape of sample %s of sequence at position %s '
                       'is different from expected shape %s' %
                       (trunc.shape[1:], idx, sample_shape))

    if padding == 'post':
      x[idx, :len(trunc)] = trunc
    elif padding == 'pre':
      x[idx, -len(trunc):] = trunc
    else:
      raise ValueError('Padding type "%s" not understood' % padding)
  return x


@tf_export('keras.preprocessing.sequence.make_sampling_table')
def make_sampling_table(size, sampling_factor=1e-5):
  """Generates a word rank-based probabilistic sampling table.

  Used for generating the `sampling_table` argument for `skipgrams`.
  `sampling_table[i]` is the probability of sampling the i-th most common
  word in a dataset (more common words should be sampled less frequently,
  for balance).

  The sampling probabilities are generated according
  to the sampling distribution used in word2vec:

  `p(word) = min(1, sqrt(word_frequency / sampling_factor) /
  (word_frequency / sampling_factor))`

  We assume that the word frequencies follow Zipf's law (s=1) to derive
  a numerical approximation of frequency(rank):

  `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))`

  where `gamma` is the Euler-Mascheroni constant.

  Arguments:
      size: Int, number of possible words to sample.
      sampling_factor: The sampling factor in the word2vec formula.

  Returns:
      A 1D Numpy array of length `size` where the ith entry
      is the probability that a word of rank i should be sampled.
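
  Example (an illustrative sketch; the quoted numbers are approximate and
  depend on `sampling_factor`):

  ```python
  table = make_sampling_table(size=10)
  # table[1], the keep-probability of the most common word, is small
  # (roughly 0.003 with the default sampling_factor); entries grow with
  # rank and are capped at 1. for very rare words.
  ```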
  """
  gamma = 0.577  # Euler-Mascheroni constant
  rank = np.arange(size)
  rank[0] = 1  # avoid log(0) and division by zero for the rank-0 entry
  inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1. / (12. * rank)
  f = sampling_factor * inv_fq

  # Note f / np.sqrt(f) == np.sqrt(f), i.e. the keep-probability is
  # min(1, sqrt(sampling_factor / frequency(rank))).
  return np.minimum(1., f / np.sqrt(f))


@tf_export('keras.preprocessing.sequence.skipgrams')
def skipgrams(sequence,
              vocabulary_size,
              window_size=4,
              negative_samples=1.,
              shuffle=True,
              categorical=False,
              sampling_table=None,
              seed=None):
  """Generates skipgram word pairs.

  This function transforms a sequence of word indexes (list of integers)
  into tuples of words of the form:

  - (word, word in the same window), with label 1 (positive samples).
  - (word, random word from the vocabulary), with label 0 (negative samples).

  Read more about Skipgram in this gnomic paper by Mikolov et al.:
  [Efficient Estimation of Word Representations in
  Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf)

  Arguments:
      sequence: A word sequence (sentence), encoded as a list
          of word indices (integers). If using a `sampling_table`,
          word indices are expected to match the rank
          of the words in a reference dataset (e.g. 10 would encode
          the 10-th most frequently occurring token).
          Note that index 0 is expected to be a non-word and will be skipped.
      vocabulary_size: Int, maximum possible word index + 1.
      window_size: Int, size of sampling windows (technically half-window).
          The window of a word `w_i` will be
          `[i - window_size, i + window_size + 1]`.
      negative_samples: Float >= 0. 0 for no negative (i.e. random) samples.
          1 for the same number as positive samples.
      shuffle: Whether to shuffle the word couples before returning them.
      categorical: bool. If False, labels will be
          integers (e.g. `[0, 1, 1 .. ]`),
          if `True`, labels will be categorical, e.g.
          `[[1,0],[0,1],[0,1] .. ]`.
      sampling_table: 1D array of size `vocabulary_size` where the entry i
          encodes the probability to sample a word of rank i.
      seed: Random seed.

  Returns:
      couples, labels: where `couples` are int pairs and
          `labels` are either 0 or 1.

  # Note
      By convention, index 0 in the vocabulary is
      a non-word and will be skipped.
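
  Example (an illustrative sketch with shuffling and negative sampling
  disabled so that the output is deterministic):

  ```python
  couples, labels = skipgrams([1, 2, 3], vocabulary_size=4,
                              window_size=1, negative_samples=0.,
                              shuffle=False)
  # couples: [[1, 2], [2, 1], [2, 3], [3, 2]]
  # labels:  [1, 1, 1, 1]
  ```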
  """
  couples = []
  labels = []
  for i, wi in enumerate(sequence):
    if not wi:
      continue
    if sampling_table is not None:
      if sampling_table[wi] < random.random():
        continue

    window_start = max(0, i - window_size)
    window_end = min(len(sequence), i + window_size + 1)
    for j in range(window_start, window_end):
      if j != i:
        wj = sequence[j]
        if not wj:
          continue
        couples.append([wi, wj])
        if categorical:
          labels.append([0, 1])
        else:
          labels.append(1)

  if negative_samples > 0:
    num_negative_samples = int(len(labels) * negative_samples)
    words = [c[0] for c in couples]
    random.shuffle(words)

    # Pair shuffled target words with random word indices (index 0 is
    # reserved for the non-word token and is never drawn).
    couples += [[words[i % len(words)],
                 random.randint(1, vocabulary_size - 1)]
                for i in range(num_negative_samples)]
    if categorical:
      labels += [[1, 0]] * num_negative_samples
    else:
      labels += [0] * num_negative_samples

  if shuffle:
    if seed is None:
      seed = random.randint(0, 10**7)  # `random.randint` requires int bounds
    # Re-seeding with the same seed shuffles couples and labels identically.
    random.seed(seed)
    random.shuffle(couples)
    random.seed(seed)
    random.shuffle(labels)

  return couples, labels


def _remove_long_seq(maxlen, seq, label):
  """Removes sequences that exceed the maximum length.

  Only sequences strictly shorter than `maxlen` are kept.

  Arguments:
      maxlen: Int, maximum length of the output sequences.
      seq: List of lists, where each sublist is a sequence.
      label: List where each element is an integer.

  Returns:
      new_seq, new_label: shortened lists for `seq` and `label`.
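
  Example (an illustrative sketch; with `maxlen=3`, only sequences
  strictly shorter than 3 are kept):

  ```python
  _remove_long_seq(maxlen=3, seq=[[1, 2], [1, 2, 3, 4]], label=[0, 1])
  # -> ([[1, 2]], [0])
  ```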
  """
  new_seq, new_label = [], []
  for x, y in zip(seq, label):
    if len(x) < maxlen:
      new_seq.append(x)
      new_label.append(y)
  return new_seq, new_label


@tf_export('keras.preprocessing.sequence.TimeseriesGenerator')
class TimeseriesGenerator(Sequence):
  """Utility class for generating batches of temporal data.

  This class takes in a sequence of data-points gathered at
  equal intervals, along with time series parameters such as
  stride, length of history, etc., to produce batches for
  training/validation.

  Arguments:
      data: Indexable generator (such as a list or Numpy array)
          containing consecutive data points (timesteps).
          The data should be at least 2D, and axis 0 is expected
          to be the time dimension.
      targets: Targets corresponding to timesteps in `data`.
          It should have the same length as `data`.
      length: Length of the output sequences (in number of timesteps).
      sampling_rate: Period between successive individual timesteps
          within sequences. For rate `r`, timesteps
          `data[i]`, `data[i-r]`, ... `data[i - length]`
          are used to create a sample sequence.
      stride: Period between successive output sequences.
          For stride `s`, consecutive output samples would
          be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
      start_index, end_index: Data points earlier than `start_index`
          or later than `end_index` will not be used in the output sequences.
          This is useful to reserve part of the data for test or validation.
      shuffle: Whether to shuffle output samples,
          or instead draw them in chronological order.
      reverse: Boolean: if `True`, timesteps in each output sample will be
          in reverse chronological order.
      batch_size: Number of timeseries samples in each batch
          (except maybe the last one).

  Returns:
      A [Sequence](/utils/#sequence) instance.

  Examples:

  ```python
  from keras.preprocessing.sequence import TimeseriesGenerator
  import numpy as np

  data = np.array([[i] for i in range(50)])
  targets = np.array([[i] for i in range(50)])

  data_gen = TimeseriesGenerator(data, targets,
                                 length=10, sampling_rate=2,
                                 batch_size=2)
  assert len(data_gen) == 20

  batch_0 = data_gen[0]
  x, y = batch_0
  assert np.array_equal(x,
                        np.array([[[0], [2], [4], [6], [8]],
                                  [[1], [3], [5], [7], [9]]]))
  assert np.array_equal(y,
                        np.array([[10], [11]]))
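
  # Successive batches advance by `batch_size * stride` timesteps; an
  # illustrative check of the second batch under the same settings:
  x, y = data_gen[1]
  assert np.array_equal(x,
                        np.array([[[2], [4], [6], [8], [10]],
                                  [[3], [5], [7], [9], [11]]]))
  assert np.array_equal(y,
                        np.array([[12], [13]]))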
  ```
  """

  def __init__(self,
               data,
               targets,
               length,
               sampling_rate=1,
               stride=1,
               start_index=0,
               end_index=None,
               shuffle=False,
               reverse=False,
               batch_size=128):
    self.data = data
    self.targets = targets
    self.length = length
    self.sampling_rate = sampling_rate
    self.stride = stride
    self.start_index = start_index + length
    if end_index is None:
      end_index = len(data) - 1
    self.end_index = end_index
    self.shuffle = shuffle
    self.reverse = reverse
    self.batch_size = batch_size

    if self.start_index > self.end_index:
      raise ValueError('`start_index+length=%i > end_index=%i` '
                       'is disallowed, as no part of the sequence '
                       'would be left to be used as current step.' %
                       (self.start_index, self.end_index))

  def __len__(self):
    # Each batch consumes `batch_size * stride` of the admissible
    # current-step indices in [start_index, end_index].
    length = int(
        np.ceil((self.end_index - self.start_index + 1) /
                (self.batch_size * self.stride)))
    return length if length >= 0 else 0

  def _empty_batch(self, num_rows):
    # Allocate uninitialized sample/target arrays of the right shape;
    # each sample holds `length // sampling_rate` timesteps.
    samples_shape = [num_rows, self.length // self.sampling_rate]
    samples_shape.extend(self.data.shape[1:])
    targets_shape = [num_rows]
    targets_shape.extend(self.targets.shape[1:])
    return np.empty(samples_shape), np.empty(targets_shape)

  def __getitem__(self, index):
    if self.shuffle:
      rows = np.random.randint(
          self.start_index, self.end_index + 1, size=self.batch_size)
    else:
      i = self.start_index + self.batch_size * self.stride * index
      rows = np.arange(
          i, min(i + self.batch_size * self.stride, self.end_index + 1),
          self.stride)

    samples, targets = self._empty_batch(len(rows))
    for j in range(len(rows)):
      # The sample covers timesteps in [rows[j] - length, rows[j]) at the
      # given sampling rate; the target is the value at current step rows[j].
      indices = range(rows[j] - self.length, rows[j], self.sampling_rate)
      samples[j] = self.data[indices]
      targets[j] = self.targets[rows[j]]
    if self.reverse:
      return samples[:, ::-1, ...], targets
    return samples, targets