You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

306 lines
11 KiB

4 years ago
  1. import time
  2. from .exceptions import EOF, TIMEOUT
  3. class Expecter(object):
  4. def __init__(self, spawn, searcher, searchwindowsize=-1):
  5. self.spawn = spawn
  6. self.searcher = searcher
  7. if searchwindowsize == -1:
  8. searchwindowsize = spawn.searchwindowsize
  9. self.searchwindowsize = searchwindowsize
  10. def new_data(self, data):
  11. spawn = self.spawn
  12. searcher = self.searcher
  13. pos = spawn._buffer.tell()
  14. spawn._buffer.write(data)
  15. spawn._before.write(data)
  16. # determine which chunk of data to search; if a windowsize is
  17. # specified, this is the *new* data + the preceding <windowsize> bytes
  18. if self.searchwindowsize:
  19. spawn._buffer.seek(max(0, pos - self.searchwindowsize))
  20. window = spawn._buffer.read(self.searchwindowsize + len(data))
  21. else:
  22. # otherwise, search the whole buffer (really slow for large datasets)
  23. window = spawn.buffer
  24. index = searcher.search(window, len(data))
  25. if index >= 0:
  26. spawn._buffer = spawn.buffer_type()
  27. spawn._buffer.write(window[searcher.end:])
  28. spawn.before = spawn._before.getvalue()[0:-(len(window) - searcher.start)]
  29. spawn._before = spawn.buffer_type()
  30. spawn.after = window[searcher.start: searcher.end]
  31. spawn.match = searcher.match
  32. spawn.match_index = index
  33. # Found a match
  34. return index
  35. elif self.searchwindowsize:
  36. spawn._buffer = spawn.buffer_type()
  37. spawn._buffer.write(window)
  38. def eof(self, err=None):
  39. spawn = self.spawn
  40. spawn.before = spawn.buffer
  41. spawn._buffer = spawn.buffer_type()
  42. spawn._before = spawn.buffer_type()
  43. spawn.after = EOF
  44. index = self.searcher.eof_index
  45. if index >= 0:
  46. spawn.match = EOF
  47. spawn.match_index = index
  48. return index
  49. else:
  50. spawn.match = None
  51. spawn.match_index = None
  52. msg = str(spawn)
  53. msg += '\nsearcher: %s' % self.searcher
  54. if err is not None:
  55. msg = str(err) + '\n' + msg
  56. raise EOF(msg)
  57. def timeout(self, err=None):
  58. spawn = self.spawn
  59. spawn.before = spawn.buffer
  60. spawn.after = TIMEOUT
  61. index = self.searcher.timeout_index
  62. if index >= 0:
  63. spawn.match = TIMEOUT
  64. spawn.match_index = index
  65. return index
  66. else:
  67. spawn.match = None
  68. spawn.match_index = None
  69. msg = str(spawn)
  70. msg += '\nsearcher: %s' % self.searcher
  71. if err is not None:
  72. msg = str(err) + '\n' + msg
  73. raise TIMEOUT(msg)
  74. def errored(self):
  75. spawn = self.spawn
  76. spawn.before = spawn.buffer
  77. spawn.after = None
  78. spawn.match = None
  79. spawn.match_index = None
  80. def expect_loop(self, timeout=-1):
  81. """Blocking expect"""
  82. spawn = self.spawn
  83. if timeout is not None:
  84. end_time = time.time() + timeout
  85. try:
  86. incoming = spawn.buffer
  87. spawn._buffer = spawn.buffer_type()
  88. spawn._before = spawn.buffer_type()
  89. while True:
  90. idx = self.new_data(incoming)
  91. # Keep reading until exception or return.
  92. if idx is not None:
  93. return idx
  94. # No match at this point
  95. if (timeout is not None) and (timeout < 0):
  96. return self.timeout()
  97. # Still have time left, so read more data
  98. incoming = spawn.read_nonblocking(spawn.maxread, timeout)
  99. if self.spawn.delayafterread is not None:
  100. time.sleep(self.spawn.delayafterread)
  101. if timeout is not None:
  102. timeout = end_time - time.time()
  103. except EOF as e:
  104. return self.eof(e)
  105. except TIMEOUT as e:
  106. return self.timeout(e)
  107. except:
  108. self.errored()
  109. raise
  110. class searcher_string(object):
  111. '''This is a plain string search helper for the spawn.expect_any() method.
  112. This helper class is for speed. For more powerful regex patterns
  113. see the helper class, searcher_re.
  114. Attributes:
  115. eof_index - index of EOF, or -1
  116. timeout_index - index of TIMEOUT, or -1
  117. After a successful match by the search() method the following attributes
  118. are available:
  119. start - index into the buffer, first byte of match
  120. end - index into the buffer, first byte after match
  121. match - the matching string itself
  122. '''
  123. def __init__(self, strings):
  124. '''This creates an instance of searcher_string. This argument 'strings'
  125. may be a list; a sequence of strings; or the EOF or TIMEOUT types. '''
  126. self.eof_index = -1
  127. self.timeout_index = -1
  128. self._strings = []
  129. for n, s in enumerate(strings):
  130. if s is EOF:
  131. self.eof_index = n
  132. continue
  133. if s is TIMEOUT:
  134. self.timeout_index = n
  135. continue
  136. self._strings.append((n, s))
  137. def __str__(self):
  138. '''This returns a human-readable string that represents the state of
  139. the object.'''
  140. ss = [(ns[0], ' %d: %r' % ns) for ns in self._strings]
  141. ss.append((-1, 'searcher_string:'))
  142. if self.eof_index >= 0:
  143. ss.append((self.eof_index, ' %d: EOF' % self.eof_index))
  144. if self.timeout_index >= 0:
  145. ss.append((self.timeout_index,
  146. ' %d: TIMEOUT' % self.timeout_index))
  147. ss.sort()
  148. ss = list(zip(*ss))[1]
  149. return '\n'.join(ss)
  150. def search(self, buffer, freshlen, searchwindowsize=None):
  151. '''This searches 'buffer' for the first occurrence of one of the search
  152. strings. 'freshlen' must indicate the number of bytes at the end of
  153. 'buffer' which have not been searched before. It helps to avoid
  154. searching the same, possibly big, buffer over and over again.
  155. See class spawn for the 'searchwindowsize' argument.
  156. If there is a match this returns the index of that string, and sets
  157. 'start', 'end' and 'match'. Otherwise, this returns -1. '''
  158. first_match = None
  159. # 'freshlen' helps a lot here. Further optimizations could
  160. # possibly include:
  161. #
  162. # using something like the Boyer-Moore Fast String Searching
  163. # Algorithm; pre-compiling the search through a list of
  164. # strings into something that can scan the input once to
  165. # search for all N strings; realize that if we search for
  166. # ['bar', 'baz'] and the input is '...foo' we need not bother
  167. # rescanning until we've read three more bytes.
  168. #
  169. # Sadly, I don't know enough about this interesting topic. /grahn
  170. for index, s in self._strings:
  171. if searchwindowsize is None:
  172. # the match, if any, can only be in the fresh data,
  173. # or at the very end of the old data
  174. offset = -(freshlen + len(s))
  175. else:
  176. # better obey searchwindowsize
  177. offset = -searchwindowsize
  178. n = buffer.find(s, offset)
  179. if n >= 0 and (first_match is None or n < first_match):
  180. first_match = n
  181. best_index, best_match = index, s
  182. if first_match is None:
  183. return -1
  184. self.match = best_match
  185. self.start = first_match
  186. self.end = self.start + len(self.match)
  187. return best_index
  188. class searcher_re(object):
  189. '''This is regular expression string search helper for the
  190. spawn.expect_any() method. This helper class is for powerful
  191. pattern matching. For speed, see the helper class, searcher_string.
  192. Attributes:
  193. eof_index - index of EOF, or -1
  194. timeout_index - index of TIMEOUT, or -1
  195. After a successful match by the search() method the following attributes
  196. are available:
  197. start - index into the buffer, first byte of match
  198. end - index into the buffer, first byte after match
  199. match - the re.match object returned by a successful re.search
  200. '''
  201. def __init__(self, patterns):
  202. '''This creates an instance that searches for 'patterns' Where
  203. 'patterns' may be a list or other sequence of compiled regular
  204. expressions, or the EOF or TIMEOUT types.'''
  205. self.eof_index = -1
  206. self.timeout_index = -1
  207. self._searches = []
  208. for n, s in zip(list(range(len(patterns))), patterns):
  209. if s is EOF:
  210. self.eof_index = n
  211. continue
  212. if s is TIMEOUT:
  213. self.timeout_index = n
  214. continue
  215. self._searches.append((n, s))
  216. def __str__(self):
  217. '''This returns a human-readable string that represents the state of
  218. the object.'''
  219. #ss = [(n, ' %d: re.compile("%s")' %
  220. # (n, repr(s.pattern))) for n, s in self._searches]
  221. ss = list()
  222. for n, s in self._searches:
  223. ss.append((n, ' %d: re.compile(%r)' % (n, s.pattern)))
  224. ss.append((-1, 'searcher_re:'))
  225. if self.eof_index >= 0:
  226. ss.append((self.eof_index, ' %d: EOF' % self.eof_index))
  227. if self.timeout_index >= 0:
  228. ss.append((self.timeout_index, ' %d: TIMEOUT' %
  229. self.timeout_index))
  230. ss.sort()
  231. ss = list(zip(*ss))[1]
  232. return '\n'.join(ss)
  233. def search(self, buffer, freshlen, searchwindowsize=None):
  234. '''This searches 'buffer' for the first occurrence of one of the regular
  235. expressions. 'freshlen' must indicate the number of bytes at the end of
  236. 'buffer' which have not been searched before.
  237. See class spawn for the 'searchwindowsize' argument.
  238. If there is a match this returns the index of that string, and sets
  239. 'start', 'end' and 'match'. Otherwise, returns -1.'''
  240. first_match = None
  241. # 'freshlen' doesn't help here -- we cannot predict the
  242. # length of a match, and the re module provides no help.
  243. if searchwindowsize is None:
  244. searchstart = 0
  245. else:
  246. searchstart = max(0, len(buffer) - searchwindowsize)
  247. for index, s in self._searches:
  248. match = s.search(buffer, searchstart)
  249. if match is None:
  250. continue
  251. n = match.start()
  252. if first_match is None or n < first_match:
  253. first_match = n
  254. the_match = match
  255. best_index = index
  256. if first_match is None:
  257. return -1
  258. self.start = first_match
  259. self.match = the_match
  260. self.end = self.match.end()
  261. return best_index