You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

76 lines
1.6 KiB

4 years ago
"""
All of the Enums that are used throughout the chardet package.

:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
  5. class InputState(object):
  6. """
  7. This enum represents the different states a universal detector can be in.
  8. """
  9. PURE_ASCII = 0
  10. ESC_ASCII = 1
  11. HIGH_BYTE = 2
  12. class LanguageFilter(object):
  13. """
  14. This enum represents the different language filters we can apply to a
  15. ``UniversalDetector``.
  16. """
  17. CHINESE_SIMPLIFIED = 0x01
  18. CHINESE_TRADITIONAL = 0x02
  19. JAPANESE = 0x04
  20. KOREAN = 0x08
  21. NON_CJK = 0x10
  22. ALL = 0x1F
  23. CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
  24. CJK = CHINESE | JAPANESE | KOREAN
  25. class ProbingState(object):
  26. """
  27. This enum represents the different states a prober can be in.
  28. """
  29. DETECTING = 0
  30. FOUND_IT = 1
  31. NOT_ME = 2
  32. class MachineState(object):
  33. """
  34. This enum represents the different states a state machine can be in.
  35. """
  36. START = 0
  37. ERROR = 1
  38. ITS_ME = 2
  39. class SequenceLikelihood(object):
  40. """
  41. This enum represents the likelihood of a character following the previous one.
  42. """
  43. NEGATIVE = 0
  44. UNLIKELY = 1
  45. LIKELY = 2
  46. POSITIVE = 3
  47. @classmethod
  48. def get_num_categories(cls):
  49. """:returns: The number of likelihood categories in the enum."""
  50. return 4
  51. class CharacterCategory(object):
  52. """
  53. This enum represents the different categories language models for
  54. ``SingleByteCharsetProber`` put characters into.
  55. Anything less than CONTROL is considered a letter.
  56. """
  57. UNDEFINED = 255
  58. LINE_BREAK = 254
  59. SYMBOL = 253
  60. DIGIT = 252
  61. CONTROL = 251