You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

107 lines
3.3 KiB

4 years ago
  1. from .typedefs cimport attr_t, hash_t, flags_t, len_t, tag_t
  2. from .attrs cimport attr_id_t
  3. from .attrs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER, LANG
  4. from .structs cimport LexemeC, SerializedLexemeC
  5. from .strings cimport StringStore
  6. from .vocab cimport Vocab
  7. from numpy cimport ndarray
  8. cdef LexemeC EMPTY_LEXEME
  9. cdef class Lexeme:
  10. cdef LexemeC* c
  11. cdef readonly Vocab vocab
  12. cdef readonly attr_t orth
  13. @staticmethod
  14. cdef inline Lexeme from_ptr(LexemeC* lex, Vocab vocab, int vector_length):
  15. cdef Lexeme self = Lexeme.__new__(Lexeme, vocab, lex.orth)
  16. self.c = lex
  17. self.vocab = vocab
  18. self.orth = lex.orth
  19. @staticmethod
  20. cdef inline SerializedLexemeC c_to_bytes(const LexemeC* lex) nogil:
  21. cdef SerializedLexemeC lex_data
  22. buff = <const unsigned char*>&lex.flags
  23. end = <const unsigned char*>&lex.sentiment + sizeof(lex.sentiment)
  24. for i in range(sizeof(lex_data.data)):
  25. lex_data.data[i] = buff[i]
  26. return lex_data
  27. @staticmethod
  28. cdef inline void c_from_bytes(LexemeC* lex, SerializedLexemeC lex_data) nogil:
  29. buff = <unsigned char*>&lex.flags
  30. end = <unsigned char*>&lex.sentiment + sizeof(lex.sentiment)
  31. for i in range(sizeof(lex_data.data)):
  32. buff[i] = lex_data.data[i]
  33. @staticmethod
  34. cdef inline void set_struct_attr(LexemeC* lex, attr_id_t name, attr_t value) nogil:
  35. if name < (sizeof(flags_t) * 8):
  36. Lexeme.c_set_flag(lex, name, value)
  37. elif name == ID:
  38. lex.id = value
  39. elif name == LOWER:
  40. lex.lower = value
  41. elif name == NORM:
  42. lex.norm = value
  43. elif name == SHAPE:
  44. lex.shape = value
  45. elif name == PREFIX:
  46. lex.prefix = value
  47. elif name == SUFFIX:
  48. lex.suffix = value
  49. elif name == CLUSTER:
  50. lex.cluster = value
  51. elif name == LANG:
  52. lex.lang = value
  53. @staticmethod
  54. cdef inline attr_t get_struct_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
  55. if feat_name < (sizeof(flags_t) * 8):
  56. if Lexeme.c_check_flag(lex, feat_name):
  57. return 1
  58. else:
  59. return 0
  60. elif feat_name == ID:
  61. return lex.id
  62. elif feat_name == ORTH:
  63. return lex.orth
  64. elif feat_name == LOWER:
  65. return lex.lower
  66. elif feat_name == NORM:
  67. return lex.norm
  68. elif feat_name == SHAPE:
  69. return lex.shape
  70. elif feat_name == PREFIX:
  71. return lex.prefix
  72. elif feat_name == SUFFIX:
  73. return lex.suffix
  74. elif feat_name == LENGTH:
  75. return lex.length
  76. elif feat_name == CLUSTER:
  77. return lex.cluster
  78. elif feat_name == LANG:
  79. return lex.lang
  80. else:
  81. return 0
  82. @staticmethod
  83. cdef inline bint c_check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
  84. cdef flags_t one = 1
  85. if lexeme.flags & (one << flag_id):
  86. return True
  87. else:
  88. return False
  89. @staticmethod
  90. cdef inline bint c_set_flag(LexemeC* lex, attr_id_t flag_id, bint value) nogil:
  91. cdef flags_t one = 1
  92. if value:
  93. lex.flags |= one << flag_id
  94. else:
  95. lex.flags &= ~(one << flag_id)