You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
4.5 KiB

4 years ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "from FremdWB import *\n",
  10. "\n",
  11. "\n",
  12. "\n",
  13. "fwb = FremdWB(None,None)"
  14. ]
  15. },
  16. {
  17. "cell_type": "code",
  18. "execution_count": 2,
  19. "metadata": {},
  20. "outputs": [],
  21. "source": [
  22. "#fwb.create_hklDB_from_csv('HurrakiWoerterbuch_nodoubles.txt', 'None')"
  23. ]
  24. },
  25. {
  26. "cell_type": "code",
  27. "execution_count": 3,
  28. "metadata": {},
  29. "outputs": [
  30. {
  31. "name": "stdout",
  32. "output_type": "stream",
  33. "text": [
  34. "generating BoW Model 1..\n",
  35. "Creating the bag of words...\n",
  36. "\n",
  37. "dumping the data to hkl format..\n",
  38. "done\n",
  39. "generating BoW Model 2..\n",
  40. "Creating the bag of words...\n",
  41. "\n",
  42. "dumping the data to hkl format..\n",
  43. "done\n",
  44. "loading the bow model 1\n",
  45. "loading the bow model 2\n",
  46. "done\n"
  47. ]
  48. }
  49. ],
  50. "source": [
  51. "fwb.load_DB_into_FASTsearch()"
  52. ]
  53. },
  54. {
  55. "cell_type": "code",
  56. "execution_count": 4,
  57. "metadata": {},
  58. "outputs": [],
  59. "source": [
  60. "sentences = [['das', 'ist', 'Abfall'],['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']]\n",
  61. "punctuations = ['.', '!', '.']"
  62. ]
  63. },
  64. {
  65. "cell_type": "code",
  66. "execution_count": 5,
  67. "metadata": {},
  68. "outputs": [
  69. {
  70. "name": "stdout",
  71. "output_type": "stream",
  72. "text": [
  73. "something\n",
  74. "sentence ['das', 'ist', 'Abfall']\n",
  75. "['ist', 'Abfall']\n",
  76. "['Zyprer']\n",
  77. "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
  78. "['Abfall']\n",
  79. "fremdeintrag ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen.']\n",
  80. "fremdeintraege [['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen.']]\n",
  81. "sentence ['er', 'ging', 'über', 'die', 'Straße']\n",
  82. "['ging', 'über', 'die', 'Straße']\n",
  83. "['Zyprer']\n",
  84. "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
  85. "['Zyprer']\n",
  86. "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
  87. "['Enzyklopädie']\n",
  88. "fremdeintrag ['Enzyklopädie', 'ist', 'ein', 'anderes', 'Wort', 'für', 'Lexikon.', 'In', 'einer', 'Enzyklopädie', 'findet', 'man', 'Informationen.']\n",
  89. "['Zyprer']\n",
  90. "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
  91. "fremdeintraege []\n",
  92. "sentence ['halt', 'ab', 'hier']\n",
  93. "['halt', 'ab', 'hier']\n",
  94. "['Zyprer']\n",
  95. "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
  96. "['Zyprer']\n",
  97. "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
  98. "['Zyprer']\n",
  99. "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
  100. "fremdeintraege []\n"
  101. ]
  102. }
  103. ],
  104. "source": [
  105. "outsentences, punctuations = fwb.fremdEintragAppend(sentences, punctuations)"
  106. ]
  107. },
  108. {
  109. "cell_type": "code",
  110. "execution_count": 6,
  111. "metadata": {},
  112. "outputs": [
  113. {
  114. "name": "stdout",
  115. "output_type": "stream",
  116. "text": [
  117. "[['das', 'ist', 'Abfall'], ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen'], ['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']] ['.', '.', '.', '!']\n"
  118. ]
  119. }
  120. ],
  121. "source": [
  122. "print(outsentences, punctuations)"
  123. ]
  124. },
  125. {
  126. "cell_type": "code",
  127. "execution_count": null,
  128. "metadata": {},
  129. "outputs": [],
  130. "source": []
  131. }
  132. ],
  133. "metadata": {
  134. "kernelspec": {
  135. "display_name": "Python 3",
  136. "language": "python",
  137. "name": "python3"
  138. },
  139. "language_info": {
  140. "codemirror_mode": {
  141. "name": "ipython",
  142. "version": 3
  143. },
  144. "file_extension": ".py",
  145. "mimetype": "text/x-python",
  146. "name": "python",
  147. "nbconvert_exporter": "python",
  148. "pygments_lexer": "ipython3",
  149. "version": "3.5.3"
  150. }
  151. },
  152. "nbformat": 4,
  153. "nbformat_minor": 2
  154. }