You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

137 lines
2.7 KiB

4 years ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "from FremdWB import *\n",
  10. "\n",
  11. "\n",
  12. "\n",
  13. "fwb = FremdWB(None,None)"
  14. ]
  15. },
  16. {
  17. "cell_type": "code",
  18. "execution_count": 2,
  19. "metadata": {},
  20. "outputs": [
  21. {
  22. "name": "stdout",
  23. "output_type": "stream",
  24. "text": [
  25. "1000\n",
  26. "2000\n",
  27. "creating the hkl dump of Fremd_WBDBAll\n",
  28. "done..\n",
  29. "Creating the hkl dump of Fremd_WBDB 1\n",
  30. "Creating the hkl dump of Fremd_WBDB 2\n"
  31. ]
  32. },
  33. {
  34. "data": {
  35. "text/plain": [
  36. "'done'"
  37. ]
  38. },
  39. "execution_count": 2,
  40. "metadata": {},
  41. "output_type": "execute_result"
  42. }
  43. ],
  44. "source": [
  45. "fwb.create_hklDB_from_csv('HurrakiWoerterbuch_nodoubles.txt', 'None')"
  46. ]
  47. },
  48. {
  49. "cell_type": "code",
  50. "execution_count": 3,
  51. "metadata": {},
  52. "outputs": [
  53. {
  54. "name": "stdout",
  55. "output_type": "stream",
  56. "text": [
  57. "Creating the bag of words...\n",
  58. "\n",
  59. "dumping the data to hkl format..\n",
  60. "done\n",
  61. "Creating the bag of words...\n",
  62. "\n",
  63. "dumping the data to hkl format..\n",
  64. "done\n"
  65. ]
  66. }
  67. ],
  68. "source": [
  69. "fwb.load_DB_into_FASTsearch()"
  70. ]
  71. },
  72. {
  73. "cell_type": "code",
  74. "execution_count": 4,
  75. "metadata": {},
  76. "outputs": [],
  77. "source": [
  78. "sentences = [['das', 'ist', 'Abfall'],['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']]\n",
  79. "punctuations = ['.', '!', '.']"
  80. ]
  81. },
  82. {
  83. "cell_type": "code",
  84. "execution_count": 5,
  85. "metadata": {},
  86. "outputs": [],
  87. "source": [
  88. "outsentences, punctuations = fwb.fremdEintragAppend(sentences, punctuations)"
  89. ]
  90. },
  91. {
  92. "cell_type": "code",
  93. "execution_count": 6,
  94. "metadata": {},
  95. "outputs": [
  96. {
  97. "name": "stdout",
  98. "output_type": "stream",
  99. "text": [
  100. "[['das', 'ist', 'Abfall'], ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen'], ['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']] ['.', '.', '!', '.']\n"
  101. ]
  102. }
  103. ],
  104. "source": [
  105. "print(outsentences, punctuations)"
  106. ]
  107. },
  108. {
  109. "cell_type": "code",
  110. "execution_count": null,
  111. "metadata": {},
  112. "outputs": [],
  113. "source": []
  114. }
  115. ],
  116. "metadata": {
  117. "kernelspec": {
  118. "display_name": "Python 3",
  119. "language": "python",
  120. "name": "python3"
  121. },
  122. "language_info": {
  123. "codemirror_mode": {
  124. "name": "ipython",
  125. "version": 3
  126. },
  127. "file_extension": ".py",
  128. "mimetype": "text/x-python",
  129. "name": "python",
  130. "nbconvert_exporter": "python",
  131. "pygments_lexer": "ipython3",
  132. "version": "3.5.3"
  133. }
  134. },
  135. "nbformat": 4,
  136. "nbformat_minor": 2
  137. }