You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

149 lines
3.5 KiB

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "from Medio import *\n",
  10. "\n",
  11. "\n",
  12. "\n",
  13. "medi = Medio(None,None)"
  14. ]
  15. },
  16. {
  17. "cell_type": "code",
  18. "execution_count": 2,
  19. "metadata": {},
  20. "outputs": [],
  21. "source": [
  22. "#medi.create_hklDB_from_csv('mediowords.txt', 'None')"
  23. ]
  24. },
  25. {
  26. "cell_type": "code",
  27. "execution_count": 3,
  28. "metadata": {},
  29. "outputs": [
  30. {
  31. "name": "stdout",
  32. "output_type": "stream",
  33. "text": [
  34. "Creating the bag of words...\n",
  35. "\n",
  36. "dumping the data to hkl format..\n",
  37. "done\n",
  38. "Creating the bag of words...\n",
  39. "\n",
  40. "dumping the data to hkl format..\n",
  41. "done\n"
  42. ]
  43. }
  44. ],
  45. "source": [
  46. "medi.load_DB_into_FASTsearch()"
  47. ]
  48. },
  49. {
  50. "cell_type": "code",
  51. "execution_count": 4,
  52. "metadata": {},
  53. "outputs": [],
  54. "source": [
  55. "sentences = [['das', 'ist', 'Kindersprache'],['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']]\n",
  56. "punctuations = ['.', '!', '.']"
  57. ]
  58. },
  59. {
  60. "cell_type": "code",
  61. "execution_count": 5,
  62. "metadata": {},
  63. "outputs": [
  64. {
  65. "name": "stdout",
  66. "output_type": "stream",
  67. "text": [
  68. "mediosofsentence ['das', 'ist', 'Kindersprache']\n",
  69. "['Oberbuergermeister']\n",
  70. "medioeintrag ['Ober·buerger·meister']\n",
  71. "['Oberbuergermeister']\n",
  72. "medioeintrag ['Ober·buerger·meister']\n",
  73. "['Kindersprache']\n",
  74. "medioeintrag ['Kinder·sprache']\n",
  75. "medioeintraege [['Kindersprache', ['Kinder·sprache']]]\n",
  76. "mediosofsentence ['er', 'ging', 'über', 'die', 'Straße']\n",
  77. "['Oberbuergermeister']\n",
  78. "medioeintrag ['Ober·buerger·meister']\n",
  79. "['Oberbuergermeister']\n",
  80. "medioeintrag ['Ober·buerger·meister']\n",
  81. "['Oberbuergermeister']\n",
  82. "medioeintrag ['Ober·buerger·meister']\n",
  83. "['Oberbuergermeister']\n",
  84. "medioeintrag ['Ober·buerger·meister']\n",
  85. "['Oberbuergermeister']\n",
  86. "medioeintrag ['Ober·buerger·meister']\n",
  87. "medioeintraege []\n",
  88. "mediosofsentence ['halt', 'ab', 'hier']\n",
  89. "['Oberbuergermeister']\n",
  90. "medioeintrag ['Ober·buerger·meister']\n",
  91. "['Oberbuergermeister']\n",
  92. "medioeintrag ['Ober·buerger·meister']\n",
  93. "['Oberbuergermeister']\n",
  94. "medioeintrag ['Ober·buerger·meister']\n",
  95. "medioeintraege []\n"
  96. ]
  97. }
  98. ],
  99. "source": [
  100. "outsentences, punctuations = medi.Medioreplace(sentences, punctuations)"
  101. ]
  102. },
  103. {
  104. "cell_type": "code",
  105. "execution_count": 6,
  106. "metadata": {},
  107. "outputs": [
  108. {
  109. "name": "stdout",
  110. "output_type": "stream",
  111. "text": [
  112. "[['das', 'ist', ['Kinder·sprache']], ['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']] ['.', '!', '.']\n"
  113. ]
  114. }
  115. ],
  116. "source": [
  117. "print(outsentences, punctuations)"
  118. ]
  119. },
  120. {
  121. "cell_type": "code",
  122. "execution_count": null,
  123. "metadata": {},
  124. "outputs": [],
  125. "source": []
  126. }
  127. ],
  128. "metadata": {
  129. "kernelspec": {
  130. "display_name": "Python 3",
  131. "language": "python",
  132. "name": "python3"
  133. },
  134. "language_info": {
  135. "codemirror_mode": {
  136. "name": "ipython",
  137. "version": 3
  138. },
  139. "file_extension": ".py",
  140. "mimetype": "text/x-python",
  141. "name": "python",
  142. "nbconvert_exporter": "python",
  143. "pygments_lexer": "ipython3",
  144. "version": "3.5.3"
  145. }
  146. },
  147. "nbformat": 4,
  148. "nbformat_minor": 2
  149. }