You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

104 lines
2.8 KiB

4 years ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 2,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "from SolveShorts import *\n",
  10. "\n",
  11. "from oi import *\n"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 3,
  17. "metadata": {},
  18. "outputs": [],
  19. "source": [
  20. "solSh = SolveShorts('hkldbShorts.hkl', 'hkldbShorts_All.hkl')\n",
  21. "\n",
  22. "#solSh.create_hklDB_from_csv('Abkuerzungen.txt')\n",
  23. "\n",
  24. "oi = oi()\n"
  25. ]
  26. },
  27. {
  28. "cell_type": "code",
  29. "execution_count": null,
  30. "metadata": {},
  31. "outputs": [],
  32. "source": [
  33. "sentences, punctuations = oi.ReadDoc2Sent('test1')"
  34. ]
  35. },
  36. {
  37. "cell_type": "code",
  38. "execution_count": 5,
  39. "metadata": {},
  40. "outputs": [
  41. {
  42. "name": "stdout",
  43. "output_type": "stream",
  44. "text": [
  45. "loading hkldbShorts ..\n",
  46. "done\n",
  47. "loading the bow model\n",
  48. "done\n"
  49. ]
  50. }
  51. ],
  52. "source": [
  53. "solSh.load_DB_into_FASTsearch()\n"
  54. ]
  55. },
  56. {
  57. "cell_type": "code",
  58. "execution_count": 6,
  59. "metadata": {},
  60. "outputs": [
  61. {
  62. "name": "stdout",
  63. "output_type": "stream",
  64. "text": [
  65. "[['Hallo', 'was', 'geht', 'denn', 'hier', 'so', '(sonntag)'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', '(in der regel)', '(rechts rot)', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', '(juristisch)', 'wird', 'alles', 'problematisch'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'ging', 'nicht', 'schnell'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'geht', 'davon', 'aus,', 'dass', 'es', 'schnell', 'zu', 'Ende', 'sein', 'wird']]\n"
  66. ]
  67. }
  68. ],
  69. "source": [
  70. "outsentences = solSh.ExplainShortsInSentencesWithBrackets(sentences)"
  71. ]
  72. },
  73. {
  74. "cell_type": "code",
  75. "execution_count": null,
  76. "metadata": {},
  77. "outputs": [],
  78. "source": [
  79. "oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'test1out')"
  80. ]
  81. }
  82. ],
  83. "metadata": {
  84. "kernelspec": {
  85. "display_name": "Python 3",
  86. "language": "python",
  87. "name": "python3"
  88. },
  89. "language_info": {
  90. "codemirror_mode": {
  91. "name": "ipython",
  92. "version": 3
  93. },
  94. "file_extension": ".py",
  95. "mimetype": "text/x-python",
  96. "name": "python",
  97. "nbconvert_exporter": "python",
  98. "pygments_lexer": "ipython3",
  99. "version": "3.5.3"
  100. }
  101. },
  102. "nbformat": 4,
  103. "nbformat_minor": 2
  104. }