{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Explain abbreviations in a document\n",
    "\n",
    "Reads the document `test1`, runs its sentences through `SolveShorts.ExplainShortsInSentencesWithBrackets`, and writes the result to `test1out`.\n",
    "Run the cells top to bottom on a fresh kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import only the names this notebook uses instead of `import *`.\n",
    "from SolveShorts import SolveShorts\n",
    "\n",
    "# Alias the class: the instance created in the next cell is named `oi`,\n",
    "# and must not overwrite the class it is built from.\n",
    "from oi import oi as OiClass\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Both .hkl paths go straight to the SolveShorts constructor\n",
    "# (presumably the abbreviation databases -- TODO confirm).\n",
    "solSh = SolveShorts('hkldbShorts.hkl', 'hkldbShorts_All.hkl')\n",
    "\n",
    "# Disabled step, kept for reference (presumably rebuilds the DB from the\n",
    "# abbreviation list -- confirm before enabling):\n",
    "#solSh.create_hklDB_from_csv('Abkuerzungen.txt')\n",
    "\n",
    "# Safe to re-run: the class is still reachable as `OiClass`.\n",
    "oi = OiClass()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the input document 'test1'; both return values are needed below\n",
    "# (sentences for the abbreviation step, punctuations for writing the output).\n",
    "sentences, punctuations = oi.ReadDoc2Sent('test1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading hkldbShorts ..\n",
      "done\n",
      "loading the bow model\n",
      "done\n"
     ]
    }
   ],
   "source": [
    "# Must run before the explanation step; prints its own progress messages.\n",
    "solSh.load_DB_into_FASTsearch()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['Hallo', 'was', 'geht', 'denn', 'hier', 'so', '(sonntag)'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', '(in der regel)', '(rechts rot)', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', '(juristisch)', 'wird', 'alles', 'problematisch'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'ging', 'nicht', 'schnell'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'geht', 'davon', 'aus,', 'dass', 'es', 'schnell', 'zu', 'Ende', 'sein', 'wird']]\n"
     ]
    }
   ],
   "source": [
    "# The call itself prints the processed sentences (see stored output):\n",
    "# each sentence is a list of tokens, with bracketed candidate expansions\n",
    "# inserted after tokens that matched an abbreviation.\n",
    "outsentences = solSh.ExplainShortsInSentencesWithBrackets(sentences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the processed sentences, together with the punctuations read\n",
    "# earlier, to 'test1out' (exact output path/extension is decided by `oi`).\n",
    "oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'test1out')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}