Folgefehler des implementierten Error-Handlings behoben + Fehler im Spezialfall der doppelten Abkuerzung am Satzende behoben

2020-09-17 14:40:50 +02:00 · 2020-09-17 14:40:50 +02:00 · ed40090463
commit ed40090463
parent 441eeed1d6
15 changed files with 70 additions and 37 deletions
--- a/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb
+++ b/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb
@ -241,7 +241,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "loading SentSeg Databases\n",
+      "loading SolveShorts Databases\n",
      "Creating the bag of words...\n",
      "\n",
      "dumping the data to hkl format..\n",
@ -257,7 +257,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6793c5121aaf498e8960726a40709e19",
+       "model_id": "82646fa586ba44aabc1608ec7a268b2c",
       "version_major": 2,
       "version_minor": 0
      },
@ -315,6 +315,10 @@
    "import dill\n",
    "dill.load_session('voilastate.db')\n",
    "\n",
    "#from SolveShorts import *\n",
    "#print('loading SolveShorts Databases')\n",
    "#solSh.load_DB_into_FASTsearch()\n",
    "\n",
    "#import SentSeg\n",
    "#sent_seg = SentSeg.SentSeg('de')\n",
    "#print('loading SentSeg Databases')\n",
@ -440,7 +444,7 @@
   "outputs": [],
   "source": [
    "import ipywidgets\n",
-    "out = 'Hier kommt der übersetzte Text raus.'\n",
+    "out = 'Hier kommt der übersetzte Text heraus.'\n",
    "\n",
    "TextFeld.add_class(\"thotext\")\n",
    "TextFelddouble.add_class(\"thotext\")\n",
@ -470,7 +474,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c833de5ff5d340bbb1988584eee0c368",
+       "model_id": "9e547f27f67f484c9b455ead6f63afb2",
       "version_major": 2,
       "version_minor": 0
      },
@ -607,7 +611,7 @@
    "        check = 1\n",
    "    \n",
    "    #print('sentences after cs', outsentences)\n",
-    "    #print(len(punctuations))\n",
+    "    print(len(punctuations))\n",
    "    \n",
    "    if FremdWB.value == True:\n",
    "        if check == 1:\n",
@ -617,6 +621,7 @@
    "        outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
    "        #print('outsentences')\n",
    "        check = 1\n",
    "    \n",
    "    if Medio.value == True:\n",
    "        if check == 1:\n",
    "            insentences = outsentences\n",
@ -630,7 +635,7 @@
    "    \n",
    "    #print('sentences after fwb', outsentences)\n",
    "    \n",
-    "    #print(len(punctuations))\n",
+    "    print(len(punctuations))\n",
    "    \n",
    "    if check == 1:\n",
    "        out = ''\n",
@ -669,12 +674,12 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
+       "model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
-       "interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…"
+       "interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
      ]
     },
     "metadata": {},
@ -690,7 +695,7 @@
    "\n",
    "#print(widgets.interact_manual.opts)\n",
    "\n",
-    "widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n",
+    "widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
    "ola =  widgets.interact_manual(function)\n",
    "ola.widget.children[0].style.button_color = 'lightgreen'\n",
    "ola.widget.children[0].layout.height = '50px'\n",
@ -709,12 +714,12 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
+       "model_id": "564058b35ab743fabff90d4c49c5aac3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
-       "Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…"
+       "Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
      ]
     },
     "metadata": {},
@ -831,7 +836,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
+       "model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
       "version_major": 2,
       "version_minor": 0
      },
@ -857,7 +862,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
+       "model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
       "version_major": 2,
       "version_minor": 0
      },
--- a/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl
+++ b/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl
--- a/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl
+++ b/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl
--- a/Prototyp/FremdWB.py
+++ b/Prototyp/FremdWB.py
@ -176,8 +176,9 @@ class FremdWB(object):
                #print('the endsentence',sentence)
            except:
-                print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
+                #print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
-                if sentence != outsentences[-1]:
+                #print('outsentence und co ', outsentences[-1], eintrag, sentence) 
                if sentence != outsentences[-1] and alleeintraege[-1] != outsentences[-1]:
                    outsentences.append(sentence)
                punctuations = oldpunctuations
        return outsentences, punctuations
--- a/Prototyp/Prototype.ipynb
+++ b/Prototyp/Prototype.ipynb
@ -241,7 +241,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "loading SentSeg Databases\n",
+      "loading SolveShorts Databases\n",
      "Creating the bag of words...\n",
      "\n",
      "dumping the data to hkl format..\n",
@ -257,7 +257,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6793c5121aaf498e8960726a40709e19",
+       "model_id": "82646fa586ba44aabc1608ec7a268b2c",
       "version_major": 2,
       "version_minor": 0
      },
@ -315,6 +315,10 @@
    "import dill\n",
    "dill.load_session('voilastate.db')\n",
    "\n",
    "#from SolveShorts import *\n",
    "#print('loading SolveShorts Databases')\n",
    "#solSh.load_DB_into_FASTsearch()\n",
    "\n",
    "#import SentSeg\n",
    "#sent_seg = SentSeg.SentSeg('de')\n",
    "#print('loading SentSeg Databases')\n",
@ -440,7 +444,7 @@
   "outputs": [],
   "source": [
    "import ipywidgets\n",
-    "out = 'Hier kommt der übersetzte Text raus.'\n",
+    "out = 'Hier kommt der übersetzte Text heraus.'\n",
    "\n",
    "TextFeld.add_class(\"thotext\")\n",
    "TextFelddouble.add_class(\"thotext\")\n",
@ -470,7 +474,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c833de5ff5d340bbb1988584eee0c368",
+       "model_id": "9e547f27f67f484c9b455ead6f63afb2",
       "version_major": 2,
       "version_minor": 0
      },
@ -607,7 +611,7 @@
    "        check = 1\n",
    "    \n",
    "    #print('sentences after cs', outsentences)\n",
-    "    #print(len(punctuations))\n",
+    "    print(len(punctuations))\n",
    "    \n",
    "    if FremdWB.value == True:\n",
    "        if check == 1:\n",
@ -617,6 +621,7 @@
    "        outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
    "        #print('outsentences')\n",
    "        check = 1\n",
    "    \n",
    "    if Medio.value == True:\n",
    "        if check == 1:\n",
    "            insentences = outsentences\n",
@ -630,7 +635,7 @@
    "    \n",
    "    #print('sentences after fwb', outsentences)\n",
    "    \n",
-    "    #print(len(punctuations))\n",
+    "    print(len(punctuations))\n",
    "    \n",
    "    if check == 1:\n",
    "        out = ''\n",
@ -669,12 +674,12 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
+       "model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
-       "interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…"
+       "interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
      ]
     },
     "metadata": {},
@ -690,7 +695,7 @@
    "\n",
    "#print(widgets.interact_manual.opts)\n",
    "\n",
-    "widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n",
+    "widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
    "ola =  widgets.interact_manual(function)\n",
    "ola.widget.children[0].style.button_color = 'lightgreen'\n",
    "ola.widget.children[0].layout.height = '50px'\n",
@ -709,12 +714,12 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
+       "model_id": "564058b35ab743fabff90d4c49c5aac3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
-       "Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…"
+       "Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
      ]
     },
     "metadata": {},
@ -831,7 +836,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
+       "model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
       "version_major": 2,
       "version_minor": 0
      },
@ -857,7 +862,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
+       "model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
       "version_major": 2,
       "version_minor": 0
      },
--- a/Prototyp/SolveShorts.py
+++ b/Prototyp/SolveShorts.py
@ -91,14 +91,19 @@ class SolveShorts(object):
            doc = self.nlp(' '.join(sentence))
            #print('da sentence', sentence)
            newshorts = []
-            for word in sentence:
+            wordcount = 0
-                
+            for oriword in sentence:
                wordcount += 1
                if wordcount == len(sentence):
                     word = oriword + '.'
                else:
                    word = oriword
                newshort = []
                prenewshort = []
                punctcount = list(word).count('.')
                #print(word, list(word), punctcount)
                if punctcount > 1:
-                    replaceindex = sentence.index(word)
+                    replaceindex = sentence.index(oriword)
                    dacount = 0
                    for letter in list(word):
                        #print('letter in word split', letter)
@ -153,11 +158,18 @@ class SolveShorts(object):
                                    interestingindex = m
                                    break
                        if len(sentence) - n <= 5 and n != len(sentence) - 1:
-                            for m in range((len(sentence) - n) - 1):
+                            for m in range((len(sentence) - n)):
-                                #print(n, m, n+m+1, len(sentence))
+                                #print('oleolaolu',n, m, n+m+1, len(sentence))
-                                if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.':
+                                
-                                    interestingindex = m
+                                if m == (len(sentence) - n) - 1:
-                                    break
+                                    if sentence[n + m][-1] == '.':
                                        interestingindex = m
                                        break
                                else:
                                    if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.' :
                                        interestingindex = m
                                        break
                    #print(interestingindex, 'interestingindex')
                    if interestingindex == 0:
--- a/Prototyp/Verbesserungen/Input143.txt
+++ b/Prototyp/Verbesserungen/Input143.txt
@ -0,0 +1,2 @@
 mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Prüfung nach mehrfach enttäuschten Versprechen gegenüber der Bürger-Ini auf juristischem Weg erzwungen werden .
 die Volks-Initiative klagt seit Mai gegen die lange Prüfdauer ihres Anliegens, große Wohnungskonzerne gegen Entschädigungen zu vergesellschaften und spricht von Verschleppung .
--- a/Prototyp/Verbesserungen/Output143.txt
+++ b/Prototyp/Verbesserungen/Output143.txt
@ -0,0 +1,3 @@
 mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischem Weg erzwungen werden .
 die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
 Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften und spricht von Verschleppung .
--- a/Prototyp/Verbesserungen/Verbesserungen143.txt
+++ b/Prototyp/Verbesserungen/Verbesserungen143.txt
@ -0,0 +1,5 @@
 mit dem Rechtsmittel soll die Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischen Weg erzwungen werden .
 die Pruefung dauert mittlerweile seit 439 Tagen an .
 die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
 ihr Anliegen ist Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften .
 und die Volks-Initiative spricht von Verschleppung .
--- a/Prototyp/Verbesserungen/indexDerVerbesserungen.txt
+++ b/Prototyp/Verbesserungen/indexDerVerbesserungen.txt
@ -1 +1 @@
-142
+143
--- a/Prototyp/pycache/FremdWB.cpython-35.pyc
+++ b/Prototyp/pycache/FremdWB.cpython-35.pyc
--- a/Prototyp/pycache/SolveShorts.cpython-35.pyc
+++ b/Prototyp/pycache/SolveShorts.cpython-35.pyc
--- a/Prototyp/bagofwordshkldbFremd_WB1.pkl
+++ b/Prototyp/bagofwordshkldbFremd_WB1.pkl
--- a/Prototyp/bagofwordshkldbFremd_WB2.pkl
+++ b/Prototyp/bagofwordshkldbFremd_WB2.pkl
--- a/Prototyp/voilastate.db
+++ b/Prototyp/voilastate.db
		`@ -0,0 +1,2 @@`
							`mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Prüfung nach mehrfach enttäuschten Versprechen gegenüber der Bürger-Ini auf juristischem Weg erzwungen werden .`
							`die Volks-Initiative klagt seit Mai gegen die lange Prüfdauer ihres Anliegens, große Wohnungskonzerne gegen Entschädigungen zu vergesellschaften und spricht von Verschleppung .`