Link zum Code eingefuegt | Verbessertes Errorhandling fuer zwei FremdWB und CharAppend

2020-09-06 01:33:50 +02:00 · 2020-09-06 01:33:50 +02:00 · 441eeed1d6
commit 441eeed1d6
parent 2487d6de07
15 changed files with 351 additions and 157 deletions
--- a/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb
+++ b/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb
@ -36,7 +36,26 @@
    {
     "data": {
      "text/html": [
-       "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n"
+       "<style>\n",
       "\n",
       ".center {\n",
       "  display: block;\n",
       "  margin-left: auto;\n",
       "  margin-right: auto;\n",
       "  width: 20%;\n",
       "}\n",
       "\n",
       "body {\n",
       "    align: center;\n",
       "  }\n",
       "\n",
       "</style>\n",
       "<body>\n",
       "<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
       "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
       "</a>\n",
       "<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p>  \n",
       "</body>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@ -48,7 +67,35 @@
   ],
   "source": [
    "%%html\n",
-    "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>"
+    "<style>\n",
    "\n",
    ".center {\n",
    "  display: block;\n",
    "  margin-left: auto;\n",
    "  margin-right: auto;\n",
    "  width: 20%;\n",
    "}\n",
    "\n",
    "body {\n",
    "    align: center;\n",
    "  }\n",
    "\n",
    "</style>\n",
    "<body>\n",
    "<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
    "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
    "</a>\n",
    "<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p>  \n",
    "</body>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
@ -66,7 +113,8 @@
       "    font-family: courier;\n",
       "  }\n",
       "\n",
-       "</style>\n"
+       "</style>\n",
       "\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@ -85,7 +133,8 @@
    "    font-family: courier;\n",
    "  }\n",
    "\n",
-    "</style>\n"
+    "</style>\n",
    "\n"
   ]
  },
  {
@ -188,10 +237,27 @@
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading SentSeg Databases\n",
      "Creating the bag of words...\n",
      "\n",
      "dumping the data to hkl format..\n",
      "done\n",
      "Creating the bag of words...\n",
      "\n",
      "dumping the data to hkl format..\n",
      "done\n",
      "dumping the session\n",
      "done\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c0fcb1a9556e4d54a43fd7a969210844",
+       "model_id": "6793c5121aaf498e8960726a40709e19",
       "version_major": 2,
       "version_minor": 0
      },
@ -249,6 +315,15 @@
    "import dill\n",
    "dill.load_session('voilastate.db')\n",
    "\n",
    "#import SentSeg\n",
    "#sent_seg = SentSeg.SentSeg('de')\n",
    "#print('loading SentSeg Databases')\n",
    "#sent_seg.LoadSentGlueSGDandGSUtils()\n",
    "\n",
    "#from FremdWB import *\n",
    "#fwb = FremdWB(None,None)\n",
    "#fwb.load_DB_into_FASTsearch()\n",
    "\n",
    "#from Medio import *\n",
    "#medi = Medio(None,None)\n",
    "#medi.load_DB_into_FASTsearch()\n",
@ -395,7 +470,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f49a3f799a884277ab40f1839c8c1afd",
+       "model_id": "c833de5ff5d340bbb1988584eee0c368",
       "version_major": 2,
       "version_minor": 0
      },
@ -594,7 +669,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4c00c7b9b76e493481cb078f77f50258",
+       "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
       "version_major": 2,
       "version_minor": 0
      },
@ -634,7 +709,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "9e7fab660d534ed7925dd0d21af74957",
+       "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
       "version_major": 2,
       "version_minor": 0
      },
@ -660,7 +735,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
+    "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
    "VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
    "VerbeTextFeld.layout.height = '180px'\n",
    "VerbeTextFeld.layout.width = '99%'\n",
@ -756,7 +831,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6408b21ab24c482fa3ddc9e047592bb8",
+       "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
       "version_major": 2,
       "version_minor": 0
      },
@ -782,7 +857,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
+       "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
       "version_major": 2,
       "version_minor": 0
      },
--- a/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl
+++ b/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl
--- a/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl
+++ b/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl
--- a/Prototyp/FremdWB.py
+++ b/Prototyp/FremdWB.py
@ -125,54 +125,61 @@ class FremdWB(object):
        sentencecount = 0
        alleeintraege = []
        for sentence in sentences:
-            #print('sentence', sentence)
+            oldpunctuations = punctuations
-            sentencecount += 1
+            try:
-            #print('processing sentence', sentencecount)
+                #print('sentence', sentence)
-            
+                sentencecount += 1
-            doc = self.nlp(' '.join(sentence))
+                #print('processing sentence', sentencecount)
-            
+
-            fremds_of_sentence = []
+                doc = self.nlp(' '.join(sentence))
-            count = 0
+
-            
+                fremds_of_sentence = []
-            for word in doc:
+                count = 0
-                count += 1
+
-                
+                for word in doc:
-                
+                    count += 1
-                
+
-                if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
+
-                    fremds_of_sentence.append(word.text)
+
-                    
+                    if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
-            
+                        fremds_of_sentence.append(word.text)
-            #print(fremds_of_sentence)
+
-            fremdeintraege = []   
+
-            for word in fremds_of_sentence:
+                #print(fremds_of_sentence)
-                
+                fremdeintraege = []   
-                bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)
+                for word in fremds_of_sentence:
-                
+
-                
+                    bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)
-                
+
-                fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
+
-                fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
+
-                
+                    fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
-                #print(fremd)
+                    fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
-                #print('fremdeintrag', fremdeintrag)
+
-                
+                    #print(fremd)
-                if fremd[0] == word:
+                    #print('fremdeintrag', fremdeintrag)
-                    fremdeintraege.append(fremdeintrag)
+
-            #print('fremdeintraege',fremdeintraege)
+                    if fremd[0] == word:
-            outsentences.append(sentence)
+                        fremdeintraege.append(fremdeintrag)
-            
+                #print('fremdeintraege',fremdeintraege)
-            for eintrag in fremdeintraege:
+                outsentences.append(sentence)
-                if eintrag[-1][-1] == '.':
+
-                    eintrag[-1] = eintrag[-1][:-1]
+                for eintrag in fremdeintraege:
-                if eintrag not in alleeintraege:
+                    if eintrag[-1][-1] == '.':
-                    outsentences.append(eintrag)
+                        eintrag[-1] = eintrag[-1][:-1]
-                    punctuations.insert(sentencecount, '.')
+                    if eintrag not in alleeintraege:
-                alleeintraege.append(eintrag)
+                        outsentences.append(eintrag)
-            
+                        punctuations.insert(sentencecount, '.')
-            
+                    alleeintraege.append(eintrag)
-            
+
-            #print('the endsentence',sentence)
+
                #print('the endsentence',sentence)
            except:
                print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
                if sentence != outsentences[-1]:
                    outsentences.append(sentence)
                punctuations = oldpunctuations
        return outsentences, punctuations
--- a/Prototyp/Prototype.ipynb
+++ b/Prototyp/Prototype.ipynb
@ -36,7 +36,26 @@
    {
     "data": {
      "text/html": [
-       "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n"
+       "<style>\n",
       "\n",
       ".center {\n",
       "  display: block;\n",
       "  margin-left: auto;\n",
       "  margin-right: auto;\n",
       "  width: 20%;\n",
       "}\n",
       "\n",
       "body {\n",
       "    align: center;\n",
       "  }\n",
       "\n",
       "</style>\n",
       "<body>\n",
       "<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
       "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
       "</a>\n",
       "<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p>  \n",
       "</body>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@ -48,7 +67,35 @@
   ],
   "source": [
    "%%html\n",
-    "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>"
+    "<style>\n",
    "\n",
    ".center {\n",
    "  display: block;\n",
    "  margin-left: auto;\n",
    "  margin-right: auto;\n",
    "  width: 20%;\n",
    "}\n",
    "\n",
    "body {\n",
    "    align: center;\n",
    "  }\n",
    "\n",
    "</style>\n",
    "<body>\n",
    "<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
    "<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
    "</a>\n",
    "<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p>  \n",
    "</body>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
@ -66,7 +113,8 @@
       "    font-family: courier;\n",
       "  }\n",
       "\n",
-       "</style>\n"
+       "</style>\n",
       "\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@ -85,7 +133,8 @@
    "    font-family: courier;\n",
    "  }\n",
    "\n",
-    "</style>\n"
+    "</style>\n",
    "\n"
   ]
  },
  {
@ -188,10 +237,27 @@
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading SentSeg Databases\n",
      "Creating the bag of words...\n",
      "\n",
      "dumping the data to hkl format..\n",
      "done\n",
      "Creating the bag of words...\n",
      "\n",
      "dumping the data to hkl format..\n",
      "done\n",
      "dumping the session\n",
      "done\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c0fcb1a9556e4d54a43fd7a969210844",
+       "model_id": "6793c5121aaf498e8960726a40709e19",
       "version_major": 2,
       "version_minor": 0
      },
@ -249,6 +315,15 @@
    "import dill\n",
    "dill.load_session('voilastate.db')\n",
    "\n",
    "#import SentSeg\n",
    "#sent_seg = SentSeg.SentSeg('de')\n",
    "#print('loading SentSeg Databases')\n",
    "#sent_seg.LoadSentGlueSGDandGSUtils()\n",
    "\n",
    "#from FremdWB import *\n",
    "#fwb = FremdWB(None,None)\n",
    "#fwb.load_DB_into_FASTsearch()\n",
    "\n",
    "#from Medio import *\n",
    "#medi = Medio(None,None)\n",
    "#medi.load_DB_into_FASTsearch()\n",
@ -395,7 +470,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f49a3f799a884277ab40f1839c8c1afd",
+       "model_id": "c833de5ff5d340bbb1988584eee0c368",
       "version_major": 2,
       "version_minor": 0
      },
@ -594,7 +669,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4c00c7b9b76e493481cb078f77f50258",
+       "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
       "version_major": 2,
       "version_minor": 0
      },
@ -634,7 +709,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "9e7fab660d534ed7925dd0d21af74957",
+       "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
       "version_major": 2,
       "version_minor": 0
      },
@ -660,7 +735,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
+    "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
    "VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
    "VerbeTextFeld.layout.height = '180px'\n",
    "VerbeTextFeld.layout.width = '99%'\n",
@ -756,7 +831,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6408b21ab24c482fa3ddc9e047592bb8",
+       "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
       "version_major": 2,
       "version_minor": 0
      },
@ -782,7 +857,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
+       "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
       "version_major": 2,
       "version_minor": 0
      },
--- a/Prototyp/SentSeg.py
+++ b/Prototyp/SentSeg.py
@ -2110,96 +2110,107 @@ class SentSeg(object):
        newsentences = []
        newpunctuations = []
        Whatisofnouns = []
        oldsentences = sentences
        oldpunctuations = punctuations
        for hauptindex in range(len(sentences)):
            sentence = sentences[hauptindex]
-            #for triple in triples:
+            try:
-            #    AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
+                #for triple in triples:
-            #    for tripleinwor in tripleInWords:
+                #    AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
-            #        triplestochange.append([triple, tripleinwor])
+                #    for tripleinwor in tripleInWords:
-                
+                #        triplestochange.append([triple, tripleinwor])
-            for quadruple in quadruples:
+
-                AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
+                for quadruple in quadruples:
-                #print('quadinwords', quadrupleInWords)
+                    AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
-                #print('ANNOORNOT', AnnoOrNot)
+                    #print('quadinwords', quadrupleInWords)
-                for quadrupleInWo in quadrupleInWords:
+                    #print('ANNOORNOT', AnnoOrNot)
-                    quadruplestochange.append([quadruple, quadrupleInWo])
+                    for quadrupleInWo in quadrupleInWords:
-            
+                        quadruplestochange.append([quadruple, quadrupleInWo])
-            #print('quadstochange',quadruplestochange)
+
-            for quad in quadruplestochange:
+                #print('quadstochange',quadruplestochange)
-                for n in range(len(sentence) - 4):
+                for quad in quadruplestochange:
-                    if sentence[n] == quad[1][0]:
+                    for n in range(len(sentence) - 4):
-                        if sentence[n + 1] == quad[1][1]:
+                        if sentence[n] == quad[1][0]:
-                            if sentence[n + 2] == quad[1][2]:
+                            if sentence[n + 1] == quad[1][1]:
-                                artword = None
+                                if sentence[n + 2] == quad[1][2]:
-                                longerWhatisnoun = 0
+                                    artword = None
-                                for m in range(2):
+                                    longerWhatisnoun = 0
-                                    for word in self.nlp(sentence[n - m]):
+                                    for m in range(2):
-                                        if word.tag_ == 'ART':
+                                        for word in self.nlp(sentence[n - m]):
-                                            Nounthatis = sentence[n - m:n + 1]
+                                            if word.tag_ == 'ART':
-                                            import spacy
+                                                Nounthatis = sentence[n - m:n + 1]
-                                            nlp = spacy.load('de_core_news_sm')
+                                                import spacy
-                                            token3 = nlp(sentence[n+4])
+                                                nlp = spacy.load('de_core_news_sm')
-                                            counter = 0
+                                                token3 = nlp(sentence[n+4])
-                                            Whatisnoun = sentence[n + 1:n + 4]
+                                                counter = 0
-                                            for wor in token3:
+                                                Whatisnoun = sentence[n + 1:n + 4]
-                                                counter += 1
+                                                for wor in token3:
-                                                if wor.tag_ == 'NN' or wor.tag_ == 'NE':
+                                                    counter += 1
-                                                    if counter == 1:
+                                                    if wor.tag_ == 'NN' or wor.tag_ == 'NE':
-                                                        Whatisnoun = sentence[n + 1:n + 5]
+                                                        if counter == 1:
-                                                        longerWhatisnoun = 1
+                                                            Whatisnoun = sentence[n + 1:n + 5]
-                                                    if counter == 2:
+                                                            longerWhatisnoun = 1
-                                                        Whatisnoun = sentence[n + 1:n + 4]
+                                                        if counter == 2:
-                                            
+                                                            Whatisnoun = sentence[n + 1:n + 4]
-                                            
+
-                                            
+
-                                            artword = word.text
+
-                                #print(sentence[n - 1],'oi')
+                                                artword = word.text
-                                if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
+                                    #print(sentence[n - 1],'oi')
-                                    
+                                    if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
-                                    if artword == 'der':
+
-                                        Nounthatis[0] = 'die'
+                                        if artword == 'der':
-                                    
+                                            Nounthatis[0] = 'die'
-                                    donothing = 0
+
-                                    if sentence[n + 1] == 'mit':
+                                        donothing = 0
-                                        if sentence[n + 2] == 'den':
+                                        if sentence[n + 1] == 'mit':
-                                            verb = ' hat die '
+                                            if sentence[n + 2] == 'den':
-                                            Whatisnoun = Whatisnoun[2:]
+                                                verb = ' hat die '
-                                        if sentence[n + 2] == 'der':
+                                                Whatisnoun = Whatisnoun[2:]
-                                            verb = ' hat eine '
+                                            if sentence[n + 2] == 'der':
-                                            Whatisnoun = Whatisnoun[2:]
+                                                verb = ' hat eine '
-                                        if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
+                                                Whatisnoun = Whatisnoun[2:]
-                                            donothing = 1
+                                            if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
-                                    else:
+                                                donothing = 1
                                        verb = ' ist '
                                    if donothing == 0:
                                        newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
                                        newsentences.append([hauptindex + 1, newsentence.split()])
                                        newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
                                        if longerWhatisnoun == 0:
                                            Whatisofnouns.append([n + 1, n + 4, hauptindex])
                                        else:
-                                            Whatisofnouns.append([n + 1, n + 5, hauptindex])
+                                            verb = ' ist '
                                        if donothing == 0:
                                            newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
                                            newsentences.append([hauptindex + 1, newsentence.split()])
                                            newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
                                            if longerWhatisnoun == 0:
                                                Whatisofnouns.append([n + 1, n + 4, hauptindex])
                                            else:
                                                Whatisofnouns.append([n + 1, n + 5, hauptindex])
            except:
                print('Konnte nicht ' + str(sentence) + 'in Characterisierung pro Satz prozessieren..')
        try:        
            for whatis in Whatisofnouns[::-1]:
                thereisacomma = 0
                #print(sentences[whatis[2]][whatis[1] - 1])
                if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
                    thereisacomma = 1
                if thereisacomma == 1:
                    #print(sentences[whatis[2]][whatis[0] - 1])
                    sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
                del sentences[whatis[2]][whatis[0]:whatis[1]]
            for newsent in newsentences[::-1]:
                sentences.insert(newsent[0], newsent[1])
            for newpunct in newpunctuations[::-1]:
                punctuations.insert(newpunct[0], newpunct[1])
            for sentence in sentences:
                if sentence[-1][-1] == ',':
                    sentence[-1] = sentence[-1][:-1]
        except:
            print('konnte nicht die gesammelten Characterisierungen prozessieren')
            sentences = oldsentences
            punctuations = oldpunctuations
        for whatis in Whatisofnouns[::-1]:
            thereisacomma = 0
            #print(sentences[whatis[2]][whatis[1] - 1])
            if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
                thereisacomma = 1
            if thereisacomma == 1:
                #print(sentences[whatis[2]][whatis[0] - 1])
                sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
            del sentences[whatis[2]][whatis[0]:whatis[1]]
        for newsent in newsentences[::-1]:
            sentences.insert(newsent[0], newsent[1])
        for newpunct in newpunctuations[::-1]:
            punctuations.insert(newpunct[0], newpunct[1])
        for sentence in sentences:
            if sentence[-1][-1] == ',':
                sentence[-1] = sentence[-1][:-1]
        return sentences, punctuations
--- a/Prototyp/Verbesserungen/Input142.txt
+++ b/Prototyp/Verbesserungen/Input142.txt
@ -0,0 +1,3 @@
 er hat als Trainer im Fußball gearbeitet, bis er nicht mehr konnte .
 seine Beine schmerzten zu sehr und er konnte nicht mehr lange stehen .
 außerdem tat ihm auch seine Stimme weh, denn er musste immer soviel schreien, weil die Kinder nicht richtig Fußball spielten .
--- a/Prototyp/Verbesserungen/Output142.txt
+++ b/Prototyp/Verbesserungen/Output142.txt
@ -0,0 +1,11 @@
 er hat als Trainer im Fuss·ball gearbeitet .
 ein Trainer ist eine Person .
 ein Trainer leitet Menschen an .
 zum Beispiel beim Sport .
 das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
 Fuss·ball - die Ball-sport-art Fuss·ball - eine Zeitung Fuss·ball - das Sport-Geraet .
 bis er geringerer konnte .
 seine Beine schmerzten zu sehr und er konnte geringerer lange stehen .
 ausserdem tat ihm auch seine Stimme weh .
 denn er musste immer soviel schreien .
 weil die Kinder pseudo Fuss·ball spielten .
--- a/Prototyp/Verbesserungen/Verbesserungen142.txt
+++ b/Prototyp/Verbesserungen/Verbesserungen142.txt
@ -0,0 +1,12 @@
 er hat als Trainer im Fuss·ball gearbeitet .
 ein Trainer ist eine Person .
 ein Trainer leitet Menschen an .
 zum Beispiel beim Sport .
 das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
 Fuss·ball - die Ball•sport•art, Fuss·ball - eine Zeitung, Fuss·ball - das Sport•gerät .
 er konnte nicht mehr .
 seine Beine schmerzten zu sehr .
 und er konnte nicht mehr lange stehen .
 ausserdem tat ihm auch seine Stimme weh .
 denn er musste immer soviel schreien .
 wenn die Kinder Fuss·ball spielten .
--- a/Prototyp/Verbesserungen/indexDerVerbesserungen.txt
+++ b/Prototyp/Verbesserungen/indexDerVerbesserungen.txt
@ -1 +1 @@
-141
+142
--- a/Prototyp/pycache/FremdWB.cpython-35.pyc
+++ b/Prototyp/pycache/FremdWB.cpython-35.pyc
--- a/Prototyp/pycache/SentSeg.cpython-35.pyc
+++ b/Prototyp/pycache/SentSeg.cpython-35.pyc
--- a/Prototyp/bagofwordshkldbFremd_WB1.pkl
+++ b/Prototyp/bagofwordshkldbFremd_WB1.pkl
--- a/Prototyp/bagofwordshkldbFremd_WB2.pkl
+++ b/Prototyp/bagofwordshkldbFremd_WB2.pkl
--- a/Prototyp/voilastate.db
+++ b/Prototyp/voilastate.db