diff --git a/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb b/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb
index a9d5188a..70925802 100644
--- a/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb
+++ b/Prototyp/.ipynb_checkpoints/Prototype-checkpoint.ipynb
@@ -36,7 +36,26 @@
{
"data": {
"text/html": [
- "\n"
+ "\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "Du kommst zu dem Code - wenn du auf das Logo klickst! \n",
+ "\n"
],
"text/plain": [
""
@@ -48,7 +67,35 @@
],
"source": [
"%%html\n",
- ""
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Du kommst zu dem Code - wenn du auf das Logo klickst! \n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n"
]
},
{
@@ -66,7 +113,8 @@
" font-family: courier;\n",
" }\n",
"\n",
- "\n"
+ "\n",
+ "\n"
],
"text/plain": [
""
@@ -85,7 +133,8 @@
" font-family: courier;\n",
" }\n",
"\n",
- "\n"
+ "\n",
+ "\n"
]
},
{
@@ -188,10 +237,27 @@
"scrolled": true
},
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading SentSeg Databases\n",
+ "Creating the bag of words...\n",
+ "\n",
+ "dumping the data to hkl format..\n",
+ "done\n",
+ "Creating the bag of words...\n",
+ "\n",
+ "dumping the data to hkl format..\n",
+ "done\n",
+ "dumping the session\n",
+ "done\n"
+ ]
+ },
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "c0fcb1a9556e4d54a43fd7a969210844",
+ "model_id": "6793c5121aaf498e8960726a40709e19",
"version_major": 2,
"version_minor": 0
},
@@ -249,6 +315,15 @@
"import dill\n",
"dill.load_session('voilastate.db')\n",
"\n",
+ "#import SentSeg\n",
+ "#sent_seg = SentSeg.SentSeg('de')\n",
+ "#print('loading SentSeg Databases')\n",
+ "#sent_seg.LoadSentGlueSGDandGSUtils()\n",
+ "\n",
+ "#from FremdWB import *\n",
+ "#fwb = FremdWB(None,None)\n",
+ "#fwb.load_DB_into_FASTsearch()\n",
+ "\n",
"#from Medio import *\n",
"#medi = Medio(None,None)\n",
"#medi.load_DB_into_FASTsearch()\n",
@@ -395,7 +470,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f49a3f799a884277ab40f1839c8c1afd",
+ "model_id": "c833de5ff5d340bbb1988584eee0c368",
"version_major": 2,
"version_minor": 0
},
@@ -594,7 +669,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "4c00c7b9b76e493481cb078f77f50258",
+ "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
"version_major": 2,
"version_minor": 0
},
@@ -634,7 +709,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "9e7fab660d534ed7925dd0d21af74957",
+ "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
"version_major": 2,
"version_minor": 0
},
@@ -660,7 +735,7 @@
"metadata": {},
"outputs": [],
"source": [
- "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
+ "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
"VerbeTextFeld.layout.height = '180px'\n",
"VerbeTextFeld.layout.width = '99%'\n",
@@ -756,7 +831,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "6408b21ab24c482fa3ddc9e047592bb8",
+ "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
"version_major": 2,
"version_minor": 0
},
@@ -782,7 +857,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
+ "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
"version_major": 2,
"version_minor": 0
},
diff --git a/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl b/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl
index 55354a87..b8950323 100644
Binary files a/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl and b/Prototyp/DataBaseOneZeroshkldbFremd_WB1.hkl differ
diff --git a/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl b/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl
index d0b00ea3..a1442e0f 100644
Binary files a/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl and b/Prototyp/DataBaseOneZeroshkldbFremd_WB2.hkl differ
diff --git a/Prototyp/FremdWB.py b/Prototyp/FremdWB.py
index 48303ea0..041c800e 100644
--- a/Prototyp/FremdWB.py
+++ b/Prototyp/FremdWB.py
@@ -125,54 +125,61 @@ class FremdWB(object):
sentencecount = 0
alleeintraege = []
for sentence in sentences:
- #print('sentence', sentence)
- sentencecount += 1
- #print('processing sentence', sentencecount)
-
- doc = self.nlp(' '.join(sentence))
-
- fremds_of_sentence = []
- count = 0
-
- for word in doc:
- count += 1
-
-
-
- if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
- fremds_of_sentence.append(word.text)
-
-
- #print(fremds_of_sentence)
- fremdeintraege = []
- for word in fremds_of_sentence:
-
- bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)
-
-
-
- fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
- fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
-
- #print(fremd)
- #print('fremdeintrag', fremdeintrag)
-
- if fremd[0] == word:
- fremdeintraege.append(fremdeintrag)
- #print('fremdeintraege',fremdeintraege)
- outsentences.append(sentence)
-
- for eintrag in fremdeintraege:
- if eintrag[-1][-1] == '.':
- eintrag[-1] = eintrag[-1][:-1]
- if eintrag not in alleeintraege:
- outsentences.append(eintrag)
- punctuations.insert(sentencecount, '.')
- alleeintraege.append(eintrag)
-
-
-
- #print('the endsentence',sentence)
+ oldpunctuations = punctuations
+ try:
+ #print('sentence', sentence)
+ sentencecount += 1
+ #print('processing sentence', sentencecount)
+
+ doc = self.nlp(' '.join(sentence))
+
+ fremds_of_sentence = []
+ count = 0
+
+ for word in doc:
+ count += 1
+
+
+
+ if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
+ fremds_of_sentence.append(word.text)
+
+
+ #print(fremds_of_sentence)
+ fremdeintraege = []
+ for word in fremds_of_sentence:
+
+ bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)
+
+
+
+ fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
+ fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
+
+ #print(fremd)
+ #print('fremdeintrag', fremdeintrag)
+
+ if fremd[0] == word:
+ fremdeintraege.append(fremdeintrag)
+ #print('fremdeintraege',fremdeintraege)
+ outsentences.append(sentence)
+
+ for eintrag in fremdeintraege:
+ if eintrag[-1][-1] == '.':
+ eintrag[-1] = eintrag[-1][:-1]
+ if eintrag not in alleeintraege:
+ outsentences.append(eintrag)
+ punctuations.insert(sentencecount, '.')
+ alleeintraege.append(eintrag)
+
+
+
+ #print('the endsentence',sentence)
+ except:
+ print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
+ if sentence != outsentences[-1]:
+ outsentences.append(sentence)
+ punctuations = oldpunctuations
return outsentences, punctuations
\ No newline at end of file
diff --git a/Prototyp/Prototype.ipynb b/Prototyp/Prototype.ipynb
index a9d5188a..70925802 100644
--- a/Prototyp/Prototype.ipynb
+++ b/Prototyp/Prototype.ipynb
@@ -36,7 +36,26 @@
{
"data": {
"text/html": [
- "\n"
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Du kommst zu dem Code - wenn du auf das Logo klickst! \n",
+ "\n"
],
"text/plain": [
""
@@ -48,7 +67,35 @@
],
"source": [
"%%html\n",
- ""
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Du kommst zu dem Code - wenn du auf das Logo klickst! \n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n"
]
},
{
@@ -66,7 +113,8 @@
" font-family: courier;\n",
" }\n",
"\n",
- "\n"
+ "\n",
+ "\n"
],
"text/plain": [
""
@@ -85,7 +133,8 @@
" font-family: courier;\n",
" }\n",
"\n",
- "\n"
+ "\n",
+ "\n"
]
},
{
@@ -188,10 +237,27 @@
"scrolled": true
},
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading SentSeg Databases\n",
+ "Creating the bag of words...\n",
+ "\n",
+ "dumping the data to hkl format..\n",
+ "done\n",
+ "Creating the bag of words...\n",
+ "\n",
+ "dumping the data to hkl format..\n",
+ "done\n",
+ "dumping the session\n",
+ "done\n"
+ ]
+ },
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "c0fcb1a9556e4d54a43fd7a969210844",
+ "model_id": "6793c5121aaf498e8960726a40709e19",
"version_major": 2,
"version_minor": 0
},
@@ -249,6 +315,15 @@
"import dill\n",
"dill.load_session('voilastate.db')\n",
"\n",
+ "#import SentSeg\n",
+ "#sent_seg = SentSeg.SentSeg('de')\n",
+ "#print('loading SentSeg Databases')\n",
+ "#sent_seg.LoadSentGlueSGDandGSUtils()\n",
+ "\n",
+ "#from FremdWB import *\n",
+ "#fwb = FremdWB(None,None)\n",
+ "#fwb.load_DB_into_FASTsearch()\n",
+ "\n",
"#from Medio import *\n",
"#medi = Medio(None,None)\n",
"#medi.load_DB_into_FASTsearch()\n",
@@ -395,7 +470,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f49a3f799a884277ab40f1839c8c1afd",
+ "model_id": "c833de5ff5d340bbb1988584eee0c368",
"version_major": 2,
"version_minor": 0
},
@@ -594,7 +669,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "4c00c7b9b76e493481cb078f77f50258",
+ "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
"version_major": 2,
"version_minor": 0
},
@@ -634,7 +709,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "9e7fab660d534ed7925dd0d21af74957",
+ "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
"version_major": 2,
"version_minor": 0
},
@@ -660,7 +735,7 @@
"metadata": {},
"outputs": [],
"source": [
- "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
+ "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
"VerbeTextFeld.layout.height = '180px'\n",
"VerbeTextFeld.layout.width = '99%'\n",
@@ -756,7 +831,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "6408b21ab24c482fa3ddc9e047592bb8",
+ "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
"version_major": 2,
"version_minor": 0
},
@@ -782,7 +857,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
+ "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
"version_major": 2,
"version_minor": 0
},
diff --git a/Prototyp/SentSeg.py b/Prototyp/SentSeg.py
index 9ac4b6c6..ce678ea3 100644
--- a/Prototyp/SentSeg.py
+++ b/Prototyp/SentSeg.py
@@ -2110,96 +2110,107 @@ class SentSeg(object):
newsentences = []
newpunctuations = []
Whatisofnouns = []
+ oldsentences = sentences
+ oldpunctuations = punctuations
for hauptindex in range(len(sentences)):
+
sentence = sentences[hauptindex]
- #for triple in triples:
- # AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
- # for tripleinwor in tripleInWords:
- # triplestochange.append([triple, tripleinwor])
-
- for quadruple in quadruples:
- AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
- #print('quadinwords', quadrupleInWords)
- #print('ANNOORNOT', AnnoOrNot)
- for quadrupleInWo in quadrupleInWords:
- quadruplestochange.append([quadruple, quadrupleInWo])
-
- #print('quadstochange',quadruplestochange)
- for quad in quadruplestochange:
- for n in range(len(sentence) - 4):
- if sentence[n] == quad[1][0]:
- if sentence[n + 1] == quad[1][1]:
- if sentence[n + 2] == quad[1][2]:
- artword = None
- longerWhatisnoun = 0
- for m in range(2):
- for word in self.nlp(sentence[n - m]):
- if word.tag_ == 'ART':
- Nounthatis = sentence[n - m:n + 1]
- import spacy
- nlp = spacy.load('de_core_news_sm')
- token3 = nlp(sentence[n+4])
- counter = 0
- Whatisnoun = sentence[n + 1:n + 4]
- for wor in token3:
- counter += 1
- if wor.tag_ == 'NN' or wor.tag_ == 'NE':
- if counter == 1:
- Whatisnoun = sentence[n + 1:n + 5]
- longerWhatisnoun = 1
- if counter == 2:
- Whatisnoun = sentence[n + 1:n + 4]
-
-
-
- artword = word.text
- #print(sentence[n - 1],'oi')
- if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
-
- if artword == 'der':
- Nounthatis[0] = 'die'
-
- donothing = 0
- if sentence[n + 1] == 'mit':
- if sentence[n + 2] == 'den':
- verb = ' hat die '
- Whatisnoun = Whatisnoun[2:]
- if sentence[n + 2] == 'der':
- verb = ' hat eine '
- Whatisnoun = Whatisnoun[2:]
- if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
- donothing = 1
- else:
- verb = ' ist '
- if donothing == 0:
- newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
-
-
- newsentences.append([hauptindex + 1, newsentence.split()])
- newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
- if longerWhatisnoun == 0:
- Whatisofnouns.append([n + 1, n + 4, hauptindex])
+ try:
+ #for triple in triples:
+ # AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
+ # for tripleinwor in tripleInWords:
+ # triplestochange.append([triple, tripleinwor])
+
+ for quadruple in quadruples:
+ AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
+ #print('quadinwords', quadrupleInWords)
+ #print('ANNOORNOT', AnnoOrNot)
+ for quadrupleInWo in quadrupleInWords:
+ quadruplestochange.append([quadruple, quadrupleInWo])
+
+ #print('quadstochange',quadruplestochange)
+ for quad in quadruplestochange:
+ for n in range(len(sentence) - 4):
+ if sentence[n] == quad[1][0]:
+ if sentence[n + 1] == quad[1][1]:
+ if sentence[n + 2] == quad[1][2]:
+ artword = None
+ longerWhatisnoun = 0
+ for m in range(2):
+ for word in self.nlp(sentence[n - m]):
+ if word.tag_ == 'ART':
+ Nounthatis = sentence[n - m:n + 1]
+ import spacy
+ nlp = spacy.load('de_core_news_sm')
+ token3 = nlp(sentence[n+4])
+ counter = 0
+ Whatisnoun = sentence[n + 1:n + 4]
+ for wor in token3:
+ counter += 1
+ if wor.tag_ == 'NN' or wor.tag_ == 'NE':
+ if counter == 1:
+ Whatisnoun = sentence[n + 1:n + 5]
+ longerWhatisnoun = 1
+ if counter == 2:
+ Whatisnoun = sentence[n + 1:n + 4]
+
+
+
+ artword = word.text
+ #print(sentence[n - 1],'oi')
+ if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
+
+ if artword == 'der':
+ Nounthatis[0] = 'die'
+
+ donothing = 0
+ if sentence[n + 1] == 'mit':
+ if sentence[n + 2] == 'den':
+ verb = ' hat die '
+ Whatisnoun = Whatisnoun[2:]
+ if sentence[n + 2] == 'der':
+ verb = ' hat eine '
+ Whatisnoun = Whatisnoun[2:]
+ if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
+ donothing = 1
else:
- Whatisofnouns.append([n + 1, n + 5, hauptindex])
+ verb = ' ist '
+ if donothing == 0:
+ newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
+
+
+ newsentences.append([hauptindex + 1, newsentence.split()])
+ newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
+ if longerWhatisnoun == 0:
+ Whatisofnouns.append([n + 1, n + 4, hauptindex])
+ else:
+ Whatisofnouns.append([n + 1, n + 5, hauptindex])
+ except:
+ print('Konnte nicht ' + str(sentence) + 'in Characterisierung pro Satz prozessieren..')
+ try:
+ for whatis in Whatisofnouns[::-1]:
+ thereisacomma = 0
+ #print(sentences[whatis[2]][whatis[1] - 1])
+ if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
+
+ thereisacomma = 1
+ if thereisacomma == 1:
+ #print(sentences[whatis[2]][whatis[0] - 1])
+ sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
+ del sentences[whatis[2]][whatis[0]:whatis[1]]
+ for newsent in newsentences[::-1]:
+ sentences.insert(newsent[0], newsent[1])
+ for newpunct in newpunctuations[::-1]:
+ punctuations.insert(newpunct[0], newpunct[1])
+ for sentence in sentences:
+ if sentence[-1][-1] == ',':
+ sentence[-1] = sentence[-1][:-1]
+ except:
+ print('konnte nicht die gesammelten Characterisierungen prozessieren')
+ sentences = oldsentences
+ punctuations = oldpunctuations
+
- for whatis in Whatisofnouns[::-1]:
- thereisacomma = 0
- #print(sentences[whatis[2]][whatis[1] - 1])
- if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
-
- thereisacomma = 1
- if thereisacomma == 1:
- #print(sentences[whatis[2]][whatis[0] - 1])
- sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
- del sentences[whatis[2]][whatis[0]:whatis[1]]
- for newsent in newsentences[::-1]:
- sentences.insert(newsent[0], newsent[1])
- for newpunct in newpunctuations[::-1]:
- punctuations.insert(newpunct[0], newpunct[1])
- for sentence in sentences:
- if sentence[-1][-1] == ',':
- sentence[-1] = sentence[-1][:-1]
-
return sentences, punctuations
diff --git a/Prototyp/Verbesserungen/Input142.txt b/Prototyp/Verbesserungen/Input142.txt
new file mode 100644
index 00000000..1338013b
--- /dev/null
+++ b/Prototyp/Verbesserungen/Input142.txt
@@ -0,0 +1,3 @@
+er hat als Trainer im Fußball gearbeitet, bis er nicht mehr konnte .
+seine Beine schmerzten zu sehr und er konnte nicht mehr lange stehen .
+außerdem tat ihm auch seine Stimme weh, denn er musste immer soviel schreien, weil die Kinder nicht richtig Fußball spielten .
diff --git a/Prototyp/Verbesserungen/Output142.txt b/Prototyp/Verbesserungen/Output142.txt
new file mode 100644
index 00000000..8dfb85a8
--- /dev/null
+++ b/Prototyp/Verbesserungen/Output142.txt
@@ -0,0 +1,11 @@
+er hat als Trainer im Fuss·ball gearbeitet .
+ein Trainer ist eine Person .
+ein Trainer leitet Menschen an .
+zum Beispiel beim Sport .
+das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
+Fuss·ball - die Ball-sport-art Fuss·ball - eine Zeitung Fuss·ball - das Sport-Geraet .
+bis er geringerer konnte .
+seine Beine schmerzten zu sehr und er konnte geringerer lange stehen .
+ausserdem tat ihm auch seine Stimme weh .
+denn er musste immer soviel schreien .
+weil die Kinder pseudo Fuss·ball spielten .
diff --git a/Prototyp/Verbesserungen/Verbesserungen142.txt b/Prototyp/Verbesserungen/Verbesserungen142.txt
new file mode 100644
index 00000000..775a207e
--- /dev/null
+++ b/Prototyp/Verbesserungen/Verbesserungen142.txt
@@ -0,0 +1,12 @@
+er hat als Trainer im Fuss·ball gearbeitet .
+ein Trainer ist eine Person .
+ein Trainer leitet Menschen an .
+zum Beispiel beim Sport .
+das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
+Fuss·ball - die Ball•sport•art, Fuss·ball - eine Zeitung, Fuss·ball - das Sport•gerät .
+er konnte nicht mehr .
+seine Beine schmerzten zu sehr .
+und er konnte nicht mehr lange stehen .
+ausserdem tat ihm auch seine Stimme weh .
+denn er musste immer soviel schreien .
+wenn die Kinder Fuss·ball spielten .
diff --git a/Prototyp/Verbesserungen/indexDerVerbesserungen.txt b/Prototyp/Verbesserungen/indexDerVerbesserungen.txt
index acfba609..83248fb9 100644
--- a/Prototyp/Verbesserungen/indexDerVerbesserungen.txt
+++ b/Prototyp/Verbesserungen/indexDerVerbesserungen.txt
@@ -1 +1 @@
-141
\ No newline at end of file
+142
\ No newline at end of file
diff --git a/Prototyp/__pycache__/FremdWB.cpython-35.pyc b/Prototyp/__pycache__/FremdWB.cpython-35.pyc
index dfdc2d42..a4630b77 100644
Binary files a/Prototyp/__pycache__/FremdWB.cpython-35.pyc and b/Prototyp/__pycache__/FremdWB.cpython-35.pyc differ
diff --git a/Prototyp/__pycache__/SentSeg.cpython-35.pyc b/Prototyp/__pycache__/SentSeg.cpython-35.pyc
index 061be259..adc69d8a 100644
Binary files a/Prototyp/__pycache__/SentSeg.cpython-35.pyc and b/Prototyp/__pycache__/SentSeg.cpython-35.pyc differ
diff --git a/Prototyp/bagofwordshkldbFremd_WB1.pkl b/Prototyp/bagofwordshkldbFremd_WB1.pkl
index 13f506fc..046aaad7 100644
Binary files a/Prototyp/bagofwordshkldbFremd_WB1.pkl and b/Prototyp/bagofwordshkldbFremd_WB1.pkl differ
diff --git a/Prototyp/bagofwordshkldbFremd_WB2.pkl b/Prototyp/bagofwordshkldbFremd_WB2.pkl
index 9d091fb1..91425417 100644
Binary files a/Prototyp/bagofwordshkldbFremd_WB2.pkl and b/Prototyp/bagofwordshkldbFremd_WB2.pkl differ
diff --git a/Prototyp/voilastate.db b/Prototyp/voilastate.db
index 9b0d61ea..5a539940 100644
Binary files a/Prototyp/voilastate.db and b/Prototyp/voilastate.db differ