Link zum Code eingefuegt | Verbessertes Errorhandling fuer zwei FremdWB und CharAppend

This commit is contained in:
alpcentaur 2020-09-06 01:33:50 +02:00
parent 2487d6de07
commit 441eeed1d6
15 changed files with 351 additions and 157 deletions

View file

@ -36,7 +36,26 @@
{ {
"data": { "data": {
"text/html": [ "text/html": [
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n" "<style>\n",
"\n",
".center {\n",
" display: block;\n",
" margin-left: auto;\n",
" margin-right: auto;\n",
" width: 20%;\n",
"}\n",
"\n",
"body {\n",
" align: center;\n",
" }\n",
"\n",
"</style>\n",
"<body>\n",
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
"</a>\n",
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
"</body>\n"
], ],
"text/plain": [ "text/plain": [
"<IPython.core.display.HTML object>" "<IPython.core.display.HTML object>"
@ -48,7 +67,35 @@
], ],
"source": [ "source": [
"%%html\n", "%%html\n",
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>" "<style>\n",
"\n",
".center {\n",
" display: block;\n",
" margin-left: auto;\n",
" margin-right: auto;\n",
" width: 20%;\n",
"}\n",
"\n",
"body {\n",
" align: center;\n",
" }\n",
"\n",
"</style>\n",
"<body>\n",
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
"</a>\n",
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
"</body>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
] ]
}, },
{ {
@ -66,7 +113,8 @@
" font-family: courier;\n", " font-family: courier;\n",
" }\n", " }\n",
"\n", "\n",
"</style>\n" "</style>\n",
"\n"
], ],
"text/plain": [ "text/plain": [
"<IPython.core.display.HTML object>" "<IPython.core.display.HTML object>"
@ -85,7 +133,8 @@
" font-family: courier;\n", " font-family: courier;\n",
" }\n", " }\n",
"\n", "\n",
"</style>\n" "</style>\n",
"\n"
] ]
}, },
{ {
@ -188,10 +237,27 @@
"scrolled": true "scrolled": true
}, },
"outputs": [ "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loading SentSeg Databases\n",
"Creating the bag of words...\n",
"\n",
"dumping the data to hkl format..\n",
"done\n",
"Creating the bag of words...\n",
"\n",
"dumping the data to hkl format..\n",
"done\n",
"dumping the session\n",
"done\n"
]
},
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "c0fcb1a9556e4d54a43fd7a969210844", "model_id": "6793c5121aaf498e8960726a40709e19",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -249,6 +315,15 @@
"import dill\n", "import dill\n",
"dill.load_session('voilastate.db')\n", "dill.load_session('voilastate.db')\n",
"\n", "\n",
"#import SentSeg\n",
"#sent_seg = SentSeg.SentSeg('de')\n",
"#print('loading SentSeg Databases')\n",
"#sent_seg.LoadSentGlueSGDandGSUtils()\n",
"\n",
"#from FremdWB import *\n",
"#fwb = FremdWB(None,None)\n",
"#fwb.load_DB_into_FASTsearch()\n",
"\n",
"#from Medio import *\n", "#from Medio import *\n",
"#medi = Medio(None,None)\n", "#medi = Medio(None,None)\n",
"#medi.load_DB_into_FASTsearch()\n", "#medi.load_DB_into_FASTsearch()\n",
@ -395,7 +470,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "f49a3f799a884277ab40f1839c8c1afd", "model_id": "c833de5ff5d340bbb1988584eee0c368",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -594,7 +669,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "4c00c7b9b76e493481cb078f77f50258", "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -634,7 +709,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "9e7fab660d534ed7925dd0d21af74957", "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -660,7 +735,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n", "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n", "VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
"VerbeTextFeld.layout.height = '180px'\n", "VerbeTextFeld.layout.height = '180px'\n",
"VerbeTextFeld.layout.width = '99%'\n", "VerbeTextFeld.layout.width = '99%'\n",
@ -756,7 +831,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "6408b21ab24c482fa3ddc9e047592bb8", "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -782,7 +857,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38", "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },

View file

@ -125,54 +125,61 @@ class FremdWB(object):
sentencecount = 0 sentencecount = 0
alleeintraege = [] alleeintraege = []
for sentence in sentences: for sentence in sentences:
#print('sentence', sentence) oldpunctuations = punctuations
sentencecount += 1 try:
#print('processing sentence', sentencecount) #print('sentence', sentence)
sentencecount += 1
doc = self.nlp(' '.join(sentence)) #print('processing sentence', sentencecount)
fremds_of_sentence = [] doc = self.nlp(' '.join(sentence))
count = 0
fremds_of_sentence = []
for word in doc: count = 0
count += 1
for word in doc:
count += 1
if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
fremds_of_sentence.append(word.text)
if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
fremds_of_sentence.append(word.text)
#print(fremds_of_sentence)
fremdeintraege = []
for word in fremds_of_sentence: #print(fremds_of_sentence)
fremdeintraege = []
bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1) for word in fremds_of_sentence:
bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)
fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
#print(fremd) fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
#print('fremdeintrag', fremdeintrag)
#print(fremd)
if fremd[0] == word: #print('fremdeintrag', fremdeintrag)
fremdeintraege.append(fremdeintrag)
#print('fremdeintraege',fremdeintraege) if fremd[0] == word:
outsentences.append(sentence) fremdeintraege.append(fremdeintrag)
#print('fremdeintraege',fremdeintraege)
for eintrag in fremdeintraege: outsentences.append(sentence)
if eintrag[-1][-1] == '.':
eintrag[-1] = eintrag[-1][:-1] for eintrag in fremdeintraege:
if eintrag not in alleeintraege: if eintrag[-1][-1] == '.':
outsentences.append(eintrag) eintrag[-1] = eintrag[-1][:-1]
punctuations.insert(sentencecount, '.') if eintrag not in alleeintraege:
alleeintraege.append(eintrag) outsentences.append(eintrag)
punctuations.insert(sentencecount, '.')
alleeintraege.append(eintrag)
#print('the endsentence',sentence)
#print('the endsentence',sentence)
except:
print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
if sentence != outsentences[-1]:
outsentences.append(sentence)
punctuations = oldpunctuations
return outsentences, punctuations return outsentences, punctuations

View file

@ -36,7 +36,26 @@
{ {
"data": { "data": {
"text/html": [ "text/html": [
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n" "<style>\n",
"\n",
".center {\n",
" display: block;\n",
" margin-left: auto;\n",
" margin-right: auto;\n",
" width: 20%;\n",
"}\n",
"\n",
"body {\n",
" align: center;\n",
" }\n",
"\n",
"</style>\n",
"<body>\n",
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
"</a>\n",
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
"</body>\n"
], ],
"text/plain": [ "text/plain": [
"<IPython.core.display.HTML object>" "<IPython.core.display.HTML object>"
@ -48,7 +67,35 @@
], ],
"source": [ "source": [
"%%html\n", "%%html\n",
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>" "<style>\n",
"\n",
".center {\n",
" display: block;\n",
" margin-left: auto;\n",
" margin-right: auto;\n",
" width: 20%;\n",
"}\n",
"\n",
"body {\n",
" align: center;\n",
" }\n",
"\n",
"</style>\n",
"<body>\n",
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
"</a>\n",
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
"</body>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
] ]
}, },
{ {
@ -66,7 +113,8 @@
" font-family: courier;\n", " font-family: courier;\n",
" }\n", " }\n",
"\n", "\n",
"</style>\n" "</style>\n",
"\n"
], ],
"text/plain": [ "text/plain": [
"<IPython.core.display.HTML object>" "<IPython.core.display.HTML object>"
@ -85,7 +133,8 @@
" font-family: courier;\n", " font-family: courier;\n",
" }\n", " }\n",
"\n", "\n",
"</style>\n" "</style>\n",
"\n"
] ]
}, },
{ {
@ -188,10 +237,27 @@
"scrolled": true "scrolled": true
}, },
"outputs": [ "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loading SentSeg Databases\n",
"Creating the bag of words...\n",
"\n",
"dumping the data to hkl format..\n",
"done\n",
"Creating the bag of words...\n",
"\n",
"dumping the data to hkl format..\n",
"done\n",
"dumping the session\n",
"done\n"
]
},
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "c0fcb1a9556e4d54a43fd7a969210844", "model_id": "6793c5121aaf498e8960726a40709e19",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -249,6 +315,15 @@
"import dill\n", "import dill\n",
"dill.load_session('voilastate.db')\n", "dill.load_session('voilastate.db')\n",
"\n", "\n",
"#import SentSeg\n",
"#sent_seg = SentSeg.SentSeg('de')\n",
"#print('loading SentSeg Databases')\n",
"#sent_seg.LoadSentGlueSGDandGSUtils()\n",
"\n",
"#from FremdWB import *\n",
"#fwb = FremdWB(None,None)\n",
"#fwb.load_DB_into_FASTsearch()\n",
"\n",
"#from Medio import *\n", "#from Medio import *\n",
"#medi = Medio(None,None)\n", "#medi = Medio(None,None)\n",
"#medi.load_DB_into_FASTsearch()\n", "#medi.load_DB_into_FASTsearch()\n",
@ -395,7 +470,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "f49a3f799a884277ab40f1839c8c1afd", "model_id": "c833de5ff5d340bbb1988584eee0c368",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -594,7 +669,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "4c00c7b9b76e493481cb078f77f50258", "model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -634,7 +709,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "9e7fab660d534ed7925dd0d21af74957", "model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -660,7 +735,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n", "beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n", "VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
"VerbeTextFeld.layout.height = '180px'\n", "VerbeTextFeld.layout.height = '180px'\n",
"VerbeTextFeld.layout.width = '99%'\n", "VerbeTextFeld.layout.width = '99%'\n",
@ -756,7 +831,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "6408b21ab24c482fa3ddc9e047592bb8", "model_id": "c2f3338821ae4ee59d205af8cb1083a8",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -782,7 +857,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38", "model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },

View file

@ -2110,96 +2110,107 @@ class SentSeg(object):
newsentences = [] newsentences = []
newpunctuations = [] newpunctuations = []
Whatisofnouns = [] Whatisofnouns = []
oldsentences = sentences
oldpunctuations = punctuations
for hauptindex in range(len(sentences)): for hauptindex in range(len(sentences)):
sentence = sentences[hauptindex] sentence = sentences[hauptindex]
#for triple in triples: try:
# AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None') #for triple in triples:
# for tripleinwor in tripleInWords: # AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
# triplestochange.append([triple, tripleinwor]) # for tripleinwor in tripleInWords:
# triplestochange.append([triple, tripleinwor])
for quadruple in quadruples:
AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None') for quadruple in quadruples:
#print('quadinwords', quadrupleInWords) AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
#print('ANNOORNOT', AnnoOrNot) #print('quadinwords', quadrupleInWords)
for quadrupleInWo in quadrupleInWords: #print('ANNOORNOT', AnnoOrNot)
quadruplestochange.append([quadruple, quadrupleInWo]) for quadrupleInWo in quadrupleInWords:
quadruplestochange.append([quadruple, quadrupleInWo])
#print('quadstochange',quadruplestochange)
for quad in quadruplestochange: #print('quadstochange',quadruplestochange)
for n in range(len(sentence) - 4): for quad in quadruplestochange:
if sentence[n] == quad[1][0]: for n in range(len(sentence) - 4):
if sentence[n + 1] == quad[1][1]: if sentence[n] == quad[1][0]:
if sentence[n + 2] == quad[1][2]: if sentence[n + 1] == quad[1][1]:
artword = None if sentence[n + 2] == quad[1][2]:
longerWhatisnoun = 0 artword = None
for m in range(2): longerWhatisnoun = 0
for word in self.nlp(sentence[n - m]): for m in range(2):
if word.tag_ == 'ART': for word in self.nlp(sentence[n - m]):
Nounthatis = sentence[n - m:n + 1] if word.tag_ == 'ART':
import spacy Nounthatis = sentence[n - m:n + 1]
nlp = spacy.load('de_core_news_sm') import spacy
token3 = nlp(sentence[n+4]) nlp = spacy.load('de_core_news_sm')
counter = 0 token3 = nlp(sentence[n+4])
Whatisnoun = sentence[n + 1:n + 4] counter = 0
for wor in token3: Whatisnoun = sentence[n + 1:n + 4]
counter += 1 for wor in token3:
if wor.tag_ == 'NN' or wor.tag_ == 'NE': counter += 1
if counter == 1: if wor.tag_ == 'NN' or wor.tag_ == 'NE':
Whatisnoun = sentence[n + 1:n + 5] if counter == 1:
longerWhatisnoun = 1 Whatisnoun = sentence[n + 1:n + 5]
if counter == 2: longerWhatisnoun = 1
Whatisnoun = sentence[n + 1:n + 4] if counter == 2:
Whatisnoun = sentence[n + 1:n + 4]
artword = word.text
#print(sentence[n - 1],'oi') artword = word.text
if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])): #print(sentence[n - 1],'oi')
if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
if artword == 'der':
Nounthatis[0] = 'die' if artword == 'der':
Nounthatis[0] = 'die'
donothing = 0
if sentence[n + 1] == 'mit': donothing = 0
if sentence[n + 2] == 'den': if sentence[n + 1] == 'mit':
verb = ' hat die ' if sentence[n + 2] == 'den':
Whatisnoun = Whatisnoun[2:] verb = ' hat die '
if sentence[n + 2] == 'der': Whatisnoun = Whatisnoun[2:]
verb = ' hat eine ' if sentence[n + 2] == 'der':
Whatisnoun = Whatisnoun[2:] verb = ' hat eine '
if sentence[n + 2] != 'der' and sentence[n + 2] != 'den': Whatisnoun = Whatisnoun[2:]
donothing = 1 if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
else: donothing = 1
verb = ' ist '
if donothing == 0:
newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
newsentences.append([hauptindex + 1, newsentence.split()])
newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
if longerWhatisnoun == 0:
Whatisofnouns.append([n + 1, n + 4, hauptindex])
else: else:
Whatisofnouns.append([n + 1, n + 5, hauptindex]) verb = ' ist '
if donothing == 0:
newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
newsentences.append([hauptindex + 1, newsentence.split()])
newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
if longerWhatisnoun == 0:
Whatisofnouns.append([n + 1, n + 4, hauptindex])
else:
Whatisofnouns.append([n + 1, n + 5, hauptindex])
except:
print('Konnte nicht ' + str(sentence) + 'in Characterisierung pro Satz prozessieren..')
try:
for whatis in Whatisofnouns[::-1]:
thereisacomma = 0
#print(sentences[whatis[2]][whatis[1] - 1])
if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
thereisacomma = 1
if thereisacomma == 1:
#print(sentences[whatis[2]][whatis[0] - 1])
sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
del sentences[whatis[2]][whatis[0]:whatis[1]]
for newsent in newsentences[::-1]:
sentences.insert(newsent[0], newsent[1])
for newpunct in newpunctuations[::-1]:
punctuations.insert(newpunct[0], newpunct[1])
for sentence in sentences:
if sentence[-1][-1] == ',':
sentence[-1] = sentence[-1][:-1]
except:
print('konnte nicht die gesammelten Characterisierungen prozessieren')
sentences = oldsentences
punctuations = oldpunctuations
for whatis in Whatisofnouns[::-1]:
thereisacomma = 0
#print(sentences[whatis[2]][whatis[1] - 1])
if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
thereisacomma = 1
if thereisacomma == 1:
#print(sentences[whatis[2]][whatis[0] - 1])
sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
del sentences[whatis[2]][whatis[0]:whatis[1]]
for newsent in newsentences[::-1]:
sentences.insert(newsent[0], newsent[1])
for newpunct in newpunctuations[::-1]:
punctuations.insert(newpunct[0], newpunct[1])
for sentence in sentences:
if sentence[-1][-1] == ',':
sentence[-1] = sentence[-1][:-1]
return sentences, punctuations return sentences, punctuations

View file

@ -0,0 +1,3 @@
er hat als Trainer im Fußball gearbeitet, bis er nicht mehr konnte .
seine Beine schmerzten zu sehr und er konnte nicht mehr lange stehen .
außerdem tat ihm auch seine Stimme weh, denn er musste immer soviel schreien, weil die Kinder nicht richtig Fußball spielten .

View file

@ -0,0 +1,11 @@
er hat als Trainer im Fuss·ball gearbeitet .
ein Trainer ist eine Person .
ein Trainer leitet Menschen an .
zum Beispiel beim Sport .
das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
Fuss·ball - die Ball-sport-art Fuss·ball - eine Zeitung Fuss·ball - das Sport-Geraet .
bis er geringerer konnte .
seine Beine schmerzten zu sehr und er konnte geringerer lange stehen .
ausserdem tat ihm auch seine Stimme weh .
denn er musste immer soviel schreien .
weil die Kinder pseudo Fuss·ball spielten .

View file

@ -0,0 +1,12 @@
er hat als Trainer im Fuss·ball gearbeitet .
ein Trainer ist eine Person .
ein Trainer leitet Menschen an .
zum Beispiel beim Sport .
das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
Fuss·ball - die Ball•sport•art, Fuss·ball - eine Zeitung, Fuss·ball - das Sport•gerät .
er konnte nicht mehr .
seine Beine schmerzten zu sehr .
und er konnte nicht mehr lange stehen .
ausserdem tat ihm auch seine Stimme weh .
denn er musste immer soviel schreien .
wenn die Kinder Fuss·ball spielten .

View file

@ -1 +1 @@
141 142

Binary file not shown.