Link zum Code eingefuegt | Verbessertes Errorhandling fuer zwei FremdWB und CharAppend
This commit is contained in:
parent
2487d6de07
commit
441eeed1d6
15 changed files with 351 additions and 157 deletions
|
@ -36,7 +36,26 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/html": [
|
"text/html": [
|
||||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n"
|
"<style>\n",
|
||||||
|
"\n",
|
||||||
|
".center {\n",
|
||||||
|
" display: block;\n",
|
||||||
|
" margin-left: auto;\n",
|
||||||
|
" margin-right: auto;\n",
|
||||||
|
" width: 20%;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"body {\n",
|
||||||
|
" align: center;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<body>\n",
|
||||||
|
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||||
|
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||||
|
"</a>\n",
|
||||||
|
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||||
|
"</body>\n"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<IPython.core.display.HTML object>"
|
"<IPython.core.display.HTML object>"
|
||||||
|
@ -48,7 +67,35 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"%%html\n",
|
"%%html\n",
|
||||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>"
|
"<style>\n",
|
||||||
|
"\n",
|
||||||
|
".center {\n",
|
||||||
|
" display: block;\n",
|
||||||
|
" margin-left: auto;\n",
|
||||||
|
" margin-right: auto;\n",
|
||||||
|
" width: 20%;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"body {\n",
|
||||||
|
" align: center;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<body>\n",
|
||||||
|
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||||
|
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||||
|
"</a>\n",
|
||||||
|
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||||
|
"</body>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -66,7 +113,8 @@
|
||||||
" font-family: courier;\n",
|
" font-family: courier;\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
"\n",
|
"\n",
|
||||||
"</style>\n"
|
"</style>\n",
|
||||||
|
"\n"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<IPython.core.display.HTML object>"
|
"<IPython.core.display.HTML object>"
|
||||||
|
@ -85,7 +133,8 @@
|
||||||
" font-family: courier;\n",
|
" font-family: courier;\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
"\n",
|
"\n",
|
||||||
"</style>\n"
|
"</style>\n",
|
||||||
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -188,10 +237,27 @@
|
||||||
"scrolled": true
|
"scrolled": true
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"loading SentSeg Databases\n",
|
||||||
|
"Creating the bag of words...\n",
|
||||||
|
"\n",
|
||||||
|
"dumping the data to hkl format..\n",
|
||||||
|
"done\n",
|
||||||
|
"Creating the bag of words...\n",
|
||||||
|
"\n",
|
||||||
|
"dumping the data to hkl format..\n",
|
||||||
|
"done\n",
|
||||||
|
"dumping the session\n",
|
||||||
|
"done\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "c0fcb1a9556e4d54a43fd7a969210844",
|
"model_id": "6793c5121aaf498e8960726a40709e19",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -249,6 +315,15 @@
|
||||||
"import dill\n",
|
"import dill\n",
|
||||||
"dill.load_session('voilastate.db')\n",
|
"dill.load_session('voilastate.db')\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"#import SentSeg\n",
|
||||||
|
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||||
|
"#print('loading SentSeg Databases')\n",
|
||||||
|
"#sent_seg.LoadSentGlueSGDandGSUtils()\n",
|
||||||
|
"\n",
|
||||||
|
"#from FremdWB import *\n",
|
||||||
|
"#fwb = FremdWB(None,None)\n",
|
||||||
|
"#fwb.load_DB_into_FASTsearch()\n",
|
||||||
|
"\n",
|
||||||
"#from Medio import *\n",
|
"#from Medio import *\n",
|
||||||
"#medi = Medio(None,None)\n",
|
"#medi = Medio(None,None)\n",
|
||||||
"#medi.load_DB_into_FASTsearch()\n",
|
"#medi.load_DB_into_FASTsearch()\n",
|
||||||
|
@ -395,7 +470,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "f49a3f799a884277ab40f1839c8c1afd",
|
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -594,7 +669,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "4c00c7b9b76e493481cb078f77f50258",
|
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -634,7 +709,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "9e7fab660d534ed7925dd0d21af74957",
|
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -660,7 +735,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
|
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
|
||||||
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
|
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
|
||||||
"VerbeTextFeld.layout.height = '180px'\n",
|
"VerbeTextFeld.layout.height = '180px'\n",
|
||||||
"VerbeTextFeld.layout.width = '99%'\n",
|
"VerbeTextFeld.layout.width = '99%'\n",
|
||||||
|
@ -756,7 +831,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "6408b21ab24c482fa3ddc9e047592bb8",
|
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -782,7 +857,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
|
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -125,54 +125,61 @@ class FremdWB(object):
|
||||||
sentencecount = 0
|
sentencecount = 0
|
||||||
alleeintraege = []
|
alleeintraege = []
|
||||||
for sentence in sentences:
|
for sentence in sentences:
|
||||||
#print('sentence', sentence)
|
oldpunctuations = punctuations
|
||||||
sentencecount += 1
|
try:
|
||||||
#print('processing sentence', sentencecount)
|
#print('sentence', sentence)
|
||||||
|
sentencecount += 1
|
||||||
doc = self.nlp(' '.join(sentence))
|
#print('processing sentence', sentencecount)
|
||||||
|
|
||||||
fremds_of_sentence = []
|
doc = self.nlp(' '.join(sentence))
|
||||||
count = 0
|
|
||||||
|
fremds_of_sentence = []
|
||||||
for word in doc:
|
count = 0
|
||||||
count += 1
|
|
||||||
|
for word in doc:
|
||||||
|
count += 1
|
||||||
|
|
||||||
if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
|
|
||||||
fremds_of_sentence.append(word.text)
|
|
||||||
|
if word.tag_[0] == 'V' or word.tag_[0] == 'N' or word.tag_[0] == 'A':
|
||||||
|
fremds_of_sentence.append(word.text)
|
||||||
#print(fremds_of_sentence)
|
|
||||||
fremdeintraege = []
|
|
||||||
for word in fremds_of_sentence:
|
#print(fremds_of_sentence)
|
||||||
|
fremdeintraege = []
|
||||||
bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)
|
for word in fremds_of_sentence:
|
||||||
|
|
||||||
|
bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)
|
||||||
|
|
||||||
fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
|
|
||||||
fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
|
|
||||||
|
fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
|
||||||
#print(fremd)
|
fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()
|
||||||
#print('fremdeintrag', fremdeintrag)
|
|
||||||
|
#print(fremd)
|
||||||
if fremd[0] == word:
|
#print('fremdeintrag', fremdeintrag)
|
||||||
fremdeintraege.append(fremdeintrag)
|
|
||||||
#print('fremdeintraege',fremdeintraege)
|
if fremd[0] == word:
|
||||||
outsentences.append(sentence)
|
fremdeintraege.append(fremdeintrag)
|
||||||
|
#print('fremdeintraege',fremdeintraege)
|
||||||
for eintrag in fremdeintraege:
|
outsentences.append(sentence)
|
||||||
if eintrag[-1][-1] == '.':
|
|
||||||
eintrag[-1] = eintrag[-1][:-1]
|
for eintrag in fremdeintraege:
|
||||||
if eintrag not in alleeintraege:
|
if eintrag[-1][-1] == '.':
|
||||||
outsentences.append(eintrag)
|
eintrag[-1] = eintrag[-1][:-1]
|
||||||
punctuations.insert(sentencecount, '.')
|
if eintrag not in alleeintraege:
|
||||||
alleeintraege.append(eintrag)
|
outsentences.append(eintrag)
|
||||||
|
punctuations.insert(sentencecount, '.')
|
||||||
|
alleeintraege.append(eintrag)
|
||||||
|
|
||||||
#print('the endsentence',sentence)
|
|
||||||
|
|
||||||
|
#print('the endsentence',sentence)
|
||||||
|
except:
|
||||||
|
print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
|
||||||
|
if sentence != outsentences[-1]:
|
||||||
|
outsentences.append(sentence)
|
||||||
|
punctuations = oldpunctuations
|
||||||
return outsentences, punctuations
|
return outsentences, punctuations
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,26 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/html": [
|
"text/html": [
|
||||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n"
|
"<style>\n",
|
||||||
|
"\n",
|
||||||
|
".center {\n",
|
||||||
|
" display: block;\n",
|
||||||
|
" margin-left: auto;\n",
|
||||||
|
" margin-right: auto;\n",
|
||||||
|
" width: 20%;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"body {\n",
|
||||||
|
" align: center;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<body>\n",
|
||||||
|
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||||
|
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||||
|
"</a>\n",
|
||||||
|
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||||
|
"</body>\n"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<IPython.core.display.HTML object>"
|
"<IPython.core.display.HTML object>"
|
||||||
|
@ -48,7 +67,35 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"%%html\n",
|
"%%html\n",
|
||||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>"
|
"<style>\n",
|
||||||
|
"\n",
|
||||||
|
".center {\n",
|
||||||
|
" display: block;\n",
|
||||||
|
" margin-left: auto;\n",
|
||||||
|
" margin-right: auto;\n",
|
||||||
|
" width: 20%;\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"body {\n",
|
||||||
|
" align: center;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<body>\n",
|
||||||
|
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||||
|
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||||
|
"</a>\n",
|
||||||
|
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||||
|
"</body>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -66,7 +113,8 @@
|
||||||
" font-family: courier;\n",
|
" font-family: courier;\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
"\n",
|
"\n",
|
||||||
"</style>\n"
|
"</style>\n",
|
||||||
|
"\n"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<IPython.core.display.HTML object>"
|
"<IPython.core.display.HTML object>"
|
||||||
|
@ -85,7 +133,8 @@
|
||||||
" font-family: courier;\n",
|
" font-family: courier;\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
"\n",
|
"\n",
|
||||||
"</style>\n"
|
"</style>\n",
|
||||||
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -188,10 +237,27 @@
|
||||||
"scrolled": true
|
"scrolled": true
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"loading SentSeg Databases\n",
|
||||||
|
"Creating the bag of words...\n",
|
||||||
|
"\n",
|
||||||
|
"dumping the data to hkl format..\n",
|
||||||
|
"done\n",
|
||||||
|
"Creating the bag of words...\n",
|
||||||
|
"\n",
|
||||||
|
"dumping the data to hkl format..\n",
|
||||||
|
"done\n",
|
||||||
|
"dumping the session\n",
|
||||||
|
"done\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "c0fcb1a9556e4d54a43fd7a969210844",
|
"model_id": "6793c5121aaf498e8960726a40709e19",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -249,6 +315,15 @@
|
||||||
"import dill\n",
|
"import dill\n",
|
||||||
"dill.load_session('voilastate.db')\n",
|
"dill.load_session('voilastate.db')\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"#import SentSeg\n",
|
||||||
|
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||||
|
"#print('loading SentSeg Databases')\n",
|
||||||
|
"#sent_seg.LoadSentGlueSGDandGSUtils()\n",
|
||||||
|
"\n",
|
||||||
|
"#from FremdWB import *\n",
|
||||||
|
"#fwb = FremdWB(None,None)\n",
|
||||||
|
"#fwb.load_DB_into_FASTsearch()\n",
|
||||||
|
"\n",
|
||||||
"#from Medio import *\n",
|
"#from Medio import *\n",
|
||||||
"#medi = Medio(None,None)\n",
|
"#medi = Medio(None,None)\n",
|
||||||
"#medi.load_DB_into_FASTsearch()\n",
|
"#medi.load_DB_into_FASTsearch()\n",
|
||||||
|
@ -395,7 +470,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "f49a3f799a884277ab40f1839c8c1afd",
|
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -594,7 +669,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "4c00c7b9b76e493481cb078f77f50258",
|
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -634,7 +709,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "9e7fab660d534ed7925dd0d21af74957",
|
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -660,7 +735,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
|
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
|
||||||
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
|
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
|
||||||
"VerbeTextFeld.layout.height = '180px'\n",
|
"VerbeTextFeld.layout.height = '180px'\n",
|
||||||
"VerbeTextFeld.layout.width = '99%'\n",
|
"VerbeTextFeld.layout.width = '99%'\n",
|
||||||
|
@ -756,7 +831,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "6408b21ab24c482fa3ddc9e047592bb8",
|
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -782,7 +857,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
|
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
|
|
@ -2110,96 +2110,107 @@ class SentSeg(object):
|
||||||
newsentences = []
|
newsentences = []
|
||||||
newpunctuations = []
|
newpunctuations = []
|
||||||
Whatisofnouns = []
|
Whatisofnouns = []
|
||||||
|
oldsentences = sentences
|
||||||
|
oldpunctuations = punctuations
|
||||||
for hauptindex in range(len(sentences)):
|
for hauptindex in range(len(sentences)):
|
||||||
|
|
||||||
sentence = sentences[hauptindex]
|
sentence = sentences[hauptindex]
|
||||||
#for triple in triples:
|
try:
|
||||||
# AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
|
#for triple in triples:
|
||||||
# for tripleinwor in tripleInWords:
|
# AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
|
||||||
# triplestochange.append([triple, tripleinwor])
|
# for tripleinwor in tripleInWords:
|
||||||
|
# triplestochange.append([triple, tripleinwor])
|
||||||
for quadruple in quadruples:
|
|
||||||
AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
|
for quadruple in quadruples:
|
||||||
#print('quadinwords', quadrupleInWords)
|
AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
|
||||||
#print('ANNOORNOT', AnnoOrNot)
|
#print('quadinwords', quadrupleInWords)
|
||||||
for quadrupleInWo in quadrupleInWords:
|
#print('ANNOORNOT', AnnoOrNot)
|
||||||
quadruplestochange.append([quadruple, quadrupleInWo])
|
for quadrupleInWo in quadrupleInWords:
|
||||||
|
quadruplestochange.append([quadruple, quadrupleInWo])
|
||||||
#print('quadstochange',quadruplestochange)
|
|
||||||
for quad in quadruplestochange:
|
#print('quadstochange',quadruplestochange)
|
||||||
for n in range(len(sentence) - 4):
|
for quad in quadruplestochange:
|
||||||
if sentence[n] == quad[1][0]:
|
for n in range(len(sentence) - 4):
|
||||||
if sentence[n + 1] == quad[1][1]:
|
if sentence[n] == quad[1][0]:
|
||||||
if sentence[n + 2] == quad[1][2]:
|
if sentence[n + 1] == quad[1][1]:
|
||||||
artword = None
|
if sentence[n + 2] == quad[1][2]:
|
||||||
longerWhatisnoun = 0
|
artword = None
|
||||||
for m in range(2):
|
longerWhatisnoun = 0
|
||||||
for word in self.nlp(sentence[n - m]):
|
for m in range(2):
|
||||||
if word.tag_ == 'ART':
|
for word in self.nlp(sentence[n - m]):
|
||||||
Nounthatis = sentence[n - m:n + 1]
|
if word.tag_ == 'ART':
|
||||||
import spacy
|
Nounthatis = sentence[n - m:n + 1]
|
||||||
nlp = spacy.load('de_core_news_sm')
|
import spacy
|
||||||
token3 = nlp(sentence[n+4])
|
nlp = spacy.load('de_core_news_sm')
|
||||||
counter = 0
|
token3 = nlp(sentence[n+4])
|
||||||
Whatisnoun = sentence[n + 1:n + 4]
|
counter = 0
|
||||||
for wor in token3:
|
Whatisnoun = sentence[n + 1:n + 4]
|
||||||
counter += 1
|
for wor in token3:
|
||||||
if wor.tag_ == 'NN' or wor.tag_ == 'NE':
|
counter += 1
|
||||||
if counter == 1:
|
if wor.tag_ == 'NN' or wor.tag_ == 'NE':
|
||||||
Whatisnoun = sentence[n + 1:n + 5]
|
if counter == 1:
|
||||||
longerWhatisnoun = 1
|
Whatisnoun = sentence[n + 1:n + 5]
|
||||||
if counter == 2:
|
longerWhatisnoun = 1
|
||||||
Whatisnoun = sentence[n + 1:n + 4]
|
if counter == 2:
|
||||||
|
Whatisnoun = sentence[n + 1:n + 4]
|
||||||
|
|
||||||
|
|
||||||
artword = word.text
|
|
||||||
#print(sentence[n - 1],'oi')
|
artword = word.text
|
||||||
if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
|
#print(sentence[n - 1],'oi')
|
||||||
|
if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
|
||||||
if artword == 'der':
|
|
||||||
Nounthatis[0] = 'die'
|
if artword == 'der':
|
||||||
|
Nounthatis[0] = 'die'
|
||||||
donothing = 0
|
|
||||||
if sentence[n + 1] == 'mit':
|
donothing = 0
|
||||||
if sentence[n + 2] == 'den':
|
if sentence[n + 1] == 'mit':
|
||||||
verb = ' hat die '
|
if sentence[n + 2] == 'den':
|
||||||
Whatisnoun = Whatisnoun[2:]
|
verb = ' hat die '
|
||||||
if sentence[n + 2] == 'der':
|
Whatisnoun = Whatisnoun[2:]
|
||||||
verb = ' hat eine '
|
if sentence[n + 2] == 'der':
|
||||||
Whatisnoun = Whatisnoun[2:]
|
verb = ' hat eine '
|
||||||
if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
|
Whatisnoun = Whatisnoun[2:]
|
||||||
donothing = 1
|
if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
|
||||||
else:
|
donothing = 1
|
||||||
verb = ' ist '
|
|
||||||
if donothing == 0:
|
|
||||||
newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
|
|
||||||
|
|
||||||
|
|
||||||
newsentences.append([hauptindex + 1, newsentence.split()])
|
|
||||||
newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
|
|
||||||
if longerWhatisnoun == 0:
|
|
||||||
Whatisofnouns.append([n + 1, n + 4, hauptindex])
|
|
||||||
else:
|
else:
|
||||||
Whatisofnouns.append([n + 1, n + 5, hauptindex])
|
verb = ' ist '
|
||||||
|
if donothing == 0:
|
||||||
|
newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
|
||||||
|
|
||||||
|
|
||||||
|
newsentences.append([hauptindex + 1, newsentence.split()])
|
||||||
|
newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
|
||||||
|
if longerWhatisnoun == 0:
|
||||||
|
Whatisofnouns.append([n + 1, n + 4, hauptindex])
|
||||||
|
else:
|
||||||
|
Whatisofnouns.append([n + 1, n + 5, hauptindex])
|
||||||
|
except:
|
||||||
|
print('Konnte nicht ' + str(sentence) + 'in Characterisierung pro Satz prozessieren..')
|
||||||
|
try:
|
||||||
|
for whatis in Whatisofnouns[::-1]:
|
||||||
|
thereisacomma = 0
|
||||||
|
#print(sentences[whatis[2]][whatis[1] - 1])
|
||||||
|
if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
|
||||||
|
|
||||||
|
thereisacomma = 1
|
||||||
|
if thereisacomma == 1:
|
||||||
|
#print(sentences[whatis[2]][whatis[0] - 1])
|
||||||
|
sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
|
||||||
|
del sentences[whatis[2]][whatis[0]:whatis[1]]
|
||||||
|
for newsent in newsentences[::-1]:
|
||||||
|
sentences.insert(newsent[0], newsent[1])
|
||||||
|
for newpunct in newpunctuations[::-1]:
|
||||||
|
punctuations.insert(newpunct[0], newpunct[1])
|
||||||
|
for sentence in sentences:
|
||||||
|
if sentence[-1][-1] == ',':
|
||||||
|
sentence[-1] = sentence[-1][:-1]
|
||||||
|
except:
|
||||||
|
print('konnte nicht die gesammelten Characterisierungen prozessieren')
|
||||||
|
sentences = oldsentences
|
||||||
|
punctuations = oldpunctuations
|
||||||
|
|
||||||
|
|
||||||
for whatis in Whatisofnouns[::-1]:
|
|
||||||
thereisacomma = 0
|
|
||||||
#print(sentences[whatis[2]][whatis[1] - 1])
|
|
||||||
if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
|
|
||||||
|
|
||||||
thereisacomma = 1
|
|
||||||
if thereisacomma == 1:
|
|
||||||
#print(sentences[whatis[2]][whatis[0] - 1])
|
|
||||||
sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
|
|
||||||
del sentences[whatis[2]][whatis[0]:whatis[1]]
|
|
||||||
for newsent in newsentences[::-1]:
|
|
||||||
sentences.insert(newsent[0], newsent[1])
|
|
||||||
for newpunct in newpunctuations[::-1]:
|
|
||||||
punctuations.insert(newpunct[0], newpunct[1])
|
|
||||||
for sentence in sentences:
|
|
||||||
if sentence[-1][-1] == ',':
|
|
||||||
sentence[-1] = sentence[-1][:-1]
|
|
||||||
|
|
||||||
|
|
||||||
return sentences, punctuations
|
return sentences, punctuations
|
||||||
|
|
||||||
|
|
3
Prototyp/Verbesserungen/Input142.txt
Normal file
3
Prototyp/Verbesserungen/Input142.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
er hat als Trainer im Fußball gearbeitet, bis er nicht mehr konnte .
|
||||||
|
seine Beine schmerzten zu sehr und er konnte nicht mehr lange stehen .
|
||||||
|
außerdem tat ihm auch seine Stimme weh, denn er musste immer soviel schreien, weil die Kinder nicht richtig Fußball spielten .
|
11
Prototyp/Verbesserungen/Output142.txt
Normal file
11
Prototyp/Verbesserungen/Output142.txt
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
er hat als Trainer im Fuss·ball gearbeitet .
|
||||||
|
ein Trainer ist eine Person .
|
||||||
|
ein Trainer leitet Menschen an .
|
||||||
|
zum Beispiel beim Sport .
|
||||||
|
das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
|
||||||
|
Fuss·ball - die Ball-sport-art Fuss·ball - eine Zeitung Fuss·ball - das Sport-Geraet .
|
||||||
|
bis er geringerer konnte .
|
||||||
|
seine Beine schmerzten zu sehr und er konnte geringerer lange stehen .
|
||||||
|
ausserdem tat ihm auch seine Stimme weh .
|
||||||
|
denn er musste immer soviel schreien .
|
||||||
|
weil die Kinder pseudo Fuss·ball spielten .
|
12
Prototyp/Verbesserungen/Verbesserungen142.txt
Normal file
12
Prototyp/Verbesserungen/Verbesserungen142.txt
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
er hat als Trainer im Fuss·ball gearbeitet .
|
||||||
|
ein Trainer ist eine Person .
|
||||||
|
ein Trainer leitet Menschen an .
|
||||||
|
zum Beispiel beim Sport .
|
||||||
|
das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
|
||||||
|
Fuss·ball - die Ball•sport•art, Fuss·ball - eine Zeitung, Fuss·ball - das Sport•gerät .
|
||||||
|
er konnte nicht mehr .
|
||||||
|
seine Beine schmerzten zu sehr .
|
||||||
|
und er konnte nicht mehr lange stehen .
|
||||||
|
ausserdem tat ihm auch seine Stimme weh .
|
||||||
|
denn er musste immer soviel schreien .
|
||||||
|
wenn die Kinder Fuss·ball spielten .
|
|
@ -1 +1 @@
|
||||||
141
|
142
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in a new issue