Link zum Code eingefuegt | Verbessertes Errorhandling fuer zwei FremdWB und CharAppend
This commit is contained in:
parent
2487d6de07
commit
441eeed1d6
15 changed files with 351 additions and 157 deletions
|
@ -36,7 +36,26 @@
|
|||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n"
|
||||
"<style>\n",
|
||||
"\n",
|
||||
".center {\n",
|
||||
" display: block;\n",
|
||||
" margin-left: auto;\n",
|
||||
" margin-right: auto;\n",
|
||||
" width: 20%;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"body {\n",
|
||||
" align: center;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n",
|
||||
"<body>\n",
|
||||
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||
"</a>\n",
|
||||
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||
"</body>\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
|
@ -48,7 +67,35 @@
|
|||
],
|
||||
"source": [
|
||||
"%%html\n",
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>"
|
||||
"<style>\n",
|
||||
"\n",
|
||||
".center {\n",
|
||||
" display: block;\n",
|
||||
" margin-left: auto;\n",
|
||||
" margin-right: auto;\n",
|
||||
" width: 20%;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"body {\n",
|
||||
" align: center;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n",
|
||||
"<body>\n",
|
||||
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||
"</a>\n",
|
||||
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||
"</body>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -66,7 +113,8 @@
|
|||
" font-family: courier;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n"
|
||||
"</style>\n",
|
||||
"\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
|
@ -85,7 +133,8 @@
|
|||
" font-family: courier;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n"
|
||||
"</style>\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -188,10 +237,27 @@
|
|||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading SentSeg Databases\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n",
|
||||
"dumping the session\n",
|
||||
"done\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c0fcb1a9556e4d54a43fd7a969210844",
|
||||
"model_id": "6793c5121aaf498e8960726a40709e19",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -249,6 +315,15 @@
|
|||
"import dill\n",
|
||||
"dill.load_session('voilastate.db')\n",
|
||||
"\n",
|
||||
"#import SentSeg\n",
|
||||
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||
"#print('loading SentSeg Databases')\n",
|
||||
"#sent_seg.LoadSentGlueSGDandGSUtils()\n",
|
||||
"\n",
|
||||
"#from FremdWB import *\n",
|
||||
"#fwb = FremdWB(None,None)\n",
|
||||
"#fwb.load_DB_into_FASTsearch()\n",
|
||||
"\n",
|
||||
"#from Medio import *\n",
|
||||
"#medi = Medio(None,None)\n",
|
||||
"#medi.load_DB_into_FASTsearch()\n",
|
||||
|
@ -395,7 +470,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "f49a3f799a884277ab40f1839c8c1afd",
|
||||
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -594,7 +669,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4c00c7b9b76e493481cb078f77f50258",
|
||||
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -634,7 +709,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9e7fab660d534ed7925dd0d21af74957",
|
||||
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -660,7 +735,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
|
||||
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
|
||||
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
|
||||
"VerbeTextFeld.layout.height = '180px'\n",
|
||||
"VerbeTextFeld.layout.width = '99%'\n",
|
||||
|
@ -756,7 +831,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "6408b21ab24c482fa3ddc9e047592bb8",
|
||||
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -782,7 +857,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
|
||||
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -125,6 +125,8 @@ class FremdWB(object):
|
|||
sentencecount = 0
|
||||
alleeintraege = []
|
||||
for sentence in sentences:
|
||||
oldpunctuations = punctuations
|
||||
try:
|
||||
#print('sentence', sentence)
|
||||
sentencecount += 1
|
||||
#print('processing sentence', sentencecount)
|
||||
|
@ -173,6 +175,11 @@ class FremdWB(object):
|
|||
|
||||
|
||||
#print('the endsentence',sentence)
|
||||
except:
|
||||
print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
|
||||
if sentence != outsentences[-1]:
|
||||
outsentences.append(sentence)
|
||||
punctuations = oldpunctuations
|
||||
return outsentences, punctuations
|
||||
|
||||
|
|
@ -36,7 +36,26 @@
|
|||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>\n"
|
||||
"<style>\n",
|
||||
"\n",
|
||||
".center {\n",
|
||||
" display: block;\n",
|
||||
" margin-left: auto;\n",
|
||||
" margin-right: auto;\n",
|
||||
" width: 20%;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"body {\n",
|
||||
" align: center;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n",
|
||||
"<body>\n",
|
||||
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||
"</a>\n",
|
||||
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||
"</body>\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
|
@ -48,7 +67,35 @@
|
|||
],
|
||||
"source": [
|
||||
"%%html\n",
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" align=\"center\"/>"
|
||||
"<style>\n",
|
||||
"\n",
|
||||
".center {\n",
|
||||
" display: block;\n",
|
||||
" margin-left: auto;\n",
|
||||
" margin-right: auto;\n",
|
||||
" width: 20%;\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"body {\n",
|
||||
" align: center;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n",
|
||||
"<body>\n",
|
||||
"<a href=\"https://basabuuka.zapto.org/alpcentaur/Basabuuka_Prototyp\">\n",
|
||||
"<img src=\"brainBasaBuuka5.png\" width=\"300\" height=\"200\" class=\"center\"/>\n",
|
||||
"</a>\n",
|
||||
"<p style=\"font-family: courier\"><center><b>Du kommst zu dem Code - wenn du auf das Logo klickst!</b></center></p> \n",
|
||||
"</body>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -66,7 +113,8 @@
|
|||
" font-family: courier;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n"
|
||||
"</style>\n",
|
||||
"\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
|
@ -85,7 +133,8 @@
|
|||
" font-family: courier;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"</style>\n"
|
||||
"</style>\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -188,10 +237,27 @@
|
|||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading SentSeg Databases\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n",
|
||||
"dumping the session\n",
|
||||
"done\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c0fcb1a9556e4d54a43fd7a969210844",
|
||||
"model_id": "6793c5121aaf498e8960726a40709e19",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -249,6 +315,15 @@
|
|||
"import dill\n",
|
||||
"dill.load_session('voilastate.db')\n",
|
||||
"\n",
|
||||
"#import SentSeg\n",
|
||||
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||
"#print('loading SentSeg Databases')\n",
|
||||
"#sent_seg.LoadSentGlueSGDandGSUtils()\n",
|
||||
"\n",
|
||||
"#from FremdWB import *\n",
|
||||
"#fwb = FremdWB(None,None)\n",
|
||||
"#fwb.load_DB_into_FASTsearch()\n",
|
||||
"\n",
|
||||
"#from Medio import *\n",
|
||||
"#medi = Medio(None,None)\n",
|
||||
"#medi.load_DB_into_FASTsearch()\n",
|
||||
|
@ -395,7 +470,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "f49a3f799a884277ab40f1839c8c1afd",
|
||||
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -594,7 +669,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4c00c7b9b76e493481cb078f77f50258",
|
||||
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -634,7 +709,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9e7fab660d534ed7925dd0d21af74957",
|
||||
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -660,7 +735,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, bitte unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\']' \n",
|
||||
"beschreibung = 'Textfeld für die verbesserte Übersetzung. Satzzeichen bitte nicht vergessen! \\nFalls ein neuer Eintrag ins Fremdwörterbuch vorgenommen werden soll, schreibe einfach unter die Verbesserungen einen Eintrag der Form: [\\'Abfall\\', \\'Abfall sind Sachen die wir nicht mehr brauchen\\'] \\nFalls ein neuer Eintrag in die Mediopunkte-Datenbank vorgenommen werden soll, bitte schreibe unter die Verbesserungen einen Eintrag der Form: [\\'Mediopunkt\\', \\'Medio·punkt\\']' \n",
|
||||
"VerbeTextFeld = ipywidgets.Textarea(placeholder=beschreibung, disabled=False)\n",
|
||||
"VerbeTextFeld.layout.height = '180px'\n",
|
||||
"VerbeTextFeld.layout.width = '99%'\n",
|
||||
|
@ -756,7 +831,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "6408b21ab24c482fa3ddc9e047592bb8",
|
||||
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -782,7 +857,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "7702fbe3ca5b4041a3d4e9b5167d8f38",
|
||||
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
|
|
@ -2110,8 +2110,12 @@ class SentSeg(object):
|
|||
newsentences = []
|
||||
newpunctuations = []
|
||||
Whatisofnouns = []
|
||||
oldsentences = sentences
|
||||
oldpunctuations = punctuations
|
||||
for hauptindex in range(len(sentences)):
|
||||
|
||||
sentence = sentences[hauptindex]
|
||||
try:
|
||||
#for triple in triples:
|
||||
# AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
|
||||
# for tripleinwor in tripleInWords:
|
||||
|
@ -2181,7 +2185,9 @@ class SentSeg(object):
|
|||
Whatisofnouns.append([n + 1, n + 4, hauptindex])
|
||||
else:
|
||||
Whatisofnouns.append([n + 1, n + 5, hauptindex])
|
||||
|
||||
except:
|
||||
print('Konnte nicht ' + str(sentence) + 'in Characterisierung pro Satz prozessieren..')
|
||||
try:
|
||||
for whatis in Whatisofnouns[::-1]:
|
||||
thereisacomma = 0
|
||||
#print(sentences[whatis[2]][whatis[1] - 1])
|
||||
|
@ -2199,6 +2205,11 @@ class SentSeg(object):
|
|||
for sentence in sentences:
|
||||
if sentence[-1][-1] == ',':
|
||||
sentence[-1] = sentence[-1][:-1]
|
||||
except:
|
||||
print('konnte nicht die gesammelten Characterisierungen prozessieren')
|
||||
sentences = oldsentences
|
||||
punctuations = oldpunctuations
|
||||
|
||||
|
||||
|
||||
return sentences, punctuations
|
||||
|
|
3
Prototyp/Verbesserungen/Input142.txt
Normal file
3
Prototyp/Verbesserungen/Input142.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
er hat als Trainer im Fußball gearbeitet, bis er nicht mehr konnte .
|
||||
seine Beine schmerzten zu sehr und er konnte nicht mehr lange stehen .
|
||||
außerdem tat ihm auch seine Stimme weh, denn er musste immer soviel schreien, weil die Kinder nicht richtig Fußball spielten .
|
11
Prototyp/Verbesserungen/Output142.txt
Normal file
11
Prototyp/Verbesserungen/Output142.txt
Normal file
|
@ -0,0 +1,11 @@
|
|||
er hat als Trainer im Fuss·ball gearbeitet .
|
||||
ein Trainer ist eine Person .
|
||||
ein Trainer leitet Menschen an .
|
||||
zum Beispiel beim Sport .
|
||||
das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
|
||||
Fuss·ball - die Ball-sport-art Fuss·ball - eine Zeitung Fuss·ball - das Sport-Geraet .
|
||||
bis er geringerer konnte .
|
||||
seine Beine schmerzten zu sehr und er konnte geringerer lange stehen .
|
||||
ausserdem tat ihm auch seine Stimme weh .
|
||||
denn er musste immer soviel schreien .
|
||||
weil die Kinder pseudo Fuss·ball spielten .
|
12
Prototyp/Verbesserungen/Verbesserungen142.txt
Normal file
12
Prototyp/Verbesserungen/Verbesserungen142.txt
Normal file
|
@ -0,0 +1,12 @@
|
|||
er hat als Trainer im Fuss·ball gearbeitet .
|
||||
ein Trainer ist eine Person .
|
||||
ein Trainer leitet Menschen an .
|
||||
zum Beispiel beim Sport .
|
||||
das Wort Fuss·ball kann 3 verschiedene Bedeutungen haben .
|
||||
Fuss·ball - die Ball•sport•art, Fuss·ball - eine Zeitung, Fuss·ball - das Sport•gerät .
|
||||
er konnte nicht mehr .
|
||||
seine Beine schmerzten zu sehr .
|
||||
und er konnte nicht mehr lange stehen .
|
||||
ausserdem tat ihm auch seine Stimme weh .
|
||||
denn er musste immer soviel schreien .
|
||||
wenn die Kinder Fuss·ball spielten .
|
|
@ -1 +1 @@
|
|||
141
|
||||
142
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in a new issue