Folgefehler des implementierten Error-Handlings behoben + Fehler im Spezialfall einer doppelten Abkuerzung am Satzende behoben

This commit is contained in:
alpcentaur 2020-09-17 14:40:50 +02:00
parent 441eeed1d6
commit ed40090463
15 changed files with 70 additions and 37 deletions

View file

@ -241,7 +241,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"loading SentSeg Databases\n", "loading SolveShorts Databases\n",
"Creating the bag of words...\n", "Creating the bag of words...\n",
"\n", "\n",
"dumping the data to hkl format..\n", "dumping the data to hkl format..\n",
@ -257,7 +257,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "6793c5121aaf498e8960726a40709e19", "model_id": "82646fa586ba44aabc1608ec7a268b2c",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -315,6 +315,10 @@
"import dill\n", "import dill\n",
"dill.load_session('voilastate.db')\n", "dill.load_session('voilastate.db')\n",
"\n", "\n",
"#from SolveShorts import *\n",
"#print('loading SolveShorts Databases')\n",
"#solSh.load_DB_into_FASTsearch()\n",
"\n",
"#import SentSeg\n", "#import SentSeg\n",
"#sent_seg = SentSeg.SentSeg('de')\n", "#sent_seg = SentSeg.SentSeg('de')\n",
"#print('loading SentSeg Databases')\n", "#print('loading SentSeg Databases')\n",
@ -440,7 +444,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import ipywidgets\n", "import ipywidgets\n",
"out = 'Hier kommt der übersetzte Text raus.'\n", "out = 'Hier kommt der übersetzte Text heraus.'\n",
"\n", "\n",
"TextFeld.add_class(\"thotext\")\n", "TextFeld.add_class(\"thotext\")\n",
"TextFelddouble.add_class(\"thotext\")\n", "TextFelddouble.add_class(\"thotext\")\n",
@ -470,7 +474,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "c833de5ff5d340bbb1988584eee0c368", "model_id": "9e547f27f67f484c9b455ead6f63afb2",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -607,7 +611,7 @@
" check = 1\n", " check = 1\n",
" \n", " \n",
" #print('sentences after cs', outsentences)\n", " #print('sentences after cs', outsentences)\n",
" #print(len(punctuations))\n", " print(len(punctuations))\n",
" \n", " \n",
" if FremdWB.value == True:\n", " if FremdWB.value == True:\n",
" if check == 1:\n", " if check == 1:\n",
@ -617,6 +621,7 @@
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n", " outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
" #print('outsentences')\n", " #print('outsentences')\n",
" check = 1\n", " check = 1\n",
" \n",
" if Medio.value == True:\n", " if Medio.value == True:\n",
" if check == 1:\n", " if check == 1:\n",
" insentences = outsentences\n", " insentences = outsentences\n",
@ -630,7 +635,7 @@
" \n", " \n",
" #print('sentences after fwb', outsentences)\n", " #print('sentences after fwb', outsentences)\n",
" \n", " \n",
" #print(len(punctuations))\n", " print(len(punctuations))\n",
" \n", " \n",
" if check == 1:\n", " if check == 1:\n",
" out = ''\n", " out = ''\n",
@ -669,12 +674,12 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f", "model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
"text/plain": [ "text/plain": [
"interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…" "interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
] ]
}, },
"metadata": {}, "metadata": {},
@ -690,7 +695,7 @@
"\n", "\n",
"#print(widgets.interact_manual.opts)\n", "#print(widgets.interact_manual.opts)\n",
"\n", "\n",
"widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n", "widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
"ola = widgets.interact_manual(function)\n", "ola = widgets.interact_manual(function)\n",
"ola.widget.children[0].style.button_color = 'lightgreen'\n", "ola.widget.children[0].style.button_color = 'lightgreen'\n",
"ola.widget.children[0].layout.height = '50px'\n", "ola.widget.children[0].layout.height = '50px'\n",
@ -709,12 +714,12 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee", "model_id": "564058b35ab743fabff90d4c49c5aac3",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
"text/plain": [ "text/plain": [
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…" "Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
] ]
}, },
"metadata": {}, "metadata": {},
@ -831,7 +836,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "c2f3338821ae4ee59d205af8cb1083a8", "model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -857,7 +862,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765", "model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },

View file

@ -176,8 +176,9 @@ class FremdWB(object):
#print('the endsentence',sentence) #print('the endsentence',sentence)
except: except:
print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..') #print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
if sentence != outsentences[-1]: #print('outsentence und co ', outsentences[-1], eintrag, sentence)
if sentence != outsentences[-1] and alleeintraege[-1] != outsentences[-1]:
outsentences.append(sentence) outsentences.append(sentence)
punctuations = oldpunctuations punctuations = oldpunctuations
return outsentences, punctuations return outsentences, punctuations

View file

@ -241,7 +241,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"loading SentSeg Databases\n", "loading SolveShorts Databases\n",
"Creating the bag of words...\n", "Creating the bag of words...\n",
"\n", "\n",
"dumping the data to hkl format..\n", "dumping the data to hkl format..\n",
@ -257,7 +257,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "6793c5121aaf498e8960726a40709e19", "model_id": "82646fa586ba44aabc1608ec7a268b2c",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -315,6 +315,10 @@
"import dill\n", "import dill\n",
"dill.load_session('voilastate.db')\n", "dill.load_session('voilastate.db')\n",
"\n", "\n",
"#from SolveShorts import *\n",
"#print('loading SolveShorts Databases')\n",
"#solSh.load_DB_into_FASTsearch()\n",
"\n",
"#import SentSeg\n", "#import SentSeg\n",
"#sent_seg = SentSeg.SentSeg('de')\n", "#sent_seg = SentSeg.SentSeg('de')\n",
"#print('loading SentSeg Databases')\n", "#print('loading SentSeg Databases')\n",
@ -440,7 +444,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import ipywidgets\n", "import ipywidgets\n",
"out = 'Hier kommt der übersetzte Text raus.'\n", "out = 'Hier kommt der übersetzte Text heraus.'\n",
"\n", "\n",
"TextFeld.add_class(\"thotext\")\n", "TextFeld.add_class(\"thotext\")\n",
"TextFelddouble.add_class(\"thotext\")\n", "TextFelddouble.add_class(\"thotext\")\n",
@ -470,7 +474,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "c833de5ff5d340bbb1988584eee0c368", "model_id": "9e547f27f67f484c9b455ead6f63afb2",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -607,7 +611,7 @@
" check = 1\n", " check = 1\n",
" \n", " \n",
" #print('sentences after cs', outsentences)\n", " #print('sentences after cs', outsentences)\n",
" #print(len(punctuations))\n", " print(len(punctuations))\n",
" \n", " \n",
" if FremdWB.value == True:\n", " if FremdWB.value == True:\n",
" if check == 1:\n", " if check == 1:\n",
@ -617,6 +621,7 @@
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n", " outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
" #print('outsentences')\n", " #print('outsentences')\n",
" check = 1\n", " check = 1\n",
" \n",
" if Medio.value == True:\n", " if Medio.value == True:\n",
" if check == 1:\n", " if check == 1:\n",
" insentences = outsentences\n", " insentences = outsentences\n",
@ -630,7 +635,7 @@
" \n", " \n",
" #print('sentences after fwb', outsentences)\n", " #print('sentences after fwb', outsentences)\n",
" \n", " \n",
" #print(len(punctuations))\n", " print(len(punctuations))\n",
" \n", " \n",
" if check == 1:\n", " if check == 1:\n",
" out = ''\n", " out = ''\n",
@ -669,12 +674,12 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f", "model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
"text/plain": [ "text/plain": [
"interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…" "interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
] ]
}, },
"metadata": {}, "metadata": {},
@ -690,7 +695,7 @@
"\n", "\n",
"#print(widgets.interact_manual.opts)\n", "#print(widgets.interact_manual.opts)\n",
"\n", "\n",
"widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n", "widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
"ola = widgets.interact_manual(function)\n", "ola = widgets.interact_manual(function)\n",
"ola.widget.children[0].style.button_color = 'lightgreen'\n", "ola.widget.children[0].style.button_color = 'lightgreen'\n",
"ola.widget.children[0].layout.height = '50px'\n", "ola.widget.children[0].layout.height = '50px'\n",
@ -709,12 +714,12 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee", "model_id": "564058b35ab743fabff90d4c49c5aac3",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
"text/plain": [ "text/plain": [
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…" "Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
] ]
}, },
"metadata": {}, "metadata": {},
@ -831,7 +836,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "c2f3338821ae4ee59d205af8cb1083a8", "model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },
@ -857,7 +862,7 @@
{ {
"data": { "data": {
"application/vnd.jupyter.widget-view+json": { "application/vnd.jupyter.widget-view+json": {
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765", "model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
"version_major": 2, "version_major": 2,
"version_minor": 0 "version_minor": 0
}, },

View file

@ -91,14 +91,19 @@ class SolveShorts(object):
doc = self.nlp(' '.join(sentence)) doc = self.nlp(' '.join(sentence))
#print('da sentence', sentence) #print('da sentence', sentence)
newshorts = [] newshorts = []
for word in sentence: wordcount = 0
for oriword in sentence:
wordcount += 1
if wordcount == len(sentence):
word = oriword + '.'
else:
word = oriword
newshort = [] newshort = []
prenewshort = [] prenewshort = []
punctcount = list(word).count('.') punctcount = list(word).count('.')
#print(word, list(word), punctcount) #print(word, list(word), punctcount)
if punctcount > 1: if punctcount > 1:
replaceindex = sentence.index(word) replaceindex = sentence.index(oriword)
dacount = 0 dacount = 0
for letter in list(word): for letter in list(word):
#print('letter in word split', letter) #print('letter in word split', letter)
@ -153,9 +158,16 @@ class SolveShorts(object):
interestingindex = m interestingindex = m
break break
if len(sentence) - n <= 5 and n != len(sentence) - 1: if len(sentence) - n <= 5 and n != len(sentence) - 1:
for m in range((len(sentence) - n) - 1): for m in range((len(sentence) - n)):
#print(n, m, n+m+1, len(sentence)) #print('oleolaolu',n, m, n+m+1, len(sentence))
if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.':
if m == (len(sentence) - n) - 1:
if sentence[n + m][-1] == '.':
interestingindex = m
break
else:
if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.' :
interestingindex = m interestingindex = m
break break

View file

@ -0,0 +1,2 @@
mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Prüfung nach mehrfach enttäuschten Versprechen gegenüber der Bürger-Ini auf juristischem Weg erzwungen werden .
die Volks-Initiative klagt seit Mai gegen die lange Prüfdauer ihres Anliegens, große Wohnungskonzerne gegen Entschädigungen zu vergesellschaften und spricht von Verschleppung .

View file

@ -0,0 +1,3 @@
mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischem Weg erzwungen werden .
die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften und spricht von Verschleppung .

View file

@ -0,0 +1,5 @@
mit dem Rechtsmittel soll die Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischem Weg erzwungen werden .
die Pruefung dauert mittlerweile seit 439 Tagen an .
die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
ihr Anliegen ist Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften .
und die Volks-Initiative spricht von Verschleppung .

View file

@ -1 +1 @@
142 143

Binary file not shown.