Folgefehler des implementierten Error Handling behoben + Spezialfall doppelte Abkuerzung am Satzende Fehler behoben
This commit is contained in:
parent
441eeed1d6
commit
ed40090463
15 changed files with 70 additions and 37 deletions
|
@ -241,7 +241,7 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"loading SentSeg Databases\n",
|
"loading SolveShorts Databases\n",
|
||||||
"Creating the bag of words...\n",
|
"Creating the bag of words...\n",
|
||||||
"\n",
|
"\n",
|
||||||
"dumping the data to hkl format..\n",
|
"dumping the data to hkl format..\n",
|
||||||
|
@ -257,7 +257,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "6793c5121aaf498e8960726a40709e19",
|
"model_id": "82646fa586ba44aabc1608ec7a268b2c",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -315,6 +315,10 @@
|
||||||
"import dill\n",
|
"import dill\n",
|
||||||
"dill.load_session('voilastate.db')\n",
|
"dill.load_session('voilastate.db')\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"#from SolveShorts import *\n",
|
||||||
|
"#print('loading SolveShorts Databases')\n",
|
||||||
|
"#solSh.load_DB_into_FASTsearch()\n",
|
||||||
|
"\n",
|
||||||
"#import SentSeg\n",
|
"#import SentSeg\n",
|
||||||
"#sent_seg = SentSeg.SentSeg('de')\n",
|
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||||
"#print('loading SentSeg Databases')\n",
|
"#print('loading SentSeg Databases')\n",
|
||||||
|
@ -440,7 +444,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import ipywidgets\n",
|
"import ipywidgets\n",
|
||||||
"out = 'Hier kommt der übersetzte Text raus.'\n",
|
"out = 'Hier kommt der übersetzte Text heraus.'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"TextFeld.add_class(\"thotext\")\n",
|
"TextFeld.add_class(\"thotext\")\n",
|
||||||
"TextFelddouble.add_class(\"thotext\")\n",
|
"TextFelddouble.add_class(\"thotext\")\n",
|
||||||
|
@ -470,7 +474,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
"model_id": "9e547f27f67f484c9b455ead6f63afb2",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -607,7 +611,7 @@
|
||||||
" check = 1\n",
|
" check = 1\n",
|
||||||
" \n",
|
" \n",
|
||||||
" #print('sentences after cs', outsentences)\n",
|
" #print('sentences after cs', outsentences)\n",
|
||||||
" #print(len(punctuations))\n",
|
" print(len(punctuations))\n",
|
||||||
" \n",
|
" \n",
|
||||||
" if FremdWB.value == True:\n",
|
" if FremdWB.value == True:\n",
|
||||||
" if check == 1:\n",
|
" if check == 1:\n",
|
||||||
|
@ -617,6 +621,7 @@
|
||||||
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
|
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
|
||||||
" #print('outsentences')\n",
|
" #print('outsentences')\n",
|
||||||
" check = 1\n",
|
" check = 1\n",
|
||||||
|
" \n",
|
||||||
" if Medio.value == True:\n",
|
" if Medio.value == True:\n",
|
||||||
" if check == 1:\n",
|
" if check == 1:\n",
|
||||||
" insentences = outsentences\n",
|
" insentences = outsentences\n",
|
||||||
|
@ -630,7 +635,7 @@
|
||||||
" \n",
|
" \n",
|
||||||
" #print('sentences after fwb', outsentences)\n",
|
" #print('sentences after fwb', outsentences)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" #print(len(punctuations))\n",
|
" print(len(punctuations))\n",
|
||||||
" \n",
|
" \n",
|
||||||
" if check == 1:\n",
|
" if check == 1:\n",
|
||||||
" out = ''\n",
|
" out = ''\n",
|
||||||
|
@ -669,12 +674,12 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
"model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…"
|
"interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -690,7 +695,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"#print(widgets.interact_manual.opts)\n",
|
"#print(widgets.interact_manual.opts)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n",
|
"widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
|
||||||
"ola = widgets.interact_manual(function)\n",
|
"ola = widgets.interact_manual(function)\n",
|
||||||
"ola.widget.children[0].style.button_color = 'lightgreen'\n",
|
"ola.widget.children[0].style.button_color = 'lightgreen'\n",
|
||||||
"ola.widget.children[0].layout.height = '50px'\n",
|
"ola.widget.children[0].layout.height = '50px'\n",
|
||||||
|
@ -709,12 +714,12 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
"model_id": "564058b35ab743fabff90d4c49c5aac3",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…"
|
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -831,7 +836,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
"model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -857,7 +862,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
"model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -176,8 +176,9 @@ class FremdWB(object):
|
||||||
|
|
||||||
#print('the endsentence',sentence)
|
#print('the endsentence',sentence)
|
||||||
except:
|
except:
|
||||||
print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
|
#print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
|
||||||
if sentence != outsentences[-1]:
|
#print('outsentence und co ', outsentences[-1], eintrag, sentence)
|
||||||
|
if sentence != outsentences[-1] and alleeintraege[-1] != outsentences[-1]:
|
||||||
outsentences.append(sentence)
|
outsentences.append(sentence)
|
||||||
punctuations = oldpunctuations
|
punctuations = oldpunctuations
|
||||||
return outsentences, punctuations
|
return outsentences, punctuations
|
||||||
|
|
|
@ -241,7 +241,7 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"loading SentSeg Databases\n",
|
"loading SolveShorts Databases\n",
|
||||||
"Creating the bag of words...\n",
|
"Creating the bag of words...\n",
|
||||||
"\n",
|
"\n",
|
||||||
"dumping the data to hkl format..\n",
|
"dumping the data to hkl format..\n",
|
||||||
|
@ -257,7 +257,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "6793c5121aaf498e8960726a40709e19",
|
"model_id": "82646fa586ba44aabc1608ec7a268b2c",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -315,6 +315,10 @@
|
||||||
"import dill\n",
|
"import dill\n",
|
||||||
"dill.load_session('voilastate.db')\n",
|
"dill.load_session('voilastate.db')\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"#from SolveShorts import *\n",
|
||||||
|
"#print('loading SolveShorts Databases')\n",
|
||||||
|
"#solSh.load_DB_into_FASTsearch()\n",
|
||||||
|
"\n",
|
||||||
"#import SentSeg\n",
|
"#import SentSeg\n",
|
||||||
"#sent_seg = SentSeg.SentSeg('de')\n",
|
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||||
"#print('loading SentSeg Databases')\n",
|
"#print('loading SentSeg Databases')\n",
|
||||||
|
@ -440,7 +444,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import ipywidgets\n",
|
"import ipywidgets\n",
|
||||||
"out = 'Hier kommt der übersetzte Text raus.'\n",
|
"out = 'Hier kommt der übersetzte Text heraus.'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"TextFeld.add_class(\"thotext\")\n",
|
"TextFeld.add_class(\"thotext\")\n",
|
||||||
"TextFelddouble.add_class(\"thotext\")\n",
|
"TextFelddouble.add_class(\"thotext\")\n",
|
||||||
|
@ -470,7 +474,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
"model_id": "9e547f27f67f484c9b455ead6f63afb2",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -607,7 +611,7 @@
|
||||||
" check = 1\n",
|
" check = 1\n",
|
||||||
" \n",
|
" \n",
|
||||||
" #print('sentences after cs', outsentences)\n",
|
" #print('sentences after cs', outsentences)\n",
|
||||||
" #print(len(punctuations))\n",
|
" print(len(punctuations))\n",
|
||||||
" \n",
|
" \n",
|
||||||
" if FremdWB.value == True:\n",
|
" if FremdWB.value == True:\n",
|
||||||
" if check == 1:\n",
|
" if check == 1:\n",
|
||||||
|
@ -617,6 +621,7 @@
|
||||||
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
|
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
|
||||||
" #print('outsentences')\n",
|
" #print('outsentences')\n",
|
||||||
" check = 1\n",
|
" check = 1\n",
|
||||||
|
" \n",
|
||||||
" if Medio.value == True:\n",
|
" if Medio.value == True:\n",
|
||||||
" if check == 1:\n",
|
" if check == 1:\n",
|
||||||
" insentences = outsentences\n",
|
" insentences = outsentences\n",
|
||||||
|
@ -630,7 +635,7 @@
|
||||||
" \n",
|
" \n",
|
||||||
" #print('sentences after fwb', outsentences)\n",
|
" #print('sentences after fwb', outsentences)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" #print(len(punctuations))\n",
|
" print(len(punctuations))\n",
|
||||||
" \n",
|
" \n",
|
||||||
" if check == 1:\n",
|
" if check == 1:\n",
|
||||||
" out = ''\n",
|
" out = ''\n",
|
||||||
|
@ -669,12 +674,12 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
"model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…"
|
"interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -690,7 +695,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"#print(widgets.interact_manual.opts)\n",
|
"#print(widgets.interact_manual.opts)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n",
|
"widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
|
||||||
"ola = widgets.interact_manual(function)\n",
|
"ola = widgets.interact_manual(function)\n",
|
||||||
"ola.widget.children[0].style.button_color = 'lightgreen'\n",
|
"ola.widget.children[0].style.button_color = 'lightgreen'\n",
|
||||||
"ola.widget.children[0].layout.height = '50px'\n",
|
"ola.widget.children[0].layout.height = '50px'\n",
|
||||||
|
@ -709,12 +714,12 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
"model_id": "564058b35ab743fabff90d4c49c5aac3",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…"
|
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -831,7 +836,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
"model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
@ -857,7 +862,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
"model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
|
||||||
"version_major": 2,
|
"version_major": 2,
|
||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
|
|
|
@ -91,14 +91,19 @@ class SolveShorts(object):
|
||||||
doc = self.nlp(' '.join(sentence))
|
doc = self.nlp(' '.join(sentence))
|
||||||
#print('da sentence', sentence)
|
#print('da sentence', sentence)
|
||||||
newshorts = []
|
newshorts = []
|
||||||
for word in sentence:
|
wordcount = 0
|
||||||
|
for oriword in sentence:
|
||||||
|
wordcount += 1
|
||||||
|
if wordcount == len(sentence):
|
||||||
|
word = oriword + '.'
|
||||||
|
else:
|
||||||
|
word = oriword
|
||||||
newshort = []
|
newshort = []
|
||||||
prenewshort = []
|
prenewshort = []
|
||||||
punctcount = list(word).count('.')
|
punctcount = list(word).count('.')
|
||||||
#print(word, list(word), punctcount)
|
#print(word, list(word), punctcount)
|
||||||
if punctcount > 1:
|
if punctcount > 1:
|
||||||
replaceindex = sentence.index(word)
|
replaceindex = sentence.index(oriword)
|
||||||
dacount = 0
|
dacount = 0
|
||||||
for letter in list(word):
|
for letter in list(word):
|
||||||
#print('letter in word split', letter)
|
#print('letter in word split', letter)
|
||||||
|
@ -153,11 +158,18 @@ class SolveShorts(object):
|
||||||
interestingindex = m
|
interestingindex = m
|
||||||
break
|
break
|
||||||
if len(sentence) - n <= 5 and n != len(sentence) - 1:
|
if len(sentence) - n <= 5 and n != len(sentence) - 1:
|
||||||
for m in range((len(sentence) - n) - 1):
|
for m in range((len(sentence) - n)):
|
||||||
#print(n, m, n+m+1, len(sentence))
|
#print('oleolaolu',n, m, n+m+1, len(sentence))
|
||||||
if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.':
|
|
||||||
interestingindex = m
|
if m == (len(sentence) - n) - 1:
|
||||||
break
|
if sentence[n + m][-1] == '.':
|
||||||
|
interestingindex = m
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
|
||||||
|
if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.' :
|
||||||
|
interestingindex = m
|
||||||
|
break
|
||||||
|
|
||||||
#print(interestingindex, 'interestingindex')
|
#print(interestingindex, 'interestingindex')
|
||||||
if interestingindex == 0:
|
if interestingindex == 0:
|
||||||
|
|
2
Prototyp/Verbesserungen/Input143.txt
Normal file
2
Prototyp/Verbesserungen/Input143.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Prüfung nach mehrfach enttäuschten Versprechen gegenüber der Bürger-Ini auf juristischem Weg erzwungen werden .
|
||||||
|
die Volks-Initiative klagt seit Mai gegen die lange Prüfdauer ihres Anliegens, große Wohnungskonzerne gegen Entschädigungen zu vergesellschaften und spricht von Verschleppung .
|
3
Prototyp/Verbesserungen/Output143.txt
Normal file
3
Prototyp/Verbesserungen/Output143.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischem Weg erzwungen werden .
|
||||||
|
die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
|
||||||
|
Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften und spricht von Verschleppung .
|
5
Prototyp/Verbesserungen/Verbesserungen143.txt
Normal file
5
Prototyp/Verbesserungen/Verbesserungen143.txt
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
mit dem Rechtsmittel soll die Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischen Weg erzwungen werden .
|
||||||
|
die Pruefung dauert mittlerweile seit 439 Tagen an .
|
||||||
|
die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
|
||||||
|
ihr Anliegen ist Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften .
|
||||||
|
und die Volks-Initiative spricht von Verschleppung .
|
|
@ -1 +1 @@
|
||||||
142
|
143
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in a new issue