Folgefehler des implementierten Error Handling behoben + Spezialfall doppelte Abkuerzung am Satzende Fehler behoben
This commit is contained in:
parent
441eeed1d6
commit
ed40090463
15 changed files with 70 additions and 37 deletions
|
@ -241,7 +241,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading SentSeg Databases\n",
|
||||
"loading SolveShorts Databases\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
|
@ -257,7 +257,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "6793c5121aaf498e8960726a40709e19",
|
||||
"model_id": "82646fa586ba44aabc1608ec7a268b2c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -315,6 +315,10 @@
|
|||
"import dill\n",
|
||||
"dill.load_session('voilastate.db')\n",
|
||||
"\n",
|
||||
"#from SolveShorts import *\n",
|
||||
"#print('loading SolveShorts Databases')\n",
|
||||
"#solSh.load_DB_into_FASTsearch()\n",
|
||||
"\n",
|
||||
"#import SentSeg\n",
|
||||
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||
"#print('loading SentSeg Databases')\n",
|
||||
|
@ -440,7 +444,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"import ipywidgets\n",
|
||||
"out = 'Hier kommt der übersetzte Text raus.'\n",
|
||||
"out = 'Hier kommt der übersetzte Text heraus.'\n",
|
||||
"\n",
|
||||
"TextFeld.add_class(\"thotext\")\n",
|
||||
"TextFelddouble.add_class(\"thotext\")\n",
|
||||
|
@ -470,7 +474,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
||||
"model_id": "9e547f27f67f484c9b455ead6f63afb2",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -607,7 +611,7 @@
|
|||
" check = 1\n",
|
||||
" \n",
|
||||
" #print('sentences after cs', outsentences)\n",
|
||||
" #print(len(punctuations))\n",
|
||||
" print(len(punctuations))\n",
|
||||
" \n",
|
||||
" if FremdWB.value == True:\n",
|
||||
" if check == 1:\n",
|
||||
|
@ -617,6 +621,7 @@
|
|||
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
|
||||
" #print('outsentences')\n",
|
||||
" check = 1\n",
|
||||
" \n",
|
||||
" if Medio.value == True:\n",
|
||||
" if check == 1:\n",
|
||||
" insentences = outsentences\n",
|
||||
|
@ -630,7 +635,7 @@
|
|||
" \n",
|
||||
" #print('sentences after fwb', outsentences)\n",
|
||||
" \n",
|
||||
" #print(len(punctuations))\n",
|
||||
" print(len(punctuations))\n",
|
||||
" \n",
|
||||
" if check == 1:\n",
|
||||
" out = ''\n",
|
||||
|
@ -669,12 +674,12 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
||||
"model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…"
|
||||
"interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
|
@ -690,7 +695,7 @@
|
|||
"\n",
|
||||
"#print(widgets.interact_manual.opts)\n",
|
||||
"\n",
|
||||
"widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n",
|
||||
"widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
|
||||
"ola = widgets.interact_manual(function)\n",
|
||||
"ola.widget.children[0].style.button_color = 'lightgreen'\n",
|
||||
"ola.widget.children[0].layout.height = '50px'\n",
|
||||
|
@ -709,12 +714,12 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
||||
"model_id": "564058b35ab743fabff90d4c49c5aac3",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…"
|
||||
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
|
@ -831,7 +836,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
||||
"model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -857,7 +862,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
||||
"model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -176,8 +176,9 @@ class FremdWB(object):
|
|||
|
||||
#print('the endsentence',sentence)
|
||||
except:
|
||||
print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
|
||||
if sentence != outsentences[-1]:
|
||||
#print('konnte nicht' + str(sentence) + 'in FremdWB prozessieren..')
|
||||
#print('outsentence und co ', outsentences[-1], eintrag, sentence)
|
||||
if sentence != outsentences[-1] and alleeintraege[-1] != outsentences[-1]:
|
||||
outsentences.append(sentence)
|
||||
punctuations = oldpunctuations
|
||||
return outsentences, punctuations
|
||||
|
|
|
@ -241,7 +241,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading SentSeg Databases\n",
|
||||
"loading SolveShorts Databases\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
|
@ -257,7 +257,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "6793c5121aaf498e8960726a40709e19",
|
||||
"model_id": "82646fa586ba44aabc1608ec7a268b2c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -315,6 +315,10 @@
|
|||
"import dill\n",
|
||||
"dill.load_session('voilastate.db')\n",
|
||||
"\n",
|
||||
"#from SolveShorts import *\n",
|
||||
"#print('loading SolveShorts Databases')\n",
|
||||
"#solSh.load_DB_into_FASTsearch()\n",
|
||||
"\n",
|
||||
"#import SentSeg\n",
|
||||
"#sent_seg = SentSeg.SentSeg('de')\n",
|
||||
"#print('loading SentSeg Databases')\n",
|
||||
|
@ -440,7 +444,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"import ipywidgets\n",
|
||||
"out = 'Hier kommt der übersetzte Text raus.'\n",
|
||||
"out = 'Hier kommt der übersetzte Text heraus.'\n",
|
||||
"\n",
|
||||
"TextFeld.add_class(\"thotext\")\n",
|
||||
"TextFelddouble.add_class(\"thotext\")\n",
|
||||
|
@ -470,7 +474,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c833de5ff5d340bbb1988584eee0c368",
|
||||
"model_id": "9e547f27f67f484c9b455ead6f63afb2",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -607,7 +611,7 @@
|
|||
" check = 1\n",
|
||||
" \n",
|
||||
" #print('sentences after cs', outsentences)\n",
|
||||
" #print(len(punctuations))\n",
|
||||
" print(len(punctuations))\n",
|
||||
" \n",
|
||||
" if FremdWB.value == True:\n",
|
||||
" if check == 1:\n",
|
||||
|
@ -617,6 +621,7 @@
|
|||
" outsentences, punctuations = fwb.fremdEintragAppend(insentences, punctuations)\n",
|
||||
" #print('outsentences')\n",
|
||||
" check = 1\n",
|
||||
" \n",
|
||||
" if Medio.value == True:\n",
|
||||
" if check == 1:\n",
|
||||
" insentences = outsentences\n",
|
||||
|
@ -630,7 +635,7 @@
|
|||
" \n",
|
||||
" #print('sentences after fwb', outsentences)\n",
|
||||
" \n",
|
||||
" #print(len(punctuations))\n",
|
||||
" print(len(punctuations))\n",
|
||||
" \n",
|
||||
" if check == 1:\n",
|
||||
" out = ''\n",
|
||||
|
@ -669,12 +674,12 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "aa136a24ef044b4fb5d10f6c9278d35f",
|
||||
"model_id": "0d27a028dcb449e2a2a6a7dfd25acd49",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"interactive(children=(Button(description='Übersetzen', style=ButtonStyle()), Output()), _dom_classes=('widget-…"
|
||||
"interactive(children=(Button(description='Übersetzen in Leichte Sprache', style=ButtonStyle()), Output()), _do…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
|
@ -690,7 +695,7 @@
|
|||
"\n",
|
||||
"#print(widgets.interact_manual.opts)\n",
|
||||
"\n",
|
||||
"widgets.interact_manual.opts['manual_name']= 'Übersetzen'\n",
|
||||
"widgets.interact_manual.opts['manual_name']= 'Übersetzen in Leichte Sprache'\n",
|
||||
"ola = widgets.interact_manual(function)\n",
|
||||
"ola.widget.children[0].style.button_color = 'lightgreen'\n",
|
||||
"ola.widget.children[0].layout.height = '50px'\n",
|
||||
|
@ -709,12 +714,12 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "1474be19da7a4b1bbd7fee229dd5a8ee",
|
||||
"model_id": "564058b35ab743fabff90d4c49c5aac3",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text rau…"
|
||||
"Textarea(value='', layout=Layout(height='180px', width='99%'), placeholder='Hier kommt der übersetzte Text her…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
|
@ -831,7 +836,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c2f3338821ae4ee59d205af8cb1083a8",
|
||||
"model_id": "2e67ffb1c4ec4ddeb2c18935f4d0fdc4",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
@ -857,7 +862,7 @@
|
|||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "1182e1d2f6c44b9ab0d50c9d388a2765",
|
||||
"model_id": "f8e8a92efa8e41bbb3efe44c35c37ec1",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
|
|
|
@ -91,14 +91,19 @@ class SolveShorts(object):
|
|||
doc = self.nlp(' '.join(sentence))
|
||||
#print('da sentence', sentence)
|
||||
newshorts = []
|
||||
for word in sentence:
|
||||
|
||||
wordcount = 0
|
||||
for oriword in sentence:
|
||||
wordcount += 1
|
||||
if wordcount == len(sentence):
|
||||
word = oriword + '.'
|
||||
else:
|
||||
word = oriword
|
||||
newshort = []
|
||||
prenewshort = []
|
||||
punctcount = list(word).count('.')
|
||||
#print(word, list(word), punctcount)
|
||||
if punctcount > 1:
|
||||
replaceindex = sentence.index(word)
|
||||
replaceindex = sentence.index(oriword)
|
||||
dacount = 0
|
||||
for letter in list(word):
|
||||
#print('letter in word split', letter)
|
||||
|
@ -153,11 +158,18 @@ class SolveShorts(object):
|
|||
interestingindex = m
|
||||
break
|
||||
if len(sentence) - n <= 5 and n != len(sentence) - 1:
|
||||
for m in range((len(sentence) - n) - 1):
|
||||
#print(n, m, n+m+1, len(sentence))
|
||||
if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.':
|
||||
interestingindex = m
|
||||
break
|
||||
for m in range((len(sentence) - n)):
|
||||
#print('oleolaolu',n, m, n+m+1, len(sentence))
|
||||
|
||||
if m == (len(sentence) - n) - 1:
|
||||
if sentence[n + m][-1] == '.':
|
||||
interestingindex = m
|
||||
break
|
||||
else:
|
||||
|
||||
if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.' :
|
||||
interestingindex = m
|
||||
break
|
||||
|
||||
#print(interestingindex, 'interestingindex')
|
||||
if interestingindex == 0:
|
||||
|
|
2
Prototyp/Verbesserungen/Input143.txt
Normal file
2
Prototyp/Verbesserungen/Input143.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Prüfung nach mehrfach enttäuschten Versprechen gegenüber der Bürger-Ini auf juristischem Weg erzwungen werden .
|
||||
die Volks-Initiative klagt seit Mai gegen die lange Prüfdauer ihres Anliegens, große Wohnungskonzerne gegen Entschädigungen zu vergesellschaften und spricht von Verschleppung .
|
3
Prototyp/Verbesserungen/Output143.txt
Normal file
3
Prototyp/Verbesserungen/Output143.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
mit dem Rechtsmittel soll die mittlerweile seit 439 Tagen (Stand Dienstag) andauernde Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischem Weg erzwungen werden .
|
||||
die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
|
||||
Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften und spricht von Verschleppung .
|
5
Prototyp/Verbesserungen/Verbesserungen143.txt
Normal file
5
Prototyp/Verbesserungen/Verbesserungen143.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
mit dem Rechtsmittel soll die Pruefung nach mehrfach enttaeuschten Versprechen gegenueber der Buerger-Ini auf juristischen Weg erzwungen werden .
|
||||
die Pruefung dauert mittlerweile seit 439 Tagen an .
|
||||
die Volks-Initiative klagt seit Mai gegen die lange Pruefdauer ihres Anliegens .
|
||||
ihr Anliegen ist Grosse Wohnungskonzerne gegen Entschaedigungen zu vergesellschaften .
|
||||
und die Volks-Initiative spricht von Verschleppung .
|
|
@ -1 +1 @@
|
|||
142
|
||||
143
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in a new issue