674 lines
27 KiB
Python
674 lines
27 KiB
Python
|
|
||
|
|
||
|
import spacy
|
||
|
import nltk
|
||
|
from nltk.stem.snowball import SnowballStemmer
|
||
|
|
||
|
import hickle as hkl
|
||
|
import FASTsearch
|
||
|
|
||
|
# Module-level German Snowball stemmer. Passiv2Aktiv.__init__ creates its own
# instance (self.stemmer), so this one is not used by the class below.
stemmer = SnowballStemmer("german")
|
||
|
|
||
|
|
||
|
class Passiv2Aktiv(object):
    """Rewrite German passive-voice sentences as active-voice sentences.

    The verb group of a sentence is looked up (through ``FASTsearch``) in
    pre-built hickle databases of Vorgangspassiv and Zustandspassiv forms.
    For a matching sentence the passive verbs are replaced by the
    3rd-person-singular active form, the old subject pronoun is turned into
    its object form, the ``von`` agent phrase (or ``jemand`` when there is
    none) is put into the sentence, and ``self.sgm`` is asked to pick the
    best word order.

    :meth:`load_DB_into_FASTsearch` must be called before
    :meth:`replacePassivForms`.
    """

    # Finite forms of "werden"/"sein" that, when they are the only verb of a
    # sentence, are not treated as a passive construction.
    _BARE_AUXILIARIES = ('wurde', 'wird', 'werden', 'wirst', 'werde', 'war')
    # Subject pronoun -> object form appended after the rewrite.
    _OBJECT_FORMS = (('ich', 'mich'), ('du', 'dich'), ('ihr', 'euch'), ('wir', 'uns'))
    # spaCy fine-grained tags that count as nouns in the agent-phrase check.
    _NOUN_TAGS = ('NN', 'NE')
    # Above this number of grammar pieces no permutations are generated.
    _MAX_GRAMMAR_PIECES = 7

    def __init__(self, hklDatabaseDir_Aktiv, hklDatabaseDir_Vorgangspassiv, hklDatabaseDir_Zustandspassiv):
        """Load the optional hickle databases, the spaCy model and the stemmer.

        Args:
            hklDatabaseDir_Aktiv: Path of the active-voice hickle database, or
                ``None`` to skip loading (``self.AktivDB`` is then not set).
            hklDatabaseDir_Vorgangspassiv: Same for ``self.VorgangspassivDB``.
            hklDatabaseDir_Zustandspassiv: Same for ``self.ZustandspassivDB``.
        """
        if hklDatabaseDir_Aktiv is not None:
            self.AktivDB = hkl.load(hklDatabaseDir_Aktiv)

        if hklDatabaseDir_Vorgangspassiv is not None:
            self.VorgangspassivDB = hkl.load(hklDatabaseDir_Vorgangspassiv)

        if hklDatabaseDir_Zustandspassiv is not None:
            self.ZustandspassivDB = hkl.load(hklDatabaseDir_Zustandspassiv)

        self.nlp = spacy.load('de_core_news_sm')
        self.stemmer = SnowballStemmer("german")

    def create_hklDB_from_csv(self, csvDbDir, StemOrNot):
        """Convert a text database into three hickle dumps.

        Every non-blank line of ``csvDbDir`` is evaluated as a Python
        expression and stored as a list in ``self.DB_All``. ``self.hkldb1``
        and ``self.hkldb2`` collect ``entry[0][0]`` and ``entry[1][0]`` of
        every entry. The three lists are dumped to files whose names are
        ``'hkldb_All'``, ``'hkldb1'`` and ``'hkldb2'`` followed by
        ``csvDbDir`` without its last four characters and ``'.hkl'``.

        Args:
            csvDbDir: Path of the input file.
            StemOrNot: Unused; kept so existing callers keep working.

        Returns:
            The string ``'done'``.
        """
        self.DB_All = []
        with open(csvDbDir) as lines:
            for line in lines:
                # Blank lines cannot be evaluated and carry no entry.
                if not line.strip():
                    continue
                # SECURITY NOTE(review): eval() executes arbitrary code from
                # the file. Only use trusted input; consider
                # ast.literal_eval if every line is a plain literal.
                self.DB_All.append(list(eval(line)))

        self.hkldb1 = []
        self.hkldb2 = []
        for counter, entry in enumerate(self.DB_All, start=1):
            # Progress output for large databases.
            if counter % 1000 == 0:
                print(counter)
            self.hkldb1.append([entry[0][0]])
            self.hkldb2.append([entry[1][0]])

        name_stem = csvDbDir[:-4]

        print('creating the hkl dump of DBAll')
        hkl.dump(self.DB_All, 'hkldb_All' + name_stem + '.hkl', mode='w', compression='lzf')

        print('Creating the hkl dump of DB 1')
        hkl.dump(self.hkldb1, 'hkldb1' + name_stem + '.hkl', mode='w', compression='lzf')

        print('Creating the hkl dump of DB 2')
        hkl.dump(self.hkldb2, 'hkldb2' + name_stem + '.hkl', mode='w', compression='lzf')

        return 'done'

    def load_DB_into_FASTsearch(self):
        """Load all databases, search indexes and helper models from disk.

        Reads the ``hkldb_All*.hkl`` databases, creates two ``FASTsearch``
        indexes per voice (Aktiv, Vorgangspassiv, Zustandspassiv) and loads
        their stored bag-of-words models, then initialises ``GS_Utils`` and
        ``SentGlueMach``. All file names are fixed and relative to the
        current working directory.

        The bag-of-words files must already exist. The original code kept
        commented-out calls of the form
        ``Gen_BoW_Model(20000, "word", punctuation=False)`` on each index for
        generating them the first time.
        """
        self.hkldbAktiv_All = hkl.load('hkldb_AllAktiv.hkl')
        self.hkldbVorgangspassiv_All = hkl.load('hkldb_AllVorgangspassiv.hkl')
        self.hkldbZustandspassiv_All = hkl.load('hkldb_AllZustandspassiv.hkl')

        self.fsearchAktiv1 = FASTsearch.FASTsearch('hkldb1Aktiv.hkl')
        self.fsearchAktiv2 = FASTsearch.FASTsearch('hkldb2Aktiv.hkl')
        self.fsearchAktiv1.Load_BoW_Model('bagofwordshkldb1Aktiv.pkl', 'DataBaseOneZeroshkldb1Aktiv.hkl')
        self.fsearchAktiv2.Load_BoW_Model('bagofwordshkldb2Aktiv.pkl', 'DataBaseOneZeroshkldb2Aktiv.hkl')

        self.fsearchVorgangspassiv1 = FASTsearch.FASTsearch('hkldb1Vorgangspassiv.hkl')
        self.fsearchVorgangspassiv2 = FASTsearch.FASTsearch('hkldb2Vorgangspassiv.hkl')
        self.fsearchVorgangspassiv1.Load_BoW_Model('bagofwordshkldb1Vorgangspassiv.pkl', 'DataBaseOneZeroshkldb1Vorgangspassiv.hkl')
        self.fsearchVorgangspassiv2.Load_BoW_Model('bagofwordshkldb2Vorgangspassiv.pkl', 'DataBaseOneZeroshkldb2Vorgangspassiv.hkl')

        self.fsearchZustandspassiv1 = FASTsearch.FASTsearch('hkldb1Zustandspassiv.hkl')
        self.fsearchZustandspassiv2 = FASTsearch.FASTsearch('hkldb2Zustandspassiv.hkl')
        self.fsearchZustandspassiv1.Load_BoW_Model('bagofwordshkldb1Zustandspassiv.pkl', 'DataBaseOneZeroshkldb1Zustandspassiv.hkl')
        self.fsearchZustandspassiv2.Load_BoW_Model('bagofwordshkldb2Zustandspassiv.pkl', 'DataBaseOneZeroshkldb2Zustandspassiv.hkl')

        # Imported here (as in the original) so the module can be imported
        # without these project modules being available.
        import GS_Utils
        self.gs = GS_Utils.GS_Utils('de_core_news_sm')

        from SentGlue import SentGlueMach
        self.sgm = SentGlueMach('trainedSGD.pkl', 'bagofwords.pkl')
        self.sgm.initialize()

    def replacePassivForms(self, sentences):
        """Return the sentences with recognised passive forms made active.

        Args:
            sentences: Iterable of sentences, each a list of word strings.
                Lists of sentences that are rewritten are modified in place.

        Returns:
            A list with one token list per input sentence. Sentences without
            a recognised passive form are returned as the same list object.

        Raises:
            ValueError: If a verb or subject pronoun found by spaCy is not an
                element of the sentence's token list.
        """
        endsentences = []
        for sentence in sentences:
            verbs, subjects, subject_positions, und_positions, er_positions = \
                self._analyse_sentence(sentence)

            passive_entry = self._find_passive_entry(verbs)
            if passive_entry is None:
                endsentences.append(sentence)
                continue

            active_verb = self._lookup_active_form(passive_entry)

            # Drop the passive verb group from the sentence.
            for verb in ' '.join(verbs).split():
                sentence.remove(verb)

            self._demote_subject(sentence, subjects, subject_positions,
                                 und_positions, er_positions)
            sentence.append(active_verb)
            self._replace_agent_phrase(sentence)
            endsentences.append(self._choose_word_order(sentence))

        return endsentences

    @classmethod
    def _is_bare_auxiliary(cls, verbs):
        """Return True when the only verb is a lone auxiliary form."""
        return len(verbs) == 1 and verbs[0] in cls._BARE_AUXILIARIES

    def _analyse_sentence(self, sentence):
        """Collect verbs, subjects and 1-based positions of 'und' / 'er' tokens."""
        verbs = []
        subjects = []
        subject_positions = []
        und_positions = []
        er_positions = []
        for position, token in enumerate(self.nlp(' '.join(sentence)), start=1):
            if token.dep_ == 'sb':
                subjects.append(token.text)
                subject_positions.append(position)
            if token.text in ('er', 'Er'):
                er_positions.append(position)
            if token.text == 'und':
                und_positions.append(position)
            # startswith() also copes with an empty tag string.
            if token.tag_.startswith('V'):
                verbs.append(token.text)
        return verbs, subjects, subject_positions, und_positions, er_positions

    def _find_passive_entry(self, verbs):
        """Return the matching passive database entry for the verbs, or None.

        The Vorgangspassiv database takes precedence when both match, so a
        sentence is rewritten at most once.
        """
        if self._is_bare_auxiliary(verbs):
            return None

        # The search key is the verb group followed by its character length.
        verb_key = ' '.join(verbs)
        verb_key += ' ' + str(len(verb_key))

        _, zustand_match = self.fsearchZustandspassiv1.search_with_highest_multiplikation_Output(verb_key, 1)
        _, vorgang_match = self.fsearchVorgangspassiv1.search_with_highest_multiplikation_Output(verb_key, 1)

        # match[0] indexes the database; match[1] is presumably the number of
        # matching tokens -- TODO confirm against FASTsearch.
        expected_score = len(verbs) + 1
        key_length = len(verb_key.split())
        candidates = (
            (vorgang_match, self.hkldbVorgangspassiv_All),
            (zustand_match, self.hkldbZustandspassiv_All),
        )
        for match, database in candidates:
            if match[1] != expected_score:
                continue
            entry = database[match[0]]
            # Reject hits whose stored form has a different number of tokens.
            if len(entry[0][0].split()) == key_length:
                return entry
        return None

    def _lookup_active_form(self, passive_entry):
        """Look up the 3rd-person-singular active form for a passive entry."""
        form_to_replace = '3. Person Singular ' + ' '.join(passive_entry[1][0].split()[-2:])
        active_match = self.fsearchAktiv2.search_with_highest_multiplikation_Output(form_to_replace, 1)[0]
        # The last token of the stored form is dropped, as in the original.
        return ' '.join(self.hkldbAktiv_All[active_match[0]][0][0].split()[:-1])

    def _demote_subject(self, sentence, subjects, subject_positions, und_positions, er_positions):
        """Replace subject pronouns in ``sentence`` by their object forms."""
        # "er" also counts as subject when joined to a subject by "und"
        # ("er und X" / "X und er").
        er_is_subject = 'er' in subjects or 'Er' in subjects
        for position in subject_positions:
            if (position - 1) in und_positions and (position - 2) in er_positions:
                er_is_subject = True
            if (position + 1) in und_positions and (position + 2) in er_positions:
                er_is_subject = True

        if er_is_subject:
            try:
                sentence.remove('Er')
            except ValueError:
                sentence.remove('er')
            sentence.append('ihn')

        for subject_form, object_form in self._OBJECT_FORMS:
            if subject_form in subjects:
                sentence.remove(subject_form)
                sentence.append(object_form)

    def _token_tags(self, word):
        """Return the spaCy fine-grained tags of ``word`` parsed on its own."""
        return [token.tag_ for token in self.nlp(word)]

    def _replace_agent_phrase(self, sentence):
        """Turn a 'von ...' / 'vom ...' agent into a subject, else add 'jemand'."""
        agent_found = False
        obsolete = set()  # positions of 'von' to delete; a set avoids double deletion
        for n in range(len(sentence) - 1):
            if sentence[n] == 'von':
                if sentence[n + 1] == 'ihr':
                    sentence[n + 1] = 'sie'
                    obsolete.add(n)
                    agent_found = True
                if sentence[n + 1] == 'ihm':
                    sentence[n + 1] = 'er'
                    obsolete.add(n)
                    agent_found = True

                # 'von' right after a noun is not treated as the agent marker.
                # A sentence-initial 'von' has no preceding word at all.
                follows_non_noun = n == 0 or any(
                    tag not in self._NOUN_TAGS for tag in self._token_tags(sentence[n - 1]))

                if follows_non_noun and any(
                        tag in self._NOUN_TAGS for tag in self._token_tags(sentence[n + 1])):
                    obsolete.add(n)
                    agent_found = True
                if sentence[n + 1] in ('dem', 'einem') and follows_non_noun:
                    sentence[n + 1] = 'ein'
                    obsolete.add(n)
                    agent_found = True
                if sentence[n + 1] in ('der', 'einer') and follows_non_noun:
                    sentence[n + 1] = 'eine'
                    obsolete.add(n)
                    agent_found = True

            if sentence[n] == 'vom':
                sentence[n] = 'ein'
                agent_found = True

        # Delete from the back so earlier positions stay valid.
        for index in sorted(obsolete, reverse=True):
            del sentence[index]

        if not agent_found:
            sentence.append('jemand')

    def _choose_word_order(self, sentence):
        """Return the word order chosen by ``self.sgm`` as a list of tokens."""
        tuples_to_check, triples_to_check, quadruples_to_check = self.gs.GetTuplesinSentence(sentence)
        grammar_pieces = self.gs.createTupleofGrammarpieces(
            sentence, tuples_to_check, triples_to_check, quadruples_to_check)

        if len(grammar_pieces) > self._MAX_GRAMMAR_PIECES:
            print('A sentence is too long, too many permutations. \n piping wrong grammar..')
            return ' '.join(grammar_pieces).split()

        candidates = []
        # NOTE(review): the original passed the last permutation (or the
        # sentence itself when there were none) as second argument because
        # its loop variable shadowed ``sentence``. Kept unchanged -- confirm
        # what GetBestSentenceFromSentencesAccordingToGrammar expects.
        reference = sentence
        for permutation in self.sgm.GeneratePermutationsOfSentence(grammar_pieces):
            candidates.append(' '.join(permutation))
            reference = permutation

        best_sentence = self.sgm.GetBestSentenceFromSentencesAccordingToGrammar(
            candidates, ' '.join(reference))
        return best_sentence.split()
|
||
|
|
||
|
|
||
|
|
||
|
# Vorgangspassiv (processual passive) is mapped to the same tense, 3rd person singular.
# Zustandspassiv (statal passive): always one tense further back, i.e.
# Präsens => Präteritum, Präteritum => Perfekt
|
||
|
|