522 lines
24 KiB
Python
522 lines
24 KiB
Python
|
|
||
|
# class to implement GS utils and Search
|
||
|
|
||
|
import resource
|
||
|
|
||
|
|
||
|
|
||
|
class GS_Utils(object):
|
||
|
|
||
|
def __init__(self, language):
    """Create the helper and load the spaCy pipeline it works with.

    Args:
        language: Passed unchanged to ``spacy.load``.
    """
    # spaCy is imported at construction time rather than at module import time.
    import spacy

    pipeline = spacy.load(language)
    self.nlp = pipeline
    self.oi = 'oi'
|
||
|
|
||
|
|
||
|
|
||
|
def Sentence2GrammarSchema(self, sentence, spacyclass):
    """Return one annotation per token of ``sentence``, skipping short ones.

    The sentence is run through ``self.nlp`` and ``spacyclass`` is resolved
    against every token.  Annotations whose length is 0 or 1 are dropped, so
    the result can be shorter than the token sequence.

    Args:
        sentence: Text handed to ``self.nlp``.
        spacyclass: Expression naming the token annotation, written with the
            token called ``word`` (this file uses ``'word.tag_'`` and
            ``'word.dep_'``).  Original author's note: ``word.pos_`` is
            suitable for noun and verb, ``word.dep_`` for sb/pd, and
            possibly ``tag``.

    Returns:
        A list with the annotations that are longer than one character.
    """
    from operator import attrgetter

    doc = self.nlp(sentence)

    expression = spacyclass.strip()
    prefix = 'word.'
    path = expression[len(prefix):].split('.') if expression.startswith(prefix) else []

    if path and all(part.isidentifier() for part in path):
        # Plain attribute access ("word.tag_", "word.head.dep_", ...):
        # resolve it directly instead of evaluating source text.
        read_annotation = attrgetter('.'.join(path))
        annotations = [read_annotation(word) for word in doc]
    else:
        # SECURITY: kept only for backward compatibility with callers that
        # pass something other than a dotted attribute path.  eval() runs
        # arbitrary code, so ``spacyclass`` must never come from untrusted
        # input.  The loop variable has to be called ``word`` because the
        # expression refers to it by that name.
        annotations = []
        for word in doc:
            annotations.append(eval(spacyclass))

    return [annotation for annotation in annotations if len(annotation) > 1]
|
||
|
|
||
|
def Sentence2RightGrammarTupel(self, sentence, gs_sentence, right_gs_tupel):
    """Re-order the words of ``sentence`` after each schema in ``right_gs_tupel``.

    Args:
        sentence: Text; it is split on whitespace into words.
        gs_sentence: Annotations, where entry ``i`` is taken to describe word ``i``.
        right_gs_tupel: Iterable of schemas, each a whitespace-separated
            string of annotations.

    Returns:
        One word list per schema.  For every annotation of the schema, in
        order, the word at the first not-yet-used position of ``gs_sentence``
        carrying that annotation is taken; annotations without a free
        position contribute nothing.
    """
    words = sentence.split()
    reordered = []

    for schema in right_gs_tupel:
        taken = set()
        picked = []
        for wanted in schema.split():
            position = next(
                (index for index, annotation in enumerate(gs_sentence)
                 if annotation == wanted and index not in taken),
                None,
            )
            if position is None:
                continue
            picked.append(words[position])
            taken.add(position)
        reordered.append(picked)

    return reordered
|
||
|
|
||
|
|
||
|
# gets the best grammar scheme from both, depending on which one appears the most in both, and whether rules are still present.
|
||
|
def GetBestgsAccordingRules(self, sentence, gs_sentence1, right_gs_tupel1, right_gs_tupel2, grammcorr_sentences1, grammcorr_sentences2, rules, specialrules):
    """Pick the best candidate from ``grammcorr_sentences1``.

    Every candidate (a list of words) gets a score built from three parts:

    1. For each candidate in ``grammcorr_sentences2``, the number of
       positions at which both candidates hold the same word.
    2. For each rule (2 to 5 annotations) that occurs contiguously in
       ``gs_sentence1``: the words of ``sentence`` at the last such
       occurrence are looked up, and every contiguous occurrence of those
       words in the candidate adds ``40 * len(rule)``.
    3. For each special rule (2 or 3 annotations) that occurs contiguously
       in the candidate's own schema ``right_gs_tupel1[i]``: the candidate's
       word count is added per occurrence.

    Args:
        sentence: Input text; split on whitespace for the rule lookup.
        gs_sentence1: Annotations of ``sentence``, assumed aligned with its
            whitespace-separated words.
        right_gs_tupel1: Schema of each candidate, either a
            whitespace-separated string or a sequence of annotations.
        right_gs_tupel2: Unused; kept for interface compatibility.
        grammcorr_sentences1: Candidates to choose from.
        grammcorr_sentences2: Candidates to compare against.
        rules: Sequences of annotations; other lengths than 2-5 are ignored.
        specialrules: Sequences of annotations; other lengths than 2-3 are
            ignored.

    Returns:
        The highest-scoring element of ``grammcorr_sentences1``; on a tie the
        one with the highest index.

    Raises:
        IndexError: If ``grammcorr_sentences1`` is empty.
    """
    rule_bonus = 40
    rule_lengths = (2, 3, 4, 5)
    special_rule_lengths = (2, 3)

    def window_starts(sequence, pattern):
        """Return every index at which ``pattern`` occurs contiguously."""
        pattern = list(pattern)
        width = len(pattern)
        return [start for start in range(len(sequence) - width + 1)
                if list(sequence[start:start + width]) == pattern]

    def tags_of(schema):
        """Return the annotations of a schema given as string or sequence."""
        return schema.split() if isinstance(schema, str) else list(schema)

    if not grammcorr_sentences1:
        raise IndexError('no candidate sentences to choose from')

    words = sentence.split()

    # Part 1: position-wise agreement with the second candidate set.
    scores = [
        sum(left == right
            for other in grammcorr_sentences2
            for left, right in zip(candidate, other))
        for candidate in grammcorr_sentences1
    ]

    # Part 2: reward candidates that keep the words of a matched rule together.
    for rule in rules:
        width = len(rule)
        if width not in rule_lengths:
            continue
        starts = window_starts(gs_sentence1, rule)
        if not starts:
            continue
        # When the rule occurs several times the last occurrence is used.
        rule_words = words[starts[-1]:starts[-1] + width]
        if len(rule_words) < width:
            # The whitespace split is shorter than the annotation list, so
            # the rule cannot be mapped back onto words.
            continue
        for index, candidate in enumerate(grammcorr_sentences1):
            scores[index] += rule_bonus * width * len(window_starts(candidate, rule_words))

    # Part 3: reward candidates whose own schema contains a special rule.
    # The window moves along the annotations of the candidate's schema and
    # is limited to as many annotations as the candidate has words.
    for index, candidate in enumerate(grammcorr_sentences1):
        for special in specialrules:
            width = len(special)
            if width not in special_rule_lengths or len(candidate) < width:
                continue
            tags = tags_of(right_gs_tupel1[index])[:len(candidate)]
            scores[index] += len(candidate) * len(window_starts(tags, special))

    best = max(range(len(scores)), key=lambda position: (scores[position], position))
    return grammcorr_sentences1[best]
|
||
|
|
||
|
|
||
|
def checkSPO(self, splitsentence, convertedornot):
    """Report which of three groups of dependency labels occur in a sentence.

    Args:
        splitsentence: With ``convertedornot == 0`` a list of words, which is
            joined with spaces and annotated via
            ``Sentence2GrammarSchema(..., 'word.dep_')``.  With
            ``convertedornot == 1`` the labels themselves.
        convertedornot: ``0`` or ``1``, see above.

    Returns:
        ``[a, b, c]`` with each entry 0 or 1:
        ``a`` - a label in ('sb', 'ep', 'ph') was seen,
        ``b`` - a label in ('ROOT', 'pd') was seen,
        ``c`` - a label in ('oa', 'og', 'oc', 'op', 'mo') was seen.
        (Presumably subject / predicate / object, going by the method name.)

    Raises:
        ValueError: If ``convertedornot`` is neither 0 nor 1.
    """
    label_groups = (
        ('sb', 'ep', 'ph'),
        ('ROOT', 'pd'),
        ('oa', 'og', 'oc', 'op', 'mo'),
    )

    if convertedornot == 0:
        labels = self.Sentence2GrammarSchema(' '.join(splitsentence), 'word.dep_')
    elif convertedornot == 1:
        labels = splitsentence
    else:
        # Previously this fell through to an UnboundLocalError further down.
        raise ValueError('convertedornot must be 0 or 1, got %r' % (convertedornot,))

    found = [0, 0, 0]
    for label in labels:
        for slot, group in enumerate(label_groups):
            if label in group:
                found[slot] = 1
    return found
|
||
|
|
||
|
def checkForAnnotation(self, splitsentence, token, spacyclass):
    """Tell whether any annotation of the sentence equals ``token``.

    The words of ``splitsentence`` are joined with spaces and annotated via
    ``Sentence2GrammarSchema`` using ``spacyclass``.

    Returns:
        1 if ``token`` is among the annotations, otherwise 0.
    """
    annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
    if any(annotation == token for annotation in annotations):
        return 1
    return 0
|
||
|
|
||
|
def checkForAnnotationInTokenizedSentence(self, splitsentence, token):
    """Tell whether ``token`` equals one of the elements of ``splitsentence``.

    Unlike ``checkForAnnotation`` nothing is annotated here; the elements
    are compared to ``token`` as they are.

    Returns:
        1 if an element equals ``token``, otherwise 0.
    """
    if any(element == token for element in splitsentence):
        return 1
    return 0
|
||
|
|
||
|
|
||
|
def checkForAnnotationTuple(self, splitsentence, token, spacyclass, tupleinwords):
    """Find adjacent word pairs whose annotations equal ``token``.

    The words are joined with spaces and annotated via
    ``Sentence2GrammarSchema``; annotation ``i`` is taken to belong to
    ``splitsentence[i]``.

    Args:
        splitsentence: List of words.
        token: The two annotations to look for, in order.
        spacyclass: Annotation selector handed to ``Sentence2GrammarSchema``.
        tupleinwords: The two words the pair must consist of, or the string
            ``'None'`` (``None`` is accepted as well) to allow any words.

    Returns:
        ``(status, pairs)``.  ``pairs`` holds ``splitsentence[i:i + 2]`` for
        every complete match.  ``status`` is the number of leading elements
        (1 or 2) that matched at the LAST start position where at least the
        first element matched, or 0 if there was none - so a complete match
        followed by a partial one yields 1.
    """
    width = 2
    annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
    any_words = tupleinwords is None or tupleinwords == 'None'

    status = 0
    pairs = []
    for start in range(len(annotations) - (width - 1)):
        matched = 0
        for offset in range(width):
            position = start + offset
            if annotations[position] != token[offset]:
                break
            # The tagger can yield more annotations than there are
            # whitespace-separated words; such positions cannot match.
            if position >= len(splitsentence):
                break
            if not any_words and splitsentence[position] != tupleinwords[offset]:
                break
            matched += 1
        if matched:
            status = matched
        if matched == width:
            pairs.append(splitsentence[start:start + width])

    return status, pairs
|
||
|
|
||
|
def checkForAnnotationTriple(self, splitsentence, token, spacyclass, tripleinwords):
    """Find runs of three adjacent words whose annotations equal ``token``.

    The words are joined with spaces and annotated via
    ``Sentence2GrammarSchema``; annotation ``i`` is taken to belong to
    ``splitsentence[i]``.

    Args:
        splitsentence: List of words.
        token: The three annotations to look for, in order.
        spacyclass: Annotation selector handed to ``Sentence2GrammarSchema``.
        tripleinwords: The three words the run must consist of, or the
            string ``'None'`` (``None`` is accepted as well) to allow any
            words.

    Returns:
        ``(status, triples)``.  ``triples`` holds ``splitsentence[i:i + 3]``
        for every complete match.  ``status`` is the number of leading
        elements (1 to 3) that matched at the LAST start position where at
        least the first element matched, or 0 if there was none.
    """
    width = 3
    annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
    any_words = tripleinwords is None or tripleinwords == 'None'

    status = 0
    triples = []
    for start in range(len(annotations) - (width - 1)):
        matched = 0
        for offset in range(width):
            position = start + offset
            if annotations[position] != token[offset]:
                break
            # The tagger can yield more annotations than there are
            # whitespace-separated words; such positions cannot match.
            if position >= len(splitsentence):
                break
            if not any_words and splitsentence[position] != tripleinwords[offset]:
                break
            matched += 1
        if matched:
            status = matched
        if matched == width:
            triples.append(splitsentence[start:start + width])

    return status, triples
|
||
|
|
||
|
def checkForAnnotationQuadruple(self, splitsentence, token, spacyclass, quadrupleinwords):
    """Find runs of four adjacent words whose annotations equal ``token``.

    The words are joined with spaces and annotated via
    ``Sentence2GrammarSchema``; annotation ``i`` is taken to belong to
    ``splitsentence[i]``.

    Args:
        splitsentence: List of words.
        token: The four annotations to look for, in order.
        spacyclass: Annotation selector handed to ``Sentence2GrammarSchema``.
        quadrupleinwords: The four words the run must consist of, or the
            string ``'None'`` (``None`` is accepted as well) to allow any
            words.

    Returns:
        ``(status, quadruples)``.  ``quadruples`` holds
        ``splitsentence[i:i + 4]`` for every complete match.  ``status`` is
        the number of leading elements (1 to 4) that matched at the LAST
        start position where at least the first element matched, or 0 if
        there was none.
    """
    width = 4
    annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
    any_words = quadrupleinwords is None or quadrupleinwords == 'None'

    status = 0
    quadruples = []
    for start in range(len(annotations) - (width - 1)):
        matched = 0
        for offset in range(width):
            position = start + offset
            if annotations[position] != token[offset]:
                break
            # The tagger can yield more annotations than there are
            # whitespace-separated words; such positions cannot match.
            if position >= len(splitsentence):
                break
            if not any_words and splitsentence[position] != quadrupleinwords[offset]:
                break
            matched += 1
        if matched:
            status = matched
        if matched == width:
            quadruples.append(splitsentence[start:start + width])

    return status, quadruples
|
||
|
|
||
|
|
||
|
# Input: a tokenized sentence, e.g. ['this', 'is', 'a', 'sentence'].
|
||
|
|
||
|
def GetTuplesinSentence(self, mainsentence):
    """Collect the fixed tag patterns of length 2, 3 and 4 found in a sentence.

    The words are joined with spaces and tagged ONCE via
    ``Sentence2GrammarSchema(..., 'word.tag_')``; tag ``i`` is taken to
    belong to ``mainsentence[i]``.  (Earlier the sentence was tagged again
    for each of the 63 patterns.)

    Args:
        mainsentence: List of words, e.g. ``['this', 'is', 'a', 'sentence']``.

    Returns:
        ``(pairs, triples, quadruples)``.  Each is a list of
        ``[pattern, words]`` entries, ordered by pattern (in table order)
        and then by position, where ``words`` is the slice of
        ``mainsentence`` at which ``pattern`` occurs.  Positions beyond the
        end of ``mainsentence`` are not considered.
    """
    # The tag names look like STTS (German) part-of-speech tags.
    pair_patterns = [['ART', 'NE'], ['ART', 'NN'], ['APPR', 'NN'], ['APPR', 'ADJD'], ['APPR', 'NE'], ['ART', 'CARD'], ['APPR', 'CARD'], ['PPOSAT', 'NN'], ['PPOSAT', 'NE'], ['ADV', 'ADJD'], ['ADV', 'ADV'], ['ADV', 'PTKVZ'], ['PTKNEG', 'ADV'], ['ADJA', 'NN'], ['ADJA', 'NE'], ['ADV', 'PIS'], ['ADJA', 'PIS'], ['ADJD', 'PIS'], ['APPRART', 'NN'], ['APPRART', 'NE'], ['PDAT', 'NE'], ['PDAT', 'NN'], ['PWAT', 'NE'], ['PWAT', 'NN'], ['PIAT', 'NE'], ['PIAT', 'NN'], ['PROAV', 'ADJD'], ['PDS', 'NE'], ['PDS', 'NN'], ['NE', 'NE'], ['CARD', 'NE'], ['CARD', 'NN']]
    triple_patterns = [['APPR', 'ART', 'NN'], ['APPR', 'PDAT', 'NN'], ['APPR', 'PDS', 'NN'], ['ART', 'ADJA', 'NN'], ['ART', 'ADJA', 'NE'], ['APPR', 'ART', 'NE'], ['KOKOM', 'ART', 'NN'], ['KOKOM', 'ART', 'NE'], ['APPR', 'PIAT', 'NN'], ['APPR', 'ADJA', 'NN'], ['APPR', 'ADJA', 'NE'], ['APPRART', 'NN', 'CARD'], ['APPRART', 'NE', 'CARD'], ['APPRART', 'NN', 'NE'], ['CARD', 'KON', 'CARD'], ['APPR', 'ADV', 'CARD'], ['ADJD', 'KOKOM', 'CARD'], ['APPR', 'NE', 'NE'], ['NN', 'KON', 'NN'], ['NE', 'NN', 'NE'], ['APPR', 'NE', 'NN'], ['APPR', 'CARD', 'NN'], ['APPR', 'CARD', 'NE']]
    quadruple_patterns = [['KOKOM', 'ADV', 'ADJA', 'NN'], ['KOKOM', 'ADV', 'ADJA', 'NE'], ['APPR', 'ADV', 'ADJA', 'NE'], ['APPR', 'ADV', 'ADJA', 'NN'], ['ART', 'NN', 'APPR', 'NE'], ['APPR', 'NE', 'NN', 'NE'], ['APPR', 'ART', 'ADJA', 'NN'], ['ART', 'ADJD', 'ADJA', 'NN']]

    tags = list(self.Sentence2GrammarSchema(' '.join(mainsentence), 'word.tag_'))
    # Only positions that exist in both sequences can be mapped to words.
    usable = min(len(tags), len(mainsentence))

    def occurrences(patterns):
        """Return a ``[pattern, words]`` entry for every occurrence of every pattern."""
        hits = []
        for pattern in patterns:
            width = len(pattern)
            for start in range(usable - width + 1):
                if tags[start:start + width] == pattern:
                    hits.append([pattern, mainsentence[start:start + width]])
        return hits

    tuplesToCheck = occurrences(pair_patterns)
    triplesToCheck = occurrences(triple_patterns)
    quadruplesToCheck = occurrences(quadruple_patterns)
    return tuplesToCheck, triplesToCheck, quadruplesToCheck
|
||
|
|
||
|
|
||
|
def createTupleofGrammarpieces(self, sentence, tuplesToCheck, triplesToCheck, quadruplesToCheck):
    """Merge the words of recognised patterns into single multi-word chunks.

    ``sentence``, ``tuplesToCheck`` and ``triplesToCheck`` are modified in
    place.  The steps are:

    1. Pairs whose words occur contiguously inside a triple or quadruple,
       and triples whose words occur contiguously inside a quadruple, are
       dropped from their lists.
    2. Bracketed passages - from a word starting with ``(`` up to the next
       word ending with ``)`` - are taken out of the sentence, together
       with the word in front of them (``None`` at the start of the
       sentence).
    3. Every occurrence of the words of a quadruple, then of a triple, then
       of a pair is replaced by one space-joined string.
    4. Each bracketed passage is appended, without a separator, to the
       first chunk whose last word is the remembered word in front of it;
       if there is no such chunk it is added as a chunk at the end.

    Args:
        sentence: List of words.
        tuplesToCheck: ``[pattern, words]`` entries with two words.
        triplesToCheck: ``[pattern, words]`` entries with three words.
        quadruplesToCheck: ``[pattern, words]`` entries with four words.

    Returns:
        ``sentence`` (the same list object) after the steps above.
    """

    def contains_run(longer, shorter):
        """Tell whether ``shorter`` occurs contiguously inside ``longer``."""
        shorter = list(shorter)
        width = len(shorter)
        return any(list(longer[start:start + width]) == shorter
                   for start in range(len(longer) - width + 1))

    def drop_covered(entries, covering_entries):
        """Remove, in place, the entries whose words a covering entry contains."""
        # Filtering once avoids removing the same entry twice when several
        # covering entries contain it.
        entries[:] = [
            entry for entry in entries
            if not any(contains_run(covering[1], entry[1]) for covering in covering_entries)
        ]

    def merge_runs(entries, width):
        """Replace each occurrence of an entry's words by one joined string."""
        for entry in entries:
            target = list(entry[1][:width])
            if len(target) < width:
                continue
            merged = ' '.join(entry[1])
            position = 0
            # The bound is re-evaluated because every merge shortens the list.
            while position + width <= len(sentence):
                if sentence[position:position + width] == target:
                    sentence[position:position + width] = [merged]
                position += 1

    # Step 1: keep only the longest pattern covering a given stretch of words.
    drop_covered(tuplesToCheck, triplesToCheck)
    drop_covered(tuplesToCheck, quadruplesToCheck)
    drop_covered(triplesToCheck, quadruplesToCheck)

    # Step 2: set bracketed passages aside.
    bracket_groups = []
    bracket_positions = set()
    for start, word in enumerate(sentence):
        if not word.startswith('('):
            continue
        for end in range(start, len(sentence)):
            if sentence[end].endswith(')'):
                # sentence[-1] would be the LAST word, so the start of the
                # sentence has to be handled explicitly.
                word_before = sentence[start - 1] if start > 0 else None
                bracket_groups.append([sentence[start:end + 1], word_before])
                bracket_positions.update(range(start, end + 1))
                break
    if bracket_positions:
        # Removing by position keeps equal words outside the brackets intact.
        sentence[:] = [word for position, word in enumerate(sentence)
                       if position not in bracket_positions]

    # Step 3: longest patterns first.
    merge_runs(quadruplesToCheck, 4)
    merge_runs(triplesToCheck, 3)
    merge_runs(tuplesToCheck, 2)

    # Step 4: put the bracketed passages back.
    for bracket_words, word_before in bracket_groups:
        bracket_text = ' '.join(bracket_words)
        for position, chunk in enumerate(sentence):
            pieces = chunk.split()
            if pieces and pieces[-1] == word_before:
                # No separator is inserted here; this matches the existing output format.
                sentence[position] = chunk + bracket_text
                break
        else:
            sentence.append(bracket_text)

    return sentence
|
||
|
|
||
|
# NOTE: the following method is too computationally expensive.
|
||
|
def filterpermutationsaccordingtotuples(self, sentences, tuplesToCheck, triplesToCheck):
    """Keep the sentences that still contain the given pairs or triples.

    Every sentence is checked against each ``[pattern, words]`` entry with
    ``checkForAnnotationTuple`` / ``checkForAnnotationTriple`` using
    ``'word.tag_'``.  For each of the two kinds, the last entry that
    returned a non-zero status decides: a complete match (2 for pairs, 3
    for triples) counts as passed, a partial match as failed.  A status of
    0 leaves the verdict of earlier entries untouched.

    Args:
        sentences: Iterable of word lists.
        tuplesToCheck: ``[pattern, words]`` entries with two elements each.
        triplesToCheck: ``[pattern, words]`` entries with three elements each.

    Returns:
        The sentences, in their original order, for which the pair verdict
        or the triple verdict is "passed".
    """
    kept = []

    for candidate in sentences:
        pairs_passed = False
        for entry in tuplesToCheck:
            status, _ = self.checkForAnnotationTuple(candidate, entry[0], 'word.tag_', entry[1])
            if status == 2:
                pairs_passed = True
            elif status == 1:
                pairs_passed = False

        triples_passed = False
        for entry in triplesToCheck:
            status, _ = self.checkForAnnotationTriple(candidate, entry[0], 'word.tag_', entry[1])
            if status == 3:
                triples_passed = True
            elif status in (1, 2):
                triples_passed = False

        if pairs_passed or triples_passed:
            kept.append(candidate)

    return kept
|
||
|
|
||
|
|