# class to implement GS utils and Search

import resource


class GS_Utils(object):
    
    def __init__(self, language):
        """Create the helper and load the spaCy pipeline it works with.

        language: value handed unchanged to ``spacy.load``.
        """
        # spaCy is only imported once an instance is actually created.
        import spacy

        pipeline = spacy.load(language)
        self.nlp = pipeline
        self.oi = 'oi'
        
    
    def Sentence2GrammarSchema(self, sentence, spacyclass):
        """Turn a sentence into the list of one annotation value per token.

        sentence:   text that is run through ``self.nlp``.
        spacyclass: expression string written in terms of the loop variable
                    ``word`` that selects the annotation to collect, e.g.
                    ``'word.pos_'``, ``'word.dep_'`` or ``'word.tag_'``.
                    (word.pos_ is suitable for noun and verb, word.dep_ for
                    sb / pd, and possibly tag.)

        Returns the collected values in token order.  Values whose length is
        not greater than 1 are skipped, so the result may contain fewer items
        than the document has tokens.
        """
        doc = self.nlp(sentence)

        # The plain 'word.<attribute>' form is resolved with getattr, which
        # gives the same value as evaluating the expression but without eval.
        prefix = 'word.'
        attribute = None
        if isinstance(spacyclass, str) and spacyclass.startswith(prefix):
            candidate = spacyclass[len(prefix):]
            if candidate.isidentifier():
                attribute = candidate

        GsDBsentence = []
        for word in doc:
            if attribute is not None:
                value = getattr(word, attribute)
            else:
                # SECURITY: arbitrary expressions are still evaluated for
                # backward compatibility; never pass untrusted text here.
                # The expression may refer to the local name ``word``.
                value = eval(spacyclass)
            # Evaluate once per token and reuse the value for the length test.
            if len(value) > 1:
                GsDBsentence.append(value)

        return GsDBsentence
    
    def Sentence2RightGrammarTupel(self, sentence, gs_sentence, right_gs_tupel):
        """Re-order the words of a sentence along target tag sequences.

        sentence:       whitespace separated words.
        gs_sentence:    tags, indexed in parallel with the words.
        right_gs_tupel: iterable of whitespace separated tag strings.

        For every tag string, each of its tags takes the word at the first
        position that carries this tag and has not been taken yet for that
        string; a tag without such a position contributes nothing.

        Returns one list of words per tag string.
        """
        words = sentence.split()
        positions = range(len(gs_sentence))
        reordered = []

        for pattern in right_gs_tupel:
            taken = set()
            picked = []
            for tag in pattern.split():
                for pos in positions:
                    if tag == gs_sentence[pos] and pos not in taken:
                        picked.append(words[pos])
                        taken.add(pos)
                        # only the first free position is used per tag
                        break
            reordered.append(picked)

        return reordered


    # gets the best grammar scheme from both, depending on which one appears the most in both, and whether rules are still present. 
    def GetBestgsAccordingRules(self, sentence, gs_sentence1, right_gs_tupel1, right_gs_tupel2, grammcorr_sentences1 , grammcorr_sentences2, rules , specialrules):
        """Return the best scoring candidate of ``grammcorr_sentences1``.

        sentence:             whitespace separated input words.
        gs_sentence1:         tags of the input, indexed in parallel with the
                              words of ``sentence``.
        right_gs_tupel1:      per candidate, its tag sequence; either a
                              whitespace separated string (the form
                              Sentence2RightGrammarTupel consumes) or a
                              sequence of tags.
                              NOTE(review): presumably entry n is the sequence
                              that produced candidate n - confirm with caller.
        right_gs_tupel2:      accepted for interface compatibility; not used.
        grammcorr_sentences1: candidate word lists that are being ranked.
        grammcorr_sentences2: word lists the candidates are compared against.
        rules:                tag sequences of length 2 to 5; other lengths
                              are ignored.
        specialrules:         tag sequences of length 2 or 3; other lengths
                              are ignored.

        Scoring of a candidate:
          * +1 for every position where it holds the same word as a list of
            ``grammcorr_sentences2`` (compared over the shorter of the two);
          * +40 * k for every occurrence of the k input words that sit under
            the last place where a rule matches ``gs_sentence1``;
          * +len(candidate) for every window of the candidate's tag sequence
            that equals a special rule.

        Returns the candidate with the highest score; on a tie the candidate
        listed last wins.  Returns ``[]`` when there are no candidates.
        """
        if not grammcorr_sentences1:
            # nothing to rank; avoids indexing into an empty ranking
            return []

        # Positional agreement with every list of the second candidate set.
        scores = [
            sum(
                1
                for other in grammcorr_sentences2
                for mine, theirs in zip(candidate, other)
                if mine == theirs
            )
            for candidate in grammcorr_sentences1
        ]

        # Rules: locate the rule in the input tags, translate it to the words
        # at that place and reward candidates that keep those words together.
        words = sentence.split()
        for rule in rules:
            size = len(rule)
            if size < 2 or size > 5:
                continue
            rule_tags = list(rule)
            phrase = None
            for start in range(len(gs_sentence1) - size + 1):
                if list(gs_sentence1[start:start + size]) == rule_tags:
                    # later matches replace earlier ones
                    phrase = words[start:start + size]
            if phrase is None or len(phrase) != size:
                # rule not present, or the input has fewer words than tags
                continue
            bonus = 40 * size
            for idx, candidate in enumerate(grammcorr_sentences1):
                for start in range(len(candidate) - size + 1):
                    if list(candidate[start:start + size]) == phrase:
                        scores[idx] += bonus

        # Special rules are looked up in the tag sequence of the candidate
        # itself, in consecutive positions of that one sequence.
        for idx, candidate in enumerate(grammcorr_sentences1):
            if idx >= len(right_gs_tupel1):
                break
            tags = right_gs_tupel1[idx]
            if isinstance(tags, str):
                tags = tags.split()
            usable = min(len(candidate), len(tags))
            for special in specialrules:
                size = len(special)
                if size not in (2, 3):
                    continue
                wanted = list(special)
                for start in range(usable - size + 1):
                    if list(tags[start:start + size]) == wanted:
                        scores[idx] += len(candidate)

        # Highest score wins; the index in the key makes the last candidate
        # win a tie.
        best = max(range(len(scores)), key=lambda i: (scores[i], i))
        return grammcorr_sentences1[best]
        
        
    def checkSPO(self, splitsentence, convertedornot):
        """Report which of subject, predicate and object labels are present.

        splitsentence:  list of words (``convertedornot == 0``) or list of
                        dependency labels (``convertedornot == 1``).
        convertedornot: 0 when the words still have to be converted to
                        dependency labels with ``'word.dep_'``, 1 when
                        ``splitsentence`` already holds the labels.

        Returns ``[subject, predicate, object]`` with 1 for a group that has
        at least one of its labels in the sentence and 0 otherwise.

        Raises ValueError for any other value of ``convertedornot``.
        """
        if convertedornot == 0:
            labels = self.Sentence2GrammarSchema(' '.join(splitsentence), 'word.dep_')
        elif convertedornot == 1:
            labels = splitsentence
        else:
            # previously this fell through to an unbound local variable
            raise ValueError('convertedornot must be 0 or 1, got %r' % (convertedornot,))

        label_groups = (
            ('sb', 'ep', 'ph'),
            ('ROOT', 'pd'),
            ('oa', 'og', 'oc', 'op', 'mo'),
        )

        spoCount = [0, 0, 0]
        for label in labels:
            for slot, group in enumerate(label_groups):
                if label in group:
                    spoCount[slot] = 1

        return spoCount
    
    def checkForAnnotation(self, splitsentence, token, spacyclass):
        """Tell whether an annotation value occurs in a sentence.

        splitsentence: list of words; joined with blanks before conversion.
        token:         annotation value to look for.
        spacyclass:    annotation selector passed to Sentence2GrammarSchema.

        Returns 1 when ``token`` is among the annotations, otherwise 0.
        """
        annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
        found = [entry for entry in annotations if entry == token]
        return 1 if found else 0
    
    def checkForAnnotationInTokenizedSentence(self, splitsentence, token):
        """Tell whether ``token`` occurs in an already converted sentence.

        splitsentence: iterable of annotation values.
        token:         value to look for.

        Returns 1 when an element equal to ``token`` exists, otherwise 0.
        """
        matches = [entry for entry in splitsentence if entry == token]
        return 1 if matches else 0
    
    
    def checkForAnnotationTuple(self, splitsentence, token, spacyclass, tupleinwords):
        """Search a sentence for two consecutive annotations.

        splitsentence: list of words; joined with blanks before conversion.
        token:         the two annotation values to find in a row.
        spacyclass:    annotation selector passed to Sentence2GrammarSchema.
        tupleinwords:  the two words that must sit under the annotations, or
                       the string ``'None'`` to accept any words.

        Returns ``(level, hits)``.  ``level`` is how many leading elements
        matched at the last start position whose first element matched (0 if
        no start position matched at all); ``hits`` holds the word pair of
        every complete match.
        """
        annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
        width = 2
        level = 0
        hits = []

        for start in range(len(annotations) - (width - 1)):
            depth = 0
            while depth < width:
                pos = start + depth
                fits = annotations[pos] == token[depth] and (
                    splitsentence[pos] == tupleinwords[depth] or tupleinwords == 'None'
                )
                if not fits:
                    break
                depth += 1
            if depth:
                # a later partial match overrides an earlier result
                level = depth
            if depth == width:
                hits.append(splitsentence[start:start + width])

        return level, hits
    
    def checkForAnnotationTriple(self, splitsentence, token, spacyclass, tripleinwords):
        """Search a sentence for three consecutive annotations.

        splitsentence: list of words; joined with blanks before conversion.
        token:         the three annotation values to find in a row.
        spacyclass:    annotation selector passed to Sentence2GrammarSchema.
        tripleinwords: the three words that must sit under the annotations,
                       or the string ``'None'`` to accept any words.

        Returns ``(level, hits)``.  ``level`` is how many leading elements
        matched at the last start position whose first element matched (0 if
        no start position matched at all); ``hits`` holds the word triple of
        every complete match.
        """
        annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
        width = 3
        level = 0
        hits = []

        for start in range(len(annotations) - (width - 1)):
            depth = 0
            while depth < width:
                pos = start + depth
                fits = annotations[pos] == token[depth] and (
                    splitsentence[pos] == tripleinwords[depth] or tripleinwords == 'None'
                )
                if not fits:
                    break
                depth += 1
            if depth:
                # a later partial match overrides an earlier result
                level = depth
            if depth == width:
                hits.append(splitsentence[start:start + width])

        return level, hits
    
    def checkForAnnotationQuadruple(self, splitsentence, token, spacyclass, quadrupleinwords):
        """Search a sentence for four consecutive annotations.

        splitsentence:    list of words; joined with blanks before conversion.
        token:            the four annotation values to find in a row.
        spacyclass:       annotation selector passed to
                          Sentence2GrammarSchema.
        quadrupleinwords: the four words that must sit under the annotations,
                          or the string ``'None'`` to accept any words.

        Returns ``(level, hits)``.  ``level`` is how many leading elements
        matched at the last start position whose first element matched (0 if
        no start position matched at all); ``hits`` holds the four words of
        every complete match.
        """
        annotations = self.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass)
        width = 4
        level = 0
        hits = []

        for start in range(len(annotations) - (width - 1)):
            depth = 0
            while depth < width:
                pos = start + depth
                fits = annotations[pos] == token[depth] and (
                    splitsentence[pos] == quadrupleinwords[depth] or quadrupleinwords == 'None'
                )
                if not fits:
                    break
                depth += 1
            if depth:
                # a later partial match overrides an earlier result
                level = depth
            if depth == width:
                hits.append(splitsentence[start:start + width])

        return level, hits
    
    
    #input ['this', 'is', 'a', 'sentence']
    
    def GetTuplesinSentence(self,mainsentence):
        """Find known 2-, 3- and 4-tag phrases in a tokenised sentence.

        mainsentence: list of words, e.g. ``['this', 'is', 'a', 'sentence']``.

        The sentence is converted to ``word.tag_`` values once and every
        pattern is then matched against consecutive tags.

        Returns ``(tuplesToCheck, triplesToCheck, quadruplesToCheck)``; each
        is a list of ``[pattern, words]`` entries, ordered by pattern and then
        by position, where ``words`` is the slice of ``mainsentence`` at the
        matching position.
        """
        tuples = [['ART', 'NE'], ['ART', 'NN'], ['APPR','NN'], ['APPR','ADJD'], ['APPR','NE'], ['ART', 'CARD'], ['APPR', 'CARD'], ['PPOSAT', 'NN'], ['PPOSAT', 'NE'], ['ADV', 'ADJD'],['ADV', 'ADV'], ['ADV', 'PTKVZ'], ['PTKNEG', 'ADV'], ['ADJA','NN'], ['ADJA','NE'], ['ADV','PIS'], ['ADJA','PIS'], ['ADJD','PIS'], ['APPRART', 'NN'], ['APPRART', 'NE'], ['PDAT', 'NE'], ['PDAT', 'NN'], ['PWAT', 'NE'], ['PWAT', 'NN'], ['PIAT', 'NE'], ['PIAT', 'NN'], ['PROAV', 'ADJD'],['PDS', 'NE'], ['PDS', 'NN'], ['NE', 'NE'], ['CARD', 'NE'], ['CARD', 'NN'] ]
        triples = [['APPR', 'ART', 'NN'],['APPR', 'PDAT', 'NN'], ['APPR', 'PDS', 'NN'], ['ART','ADJA','NN'], ['ART','ADJA','NE'], ['APPR', 'ART', 'NE'], ['KOKOM', 'ART', 'NN'], ['KOKOM', 'ART', 'NE'], ['APPR', 'PIAT', 'NN'], ['APPR', 'ADJA', 'NN'], ['APPR', 'ADJA', 'NE'], ['APPRART', 'NN', 'CARD'], ['APPRART', 'NE', 'CARD'], ['APPRART', 'NN', 'NE'], ['CARD', 'KON', 'CARD'], ['APPR', 'ADV', 'CARD'], ['ADJD', 'KOKOM', 'CARD'], ['APPR', 'NE', 'NE'], ['NN', 'KON', 'NN'], ['NE', 'NN', 'NE'], ['APPR', 'NE', 'NN'], ['APPR', 'CARD', 'NN'], ['APPR', 'CARD', 'NE']]
        quadruples = [['KOKOM', 'ADV', 'ADJA', 'NN'], ['KOKOM', 'ADV', 'ADJA', 'NE'], ['APPR', 'ADV', 'ADJA', 'NE'], ['APPR', 'ADV', 'ADJA', 'NN'], ['ART', 'NN', 'APPR', 'NE'], ['APPR', 'NE', 'NN', 'NE'], ['APPR', 'ART', 'ADJA', 'NN'], ['ART', 'ADJD', 'ADJA', 'NN']]

        # Tag the sentence a single time; running the pipeline again for each
        # of the patterns above gives the same tags at many times the cost.
        tags = self.Sentence2GrammarSchema(' '.join(mainsentence), 'word.tag_')

        def collect(patterns, size):
            """Return [pattern, words] for every window whose tags equal a pattern."""
            found = []
            # A window must be covered by the tags and by the words; the tag
            # list can differ in length from the word list.
            last_start = min(len(tags), len(mainsentence)) - size
            for pattern in patterns:
                for start in range(last_start + 1):
                    if tags[start:start + size] == pattern:
                        found.append([pattern, mainsentence[start:start + size]])
            return found

        tuplesToCheck = collect(tuples, 2)
        triplesToCheck = collect(triples, 3)
        quadruplesToCheck = collect(quadruples, 4)

        return tuplesToCheck, triplesToCheck, quadruplesToCheck
    
    
    def createTupleofGrammarpieces(self, sentence, tuplesToCheck, triplesToCheck, quadruplesToCheck):
        """Merge the recognised phrases of a tokenised sentence into single items.

        sentence:          list of words; it is modified in place and the
                           same list is returned.
        tuplesToCheck,
        triplesToCheck,
        quadruplesToCheck: lists of ``[tags, words]`` entries with 2, 3 and 4
                           words respectively.  Entries of the first two
                           lists whose words are contained in a longer phrase
                           are removed from the caller's lists.

        Steps:
          1. drop pairs contained in a triple or quadruple and triples
             contained in a quadruple;
          2. cut bracketed groups ("( ... )") out of the sentence, remembering
             the word in front of each group;
          3. replace every occurrence of a quadruple, then triple, then pair
             by its blank-joined text;
          4. attach each bracketed group to the first item whose last word is
             the remembered word, or append it at the end when there is none.

        Returns the modified ``sentence``.
        """
        def contained_in(short_words, long_words):
            """True when short_words occur as consecutive items of long_words."""
            size = len(short_words)
            wanted = list(short_words)
            return any(
                list(long_words[i:i + size]) == wanted
                for i in range(len(long_words) - size + 1)
            )

        def drop_covered(shorter, longer):
            """Remove, in place, the entries of shorter contained in an entry of longer."""
            # Filtering once per entry avoids removing the same entry twice
            # when it is contained in more than one longer phrase.
            shorter[:] = [
                entry for entry in shorter
                if not any(contained_in(entry[1], big[1]) for big in longer)
            ]

        def merge(entries, size):
            """Replace every occurrence of the entries' words by their joined text."""
            for entry in entries:
                words = list(entry[1])
                if len(words) != size:
                    continue
                joined = ' '.join(words)
                start = 0
                # The bound is re-read on every step because merging shortens
                # the sentence.
                while start + size <= len(sentence):
                    if sentence[start:start + size] == words:
                        sentence[start:start + size] = [joined]
                    start += 1

        drop_covered(tuplesToCheck, triplesToCheck)
        drop_covered(tuplesToCheck, quadruplesToCheck)
        drop_covered(triplesToCheck, quadruplesToCheck)

        # Locate bracketed groups: from an item starting with '(' up to the
        # next item ending with ')'.
        groups = []
        pos = 0
        while pos < len(sentence):
            if sentence[pos].startswith('('):
                closing = next(
                    (end for end in range(pos, len(sentence)) if sentence[end].endswith(')')),
                    None,
                )
                if closing is not None:
                    # A group at the very start has no word in front of it;
                    # index -1 would silently pick the last word instead.
                    before = sentence[pos - 1] if pos > 0 else None
                    groups.append((pos, closing, before))
                    pos = closing + 1
                    continue
            pos += 1

        bracketinfos = [[sentence[start:end + 1], before] for start, end, before in groups]

        # Delete by position, back to front, so that an identical word outside
        # the brackets is never removed by mistake.
        for start, end, _ in reversed(groups):
            del sentence[start:end + 1]

        merge(quadruplesToCheck, 4)
        merge(triplesToCheck, 3)
        merge(tuplesToCheck, 2)

        for words, before in bracketinfos:
            text = ' '.join(words)
            for idx, piece in enumerate(sentence):
                parts = piece.split()
                if parts and parts[-1] == before:
                    # the group is attached without a separating blank, as before
                    sentence[idx] = piece + text
                    break
            else:
                sentence.append(text)

        return sentence
    
    # NOTE: the following method is too computationally expensive
    def filterpermutationsaccordingtotuples(self, sentences, tuplesToCheck, triplesToCheck):
        """Keep the sentences that still contain a known pair or triple.

        sentences:      iterable of word lists.
        tuplesToCheck:  ``[tags, words]`` entries with two tags / words.
        triplesToCheck: ``[tags, words]`` entries with three tags / words.

        Every entry is checked against a sentence with ``'word.tag_'``.  A
        complete match sets the pair (or triple) flag, a partial match clears
        it and no match leaves it untouched, so the last check with a
        non-zero result decides.  A sentence is kept when either flag is set
        after all checks.

        Returns the list of kept sentences in their original order.
        """
        kept = []

        for candidate in sentences:
            pair_ok = 0
            triple_ok = 0

            for pair in tuplesToCheck:
                level, _ = self.checkForAnnotationTuple(candidate, pair[0], 'word.tag_', pair[1])
                if level == 2:
                    pair_ok = 1
                elif level == 1:
                    pair_ok = 0

            for triple in triplesToCheck:
                level, _ = self.checkForAnnotationTriple(candidate, triple[0], 'word.tag_', triple[1])
                if level == 3:
                    triple_ok = 1
                elif level in (1, 2):
                    triple_ok = 0

            if pair_ok == 1 or triple_ok == 1:
                kept.append(candidate)

        return kept