import spacy
import nltk
from nltk.stem.snowball import SnowballStemmer

import hickle as hkl
import FASTsearch

stemmer = SnowballStemmer("german")


class Passiv2Aktiv(object):
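    """Convert German passive sentences (Vorgangspassiv and Zustandspassiv) into
    active voice, using hickle verb-form databases, FASTsearch lookups and the
    GS_Utils / SentGlue helpers.
    """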
    
    def __init__(self, hklDatabaseDir_Aktiv, hklDatabaseDir_Vorgangspassiv, hklDatabaseDir_Zustandspassiv):
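        """Load the optional hkl verb-form databases (pass None to skip each one),
        the German spaCy model and the Snowball stemmer.
        """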
        
        if hklDatabaseDir_Aktiv is not None:
            self.AktivDB = hkl.load(hklDatabaseDir_Aktiv) 
        
        if hklDatabaseDir_Vorgangspassiv is not None:
            self.VorgangspassivDB = hkl.load(hklDatabaseDir_Vorgangspassiv)
            
        if hklDatabaseDir_Zustandspassiv is not None:
            self.ZustandspassivDB = hkl.load(hklDatabaseDir_Zustandspassiv)
        
        
        #print('loading the german spacy model..')
        self.nlp = spacy.load('de_core_news_sm')
        #print('done')
        
        #print('loading the stemmer..')
        self.stemmer = SnowballStemmer("german")
        #print('done')
        
        return
    
    
    def create_hklDB_from_csv(self, csvDbDir, StemOrNot):
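        """Build hkl databases from a text file in which every line is a Python
        literal: dump the full records plus two key-only index lists as
        'hkldb_All<name>.hkl', 'hkldb1<name>.hkl' and 'hkldb2<name>.hkl', where
        <name> is the input file name without its extension. The StemOrNot
        argument is currently unused.
        """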
        
        with open(csvDbDir) as lines:
            
            self.DB_All = []
            
            for line in lines:
                
                # Each line is expected to be a Python literal (e.g. a tuple of lists)
                # and is parsed with eval.
                self.DB_All.append(list(eval(line)))
            
            
            
            
            
            self.hkldb1 = []
            self.hkldb2 = []
            
            
            counter = 0
            for n in range(len(self.DB_All)):
                
                counter += 1
                if counter % 1000 == 0:
                    print(counter)
                
                
                self.hkldb1.append([self.DB_All[n][0][0]] )
                self.hkldb2.append([self.DB_All[n][1][0]] )
                
                
            
            print('Creating the hkl dump of DB_All')    
            hkl.dump(self.DB_All, 'hkldb_All' + csvDbDir[:-4] + '.hkl', mode='w', compression='lzf')
            #print('done..')
            
            print('Creating the hkl dump of DB 1')
            hkl.dump(self.hkldb1, 'hkldb1' + csvDbDir[:-4] + '.hkl', mode='w', compression='lzf')
            #print('done..')
            
            print('Creating the hkl dump of DB 2')
            hkl.dump(self.hkldb2, 'hkldb2' + csvDbDir[:-4] + '.hkl', mode='w', compression='lzf')
            #print('done..')
            
        
            
        return 'done'
    
    
    
    def load_DB_into_FASTsearch(self):
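        """Load the dumped hkl databases for Aktiv, Vorgangspassiv and Zustandspassiv,
        attach a FASTsearch index with its bag-of-words model to each of them, and
        initialize the GS_Utils and SentGlue helpers.
        """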
        
        #print('loading the hkldb_All databases..')
        self.hkldbAktiv_All = hkl.load('hkldb_AllAktiv.hkl')
        #print('first done')
        self.hkldbVorgangspassiv_All = hkl.load('hkldb_AllVorgangspassiv.hkl')
        #print('second done')
        self.hkldbZustandspassiv_All = hkl.load('hkldb_AllZustandspassiv.hkl')
        #print('third done')
        
        
        #print('loading hkldbIndi_Conju 1..')
        self.fsearchAktiv1 = FASTsearch.FASTsearch('hkldb1Aktiv.hkl')
        #print('done')
        
        #print('loading hkldbIndi_Conju 2..')
        self.fsearchAktiv2 = FASTsearch.FASTsearch('hkldb2Aktiv.hkl')
        #print('done')
        
        
        # Generating the BoW models is only necessary the first time; uncomment the
        # Gen_BoW_Model calls below to (re)build them.
        #print('generating BoW Model 1..')
        #self.fsearchAktiv1.Gen_BoW_Model(20000, "word", punctuation = False)
        #print('done')
        
        #print('generating BoW Model 2..')
        #self.fsearchAktiv2.Gen_BoW_Model(20000, "word", punctuation = False)
        #print('done')
        
        
        #print('loading the bow model 1')
        self.fsearchAktiv1.Load_BoW_Model('bagofwordshkldb1Aktiv.pkl', 'DataBaseOneZeroshkldb1Aktiv.hkl')
        #print('done')
        
        #print('loading the bow model 2')
        self.fsearchAktiv2.Load_BoW_Model('bagofwordshkldb2Aktiv.pkl', 'DataBaseOneZeroshkldb2Aktiv.hkl')
        #print('done')
        
        #print('loading hkldbIndi_Conju 1..')
        self.fsearchVorgangspassiv1 = FASTsearch.FASTsearch('hkldb1Vorgangspassiv.hkl')
        #print('done')
        
        #print('loading hkldbIndi_Conju 2..')
        self.fsearchVorgangspassiv2 = FASTsearch.FASTsearch('hkldb2Vorgangspassiv.hkl')
        #print('done')
        
        # Uncomment the Gen_BoW_Model calls below if the BoW model files are missing.
        #print('generating BoW Model 1..')
        #self.fsearchVorgangspassiv1.Gen_BoW_Model(20000, "word", punctuation = False)
        #print('done')
        
        #print('generating BoW Model 2..')
        #self.fsearchVorgangspassiv2.Gen_BoW_Model(20000, "word", punctuation = False)
        #print('done')
        
        
        #print('loading the bow model 1')
        self.fsearchVorgangspassiv1.Load_BoW_Model('bagofwordshkldb1Vorgangspassiv.pkl', 'DataBaseOneZeroshkldb1Vorgangspassiv.hkl')
        #print('done')
        
        #print('loading the bow model 2')
        self.fsearchVorgangspassiv2.Load_BoW_Model('bagofwordshkldb2Vorgangspassiv.pkl', 'DataBaseOneZeroshkldb2Vorgangspassiv.hkl')
        #print('done')
        
        #print('loading hkldbIndi_Conju 1..')
        self.fsearchZustandspassiv1 = FASTsearch.FASTsearch('hkldb1Zustandspassiv.hkl')
        #print('done')
        
        #print('loading hkldbIndi_Conju 2..')
        self.fsearchZustandspassiv2 = FASTsearch.FASTsearch('hkldb2Zustandspassiv.hkl')
        #print('done')
        
        #print('generating BoW Model 1..')
        #self.fsearchZustandspassiv1.Gen_BoW_Model(20000, "word", punctuation = False)
        #print('done')
        
        #print('generating BoW Model 2..')
        #self.fsearchZustandspassiv2.Gen_BoW_Model(20000, "word", punctuation = False)
        #print('done')
        
        
        #print('loading the bow model 1')
        self.fsearchZustandspassiv1.Load_BoW_Model('bagofwordshkldb1Zustandspassiv.pkl', 'DataBaseOneZeroshkldb1Zustandspassiv.hkl')
        #print('done')
        
        #print('loading the bow model 2')
        self.fsearchZustandspassiv2.Load_BoW_Model('bagofwordshkldb2Zustandspassiv.pkl', 'DataBaseOneZeroshkldb2Zustandspassiv.hkl')
        #print('done')
        
        import GS_Utils
        #print('initializing the gs utils..')
        self.gs = GS_Utils.GS_Utils('de_core_news_sm')
        #print('done')
        

        from SentGlue import SentGlueMach
        #print('loading the Stochastic Gradient models..')
        self.sgm = SentGlueMach('trainedSGD.pkl', 'bagofwords.pkl')
        #print('done')
        #print('initializing the SGM..')
        self.sgm.initialize()
        #print('done')
        
        #print('oi thats the get_feature_names', self.fsearch1.vectorizer.get_feature_names())
        #print('oi thats the get_feature_names', self.fsearch2.vectorizer.get_feature_names())
        
        
        
    def replacePassivForms(self, sentences):
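        """Convert passive sentences (given as lists of tokens) into active voice.

        The verb group of each sentence is matched against the Vorgangspassiv and
        Zustandspassiv databases; on a match the verbs are replaced with the active
        3rd person singular form, the subject becomes the object, the agent phrase
        ('von ...') becomes the new subject ('jemand' if there is none), and the
        most grammatical word order is picked via SentGlue. Returns a list of
        token lists.
        """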
        
        endsentences = []
        sentencecount = 0
        for sentence in sentences:
            
            sentencecount += 1
            #print('processing sentence', sentencecount)
            
            doc = self.nlp(' '.join(sentence))
            
            verbs_of_sentence = []
            wordindex_to_replace = []
            count = 0
            subjectofsentence = []
            subjectindex = []
            erindex = []
            Erindex = []
            undindex = []
            
            for word in doc:
                
                count += 1
                
                #print(word.text)
                #print(word.dep_)
                
                if word.dep_ == 'sb':
                    
                    #print('oi')
                    subjectofsentence.append(word.text)
                    subjectindex.append(count)
                
                if word.text == 'er':
                    erindex.append(count)
                if word.text == 'Er':
                    Erindex.append(count)
                if word.text == 'und':
                    undindex.append(count)
                    
                if word.tag_[0] == 'V':
                    verbs_of_sentence.append(word.text)
                    wordindex_to_replace.append(count)
            
            
            # If the only verb is a bare auxiliary, mask it with a placeholder so the
            # passive-form lookup cannot match on the auxiliary alone.
            if len(verbs_of_sentence) == 1 and verbs_of_sentence[0] in ('wurde', 'wird', 'werden', 'wirst', 'werde', 'war'):
                verbs_of_sentence[0] = 'bliblablubdudidu'
            
            verbs_of_sentence_string = ' '.join(verbs_of_sentence)
            
            
            
            # Append the character length of the verb string as an extra search token
            # (it is stripped again later via [:-1]).
            length_verbs_of_sentence_string = len(verbs_of_sentence_string)
            
            verbs_of_sentence_string += ' ' + str(length_verbs_of_sentence_string)
            #print(verbs_of_sentence_string)
            bestmatchesZustandspassiv1, matchindexZustandspassiv1 = self.fsearchZustandspassiv1.search_with_highest_multiplikation_Output(verbs_of_sentence_string, 1)    
            
            bestmatchesVorgangspassiv1, matchindexVorgangspassiv1 = self.fsearchVorgangspassiv1.search_with_highest_multiplikation_Output(verbs_of_sentence_string, 1)    
            
            
            #print('verbs of sentence string', verbs_of_sentence_string)
            #print(len(verbs_of_sentence))
            #print(matchindexVorgangspassiv1)
            #print(matchindexZustandspassiv1)
            
            
            vorgangORnot = 0
            zustandORnot = 0
            if (len(verbs_of_sentence) + 1) == matchindexVorgangspassiv1[1]:
                workindex = matchindexVorgangspassiv1[0]
                vorgangORnot = 1
                
            if (len(verbs_of_sentence) + 1) == matchindexZustandspassiv1[1]:
                workindex = matchindexZustandspassiv1[0]
                zustandORnot = 1
           
           
            #print(workindex)
           
            #print(self.hkldbAktiv_All[matchindexVorgangspassiv1[0]])
            #print(self.hkldbVorgangspassiv_All[matchindexVorgangspassiv1[0]])
            #print(self.hkldbZustandspassiv_All[matchindexZustandspassiv1[0]])
            
            formToReplace = []
            
            if vorgangORnot == 1:
                completeform = self.hkldbVorgangspassiv_All[workindex]
                if len(verbs_of_sentence_string.split()) != len(completeform[0][0].split()):
                    vorgangORnot = 0
            
            if vorgangORnot == 1:
                completeform = self.hkldbVorgangspassiv_All[workindex]
                formToReplace = self.hkldbVorgangspassiv_All[workindex][1][0].split()[-2:]
                
                
                #print('formtoreplace vorgang',formToReplace)
                #print('complete form', completeform)
                
                # Look up the active 3rd person singular form of the same tense.
                formToReplace = '3. Person Singular ' + ' '.join(formToReplace)
                
                #print(formToReplace)
            
                thrdPersonAktivindex = self.fsearchAktiv2.search_with_highest_multiplikation_Output(formToReplace, 1)[0]
                
                thrdPersonAktiv = self.hkldbAktiv_All[thrdPersonAktivindex[0]][0][0].split()[:-1]
                
                #print(thrdPersonAktiv)
                
                thrdPersonAktiv = ' '.join(thrdPersonAktiv)
                
                dalist = verbs_of_sentence_string.split()[:-1]
                
                for verb in dalist:
                    #print(sentence)
                    #print(index)
                    
                    sentence.remove(verb)
                
                thereisasubjectEr = 0
                
                # Detect a coordinated subject such as "... und er" / "er und ..."
                # via the previously collected er/Er positions.
                for index in subjectindex:
                    for ind in undindex:
                        if index - 1 == ind:
                            if (index - 2) in erindex or (index - 2) in Erindex:
                                thereisasubjectEr = 1
                        if index + 1 == ind:
                            if (index + 2) in erindex or (index + 2) in Erindex:
                                thereisasubjectEr = 1
                #print('subjectofsentence', subjectofsentence)
                thereisasubjectich = 0
                thereisasubjectdu = 0
                thereisasubjectihr = 0
                thereisasubjectwir = 0
                for word in subjectofsentence:
                    if word == 'er' or word == 'Er':
                        thereisasubjectEr = 1
                    if word == 'ich':
                        thereisasubjectich = 1
                    if word == 'du':
                        thereisasubjectdu = 1
                    if word == 'ihr':
                        thereisasubjectihr = 1
                    if word == 'wir':
                        thereisasubjectwir = 1
                #print('there is a subjecter', thereisasubjectEr)
                if thereisasubjectEr == 1:
                    try:
                        sentence.remove('Er')
                    except ValueError:
                        sentence.remove('er')
                    sentence.append('ihn')
                if thereisasubjectich == 1:
                    sentence.remove('ich')
                    sentence.append('mich')
                if thereisasubjectdu == 1:
                    sentence.remove('du')
                    sentence.append('dich')
                if thereisasubjectihr == 1:
                    sentence.remove('ihr')
                    sentence.append('euch')
                if thereisasubjectwir == 1:
                    sentence.remove('wir')
                    sentence.append('uns')
                
                sentence.append(thrdPersonAktiv)
                #print('sentence in the vorgangornot', sentence)
                jemandornot = 1
                wordstodelete = []
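                # Rewrite the passive agent phrase ("von ..." / "vom ...") into the
                # new active subject; if no agent is found, "jemand" is appended below.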
                for n in range(len(sentence) - 1):
                    if sentence[n] == 'von':
                        if sentence[n + 1] == 'ihr':
                            sentence[n + 1] = 'sie'
                            wordstodelete.append(n)
                            jemandornot = 0
                        if sentence[n + 1] == 'ihm':
                            sentence[n + 1] = 'er'
                            wordstodelete.append(n)
                            jemandornot = 0
                        # Reuse the spaCy model loaded in __init__ instead of re-importing
                        # and reloading it for every agent phrase.
                        token1 = self.nlp(sentence[n - 1])
                        token2 = self.nlp(sentence[n + 1])
                        # If 'von' is followed by a noun (NN/NE) but not preceded by one,
                        # treat it as the agent phrase and mark the 'von' for deletion.
                        for word1 in token1:
                            if word1.tag_ != 'NN' and word1.tag_ != 'NE':
                                for word2 in token2:
                                    if word2.tag_ == 'NN' or word2.tag_ == 'NE':
                                        wordstodelete.append(n)
                                        jemandornot = 0
                        if sentence[n + 1] == 'dem' or sentence[n + 1] == 'einem':
                            
                            token3 = self.nlp(sentence[n-1])
                            for word in token3:
                                if word.tag_ != 'NN' and word.tag_ != 'NE':
                                    sentence[n + 1] = 'ein'
                                    wordstodelete.append(n)
                                    jemandornot = 0
                        if sentence[n + 1] == 'der' or sentence[n + 1] == 'einer':
                            token4 = self.nlp(sentence[n-1])
                            for word in token4:
                                if word.tag_ != 'NN' and word.tag_ != 'NE':
                                    sentence[n + 1] = 'eine'
                                    wordstodelete.append(n)
                                    jemandornot = 0
                                    
                    if sentence[n] == 'vom':
                        
                        sentence[n] = 'ein'
                        jemandornot = 0
                # Delete marked words from the back so the remaining indices stay valid;
                # deduplicate in case an index was marked more than once.
                for index in sorted(set(wordstodelete), reverse=True):
                    del sentence[index]
                if jemandornot == 1:
                    sentence.append('jemand')
            
                
                #print('sentence checkpoint 2', sentence)
            
                #print('get the tuples and triples to check..')
                tuplesTocheck, triplesTocheck, quadruplesToCheck = self.gs.GetTuplesinSentence(sentence)
                #print('done')
                #print(tuplesTocheck, triplesTocheck)
                
                grammpiecessentence = self.gs.createTupleofGrammarpieces( sentence, tuplesTocheck, triplesTocheck, quadruplesToCheck)
                
                if len(grammpiecessentence) > 7:
                    print('A sentence is too long, too many permutations.\nPassing it through with possibly wrong grammar..')
                    endsentences.append(' '.join(grammpiecessentence).split())

                else:
                    
                    #print('the grammpiecessentence', grammpiecessentence)
                    #print('genrating the permutations')
                    permutations = self.sgm.GeneratePermutationsOfSentence(grammpiecessentence)
                    #print('done')
                    #print(permutations)
                    #if (len(tuplesTocheck) != 0) or (len(triplesTocheck) != 0):
                    #    print('filtering the permutations based on the tuples and triples..')
                    #    filteredpermutations = self.gs.filterpermutationsaccordingtotuples(permutations, tuplesTocheck, triplesTocheck)
                    #    print('done')
                    #else:
                    #    print('there are no triples or tuples to check..')
                    #    filteredpermutations = permutations

                    sentencesToCheck = []
                    # Use a separate loop variable so the working 'sentence' is not
                    # overwritten by the permutations.
                    for perm in permutations:
                        sentencesToCheck.append(' '.join(perm))

                    #print('sentencesToCheck', sentencesToCheck)
                    #print('classifying the probability for right grammar in the filtered permutations..')
                    #print(' '.join(sentence))
                    endsentence = self.sgm.GetBestSentenceFromSentencesAccordingToGrammar(sentencesToCheck, ' '.join(sentence))
                    #print('done')

                    #print('the endsentence', endsentence)
                    endsentences.append(endsentence.split())
            
            #count1 = 0
            
            #print(subjectindex)
            #subjectindex = subjectindex[0]
            #if subjectindex != 0:
                #for word in sentence[subjectindex - 1:subjectindex + 1]:
                    #count1 += 1
                    #if word == 'und':
                        #thereIsanUnd = count1
            #if subjectindex == 0:
                #for word in sentence[subjectindex:subjectindex + 1]:
                    #count1 += 1
                    #if word == 'und':
                        #thereIsanUnd = count1
            #thereisanEr = 0
            #if sentence[subjectindex - 1 + thereIsanUnd] == 'er' or sentence[subjectindex - 1 + thereIsanUnd] == 'Er':
                
                #thereisanEr = 1
            
             
            #if thereisanEr == 1:
             
                #sentence.remove('Er')
                #sentence.remove('er')
                #sentence.append('ihn')
            
            
            #print('zustandornot',zustandORnot)
            #print('vorgang', vorgangORnot)
            
            if zustandORnot == 1:
                completeform = self.hkldbZustandspassiv_All[workindex]
                if len(verbs_of_sentence_string.split()) != len(completeform[0][0].split()):
                    zustandORnot = 0
            
            
            if zustandORnot == 1:
                #completeform = self.hkldbZustandspassiv_All[workindex]
                formToReplace = self.hkldbZustandspassiv_All[workindex][1][0].split()[-2:]
                formToReplace = '3. Person Singular ' + ' '.join(formToReplace)
                #print('formtoreplace zustand',formToReplace)
                #print('complete form', completeform)
            
                thrdPersonAktivindex = self.fsearchAktiv2.search_with_highest_multiplikation_Output(formToReplace, 1)[0]
                
                thrdPersonAktiv = self.hkldbAktiv_All[thrdPersonAktivindex[0]][0][0].split()[:-1]
                
                thrdPersonAktiv = ' '.join(thrdPersonAktiv)
                
                for verb in verbs_of_sentence_string.split()[:-1]:
                    #print(sentence)
                    #print(index)
                    
                    sentence.remove(verb)
                
                thereisasubjectEr = 0
                
                # Detect a coordinated subject such as "... und er" / "er und ..."
                # via the previously collected er/Er positions.
                for index in subjectindex:
                    for ind in undindex:
                        if index - 1 == ind:
                            if (index - 2) in erindex or (index - 2) in Erindex:
                                thereisasubjectEr = 1
                        if index + 1 == ind:
                            if (index + 2) in erindex or (index + 2) in Erindex:
                                thereisasubjectEr = 1
                #print('subjectofsentence', subjectofsentence)
                
                thereisasubjectich = 0
                thereisasubjectdu = 0
                thereisasubjectihr = 0
                thereisasubjectwir = 0
                for word in subjectofsentence:
                    if word == 'er' or word == 'Er':
                        thereisasubjectEr = 1
                    if word == 'ich':
                        thereisasubjectich = 1
                    if word == 'du':
                        thereisasubjectdu = 1
                    if word == 'ihr':
                        thereisasubjectihr = 1
                    if word == 'wir':
                        thereisasubjectwir = 1
                if thereisasubjectEr == 1:
                    try:
                        sentence.remove('Er')
                    except ValueError:
                        sentence.remove('er')
                    sentence.append('ihn')
                
                if thereisasubjectich == 1:
                    sentence.remove('ich')
                    sentence.append('mich')
                if thereisasubjectdu == 1:
                    sentence.remove('du')
                    sentence.append('dich')
                if thereisasubjectihr == 1:
                    sentence.remove('ihr')
                    sentence.append('euch')
                if thereisasubjectwir == 1:
                    sentence.remove('wir')
                    sentence.append('uns')
                
                sentence.append(thrdPersonAktiv)
                
                jemandornot = 1
                wordstodelete = []
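                # Rewrite the passive agent phrase ("von ..." / "vom ...") into the
                # new active subject; if no agent is found, "jemand" is appended below.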
                for n in range(len(sentence) - 1):
                    if sentence[n] == 'von':
                        if sentence[n + 1] == 'ihr':
                            sentence[n + 1] = 'sie'
                            wordstodelete.append(n)
                            jemandornot = 0
                        if sentence[n + 1] == 'ihm':
                            sentence[n + 1] = 'er'
                            wordstodelete.append(n)
                            jemandornot = 0
                        
                        # Reuse the spaCy model loaded in __init__ instead of re-importing
                        # and reloading it for every agent phrase.
                        token1 = self.nlp(sentence[n - 1])
                        token2 = self.nlp(sentence[n + 1])
                        # If 'von' is followed by a noun (NN/NE) but not preceded by one,
                        # treat it as the agent phrase and mark the 'von' for deletion.
                        for word1 in token1:
                            if word1.tag_ != 'NN' and word1.tag_ != 'NE':
                                for word2 in token2:
                                    if word2.tag_ == 'NN' or word2.tag_ == 'NE':
                                        wordstodelete.append(n)
                                        jemandornot = 0
                        if sentence[n + 1] == 'dem' or sentence[n + 1] == 'einem':
                            
                            token3 = self.nlp(sentence[n-1])
                            for word in token3:
                                if word.tag_ != 'NN' and word.tag_ != 'NE':
                                    sentence[n + 1] = 'ein'
                                    wordstodelete.append(n)
                                    jemandornot = 0
                        if sentence[n + 1] == 'der' or sentence[n + 1] == 'einer':
                            token4 = self.nlp(sentence[n-1])
                            for word in token4:
                                if word.tag_ != 'NN' and word.tag_ != 'NE':
                                    sentence[n + 1] = 'eine'
                                    wordstodelete.append(n)
                                    jemandornot = 0
                                    
                    if sentence[n] == 'vom':
                        
                        sentence[n] = 'ein'
                        jemandornot = 0
                
                # Delete marked words from the back so the remaining indices stay valid;
                # deduplicate in case an index was marked more than once.
                for index in sorted(set(wordstodelete), reverse=True):
                    del sentence[index]
                    
                if jemandornot == 1:
                    sentence.append('jemand')
            
                
                #print(sentence)
            
                #print('get the tuples and triples to check..')
                tuplesTocheck, triplesTocheck, quadruplesTocheck = self.gs.GetTuplesinSentence(sentence)
                #print('done')
                #print(tuplesTocheck, triplesTocheck)
                
                grammpiecessentence = self.gs.createTupleofGrammarpieces( sentence, tuplesTocheck, triplesTocheck, quadruplesTocheck)
                
                if len(grammpiecessentence) > 7:
                    print('A sentence is too long, too many permutations.\nPassing it through with possibly wrong grammar..')
                    endsentences.append(' '.join(grammpiecessentence).split())

                else:
                    
                    #print('the grammpiecessentence', grammpiecessentence)
                    #print('genrating the permutations')
                    permutations = self.sgm.GeneratePermutationsOfSentence(grammpiecessentence)
                    #print('done')
                    #print(permutations)
                    #if (len(tuplesTocheck) != 0) or (len(triplesTocheck) != 0):
                    #    print('filtering the permutations based on the tuples and triples..')
                    #    filteredpermutations = self.gs.filterpermutationsaccordingtotuples(permutations, tuplesTocheck, triplesTocheck)
                    #    print('done')
                    #else:
                    #    print('there are no triples or tuples to check..')
                    #    filteredpermutations = permutations

                    sentencesToCheck = []
                    # Use a separate loop variable so the working 'sentence' is not
                    # overwritten by the permutations.
                    for perm in permutations:
                        sentencesToCheck.append(' '.join(perm))

                    #print('sentencesToCheck', sentencesToCheck)
                    #print('classifying the probability for right grammar in the filtered permutations..')
                    #print(' '.join(sentence))
                    endsentence = self.sgm.GetBestSentenceFromSentencesAccordingToGrammar(sentencesToCheck, ' '.join(sentence))
                    #print('done')

                    #print('the endsentence', endsentence)
                    endsentences.append(endsentence.split())
            
            
            
            if zustandORnot == 0 and vorgangORnot == 0:
                #print('it is coming to the else')
                endsentences.append(sentence)
        
        return endsentences
            
            
            
            # Vorgangspassiv is mapped onto the same tense, 3rd person singular.
            # Zustandspassiv: always one tense later, i.e.
            # Präsens => Präteritum, Präteritum => Perfekt.
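
# Minimal usage sketch: shows how the class might be driven end to end. It assumes the
# hkl databases and BoW model files referenced in load_DB_into_FASTsearch() already
# exist in the working directory; the example sentence is illustrative only.
if __name__ == '__main__':
    p2a = Passiv2Aktiv(None, None, None)
    p2a.load_DB_into_FASTsearch()
    aktiv = p2a.replacePassivForms([['Der', 'Brief', 'wird', 'von', 'ihm', 'geschrieben']])
    print([' '.join(s) for s in aktiv])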