"""Passiv2Aktiv: rewrite German passive-voice sentences into active voice.

Relies on pre-built hickle databases of verb conjugations (active,
Vorgangspassiv, Zustandspassiv), FASTsearch indices over them, grammar
utilities (GS_Utils) and a sentence-gluing SGD model (SentGlue).
"""

import spacy
import nltk
from nltk.stem.snowball import SnowballStemmer
import hickle as hkl
import FASTsearch

stemmer = SnowballStemmer("german")

# Finite forms of the passive auxiliaries.  A sentence whose only verb is one
# of these cannot be a full passive construction, so that verb is masked with
# a nonsense token before the database lookup.
PASSIVE_AUX_FORMS = ('wurde', 'wird', 'werden', 'wirst', 'werde', 'war')


class Passiv2Aktiv(object):
    """Converter from German passive sentences to active voice."""

    def __init__(self, hklDatabaseDir_Aktiv, hklDatabaseDir_Vorgangspassiv,
                 hklDatabaseDir_Zustandspassiv):
        """Load the conjugation databases (each argument may be None to skip
        it), the German spacy model and a Snowball stemmer."""
        if hklDatabaseDir_Aktiv is not None:
            self.AktivDB = hkl.load(hklDatabaseDir_Aktiv)
        if hklDatabaseDir_Vorgangspassiv is not None:
            self.VorgangspassivDB = hkl.load(hklDatabaseDir_Vorgangspassiv)
        if hklDatabaseDir_Zustandspassiv is not None:
            self.ZustandspassivDB = hkl.load(hklDatabaseDir_Zustandspassiv)
        self.nlp = spacy.load('de_core_news_sm')
        self.stemmer = SnowballStemmer("german")

    def create_hklDB_from_csv(self, csvDbDir, StemOrNot):
        """Read a csv database (one Python tuple literal per line) and dump it
        as three hkl files: the full database plus one file per column.

        :param csvDbDir: path of the csv file; its name (minus '.csv') is
            embedded in the output file names.
        :param StemOrNot: accepted for interface compatibility; not used here.
        :return: the string 'done'.
        """
        with open(csvDbDir) as lines:
            self.DB_All = []
            for line in lines:
                # SECURITY: eval() executes arbitrary code from the csv file.
                # Only use trusted database files (ast.literal_eval would be
                # the safe alternative).
                self.DB_All.append(list(eval(line)))
        self.hkldb1 = []
        self.hkldb2 = []
        # Column 0 holds the conjugated forms, column 1 the grammar
        # description; progress is reported every 1000 entries.
        for counter, entry in enumerate(self.DB_All, 1):
            if counter % 1000 == 0:
                print(counter)
            self.hkldb1.append([entry[0][0]])
            self.hkldb2.append([entry[1][0]])
        print('creating the hkl dump of DBAll')
        hkl.dump(self.DB_All, 'hkldb_All' + csvDbDir[:-4] + '.hkl', mode='w', compression='lzf')
        print('Creating the hkl dump of DB 1')
        hkl.dump(self.hkldb1, 'hkldb1' + csvDbDir[:-4] + '.hkl', mode='w', compression='lzf')
        print('Creating the hkl dump of DB 2')
        hkl.dump(self.hkldb2, 'hkldb2' + csvDbDir[:-4] + '.hkl', mode='w', compression='lzf')
        return 'done'

    def load_DB_into_FASTsearch(self):
        """Load the dumped hkl databases and initialise one FASTsearch index
        pair (verb forms / grammar descriptions) per voice, plus the grammar
        utilities and the sentence-gluing SGD model."""
        self.hkldbAktiv_All = hkl.load('hkldb_AllAktiv.hkl')
        self.hkldbVorgangspassiv_All = hkl.load('hkldb_AllVorgangspassiv.hkl')
        self.hkldbZustandspassiv_All = hkl.load('hkldb_AllZustandspassiv.hkl')

        self.fsearchAktiv1 = FASTsearch.FASTsearch('hkldb1Aktiv.hkl')
        self.fsearchAktiv2 = FASTsearch.FASTsearch('hkldb2Aktiv.hkl')
        # Generating the BoW models is only necessary the first time:
        # self.fsearchAktiv1.Gen_BoW_Model(20000, "word", punctuation = False)
        # self.fsearchAktiv2.Gen_BoW_Model(20000, "word", punctuation = False)
        self.fsearchAktiv1.Load_BoW_Model('bagofwordshkldb1Aktiv.pkl', 'DataBaseOneZeroshkldb1Aktiv.hkl')
        self.fsearchAktiv2.Load_BoW_Model('bagofwordshkldb2Aktiv.pkl', 'DataBaseOneZeroshkldb2Aktiv.hkl')

        self.fsearchVorgangspassiv1 = FASTsearch.FASTsearch('hkldb1Vorgangspassiv.hkl')
        self.fsearchVorgangspassiv2 = FASTsearch.FASTsearch('hkldb2Vorgangspassiv.hkl')
        # Uncomment if the models are not there yet:
        # self.fsearchVorgangspassiv1.Gen_BoW_Model(20000, "word", punctuation = False)
        # self.fsearchVorgangspassiv2.Gen_BoW_Model(20000, "word", punctuation = False)
        self.fsearchVorgangspassiv1.Load_BoW_Model('bagofwordshkldb1Vorgangspassiv.pkl', 'DataBaseOneZeroshkldb1Vorgangspassiv.hkl')
        self.fsearchVorgangspassiv2.Load_BoW_Model('bagofwordshkldb2Vorgangspassiv.pkl', 'DataBaseOneZeroshkldb2Vorgangspassiv.hkl')

        self.fsearchZustandspassiv1 = FASTsearch.FASTsearch('hkldb1Zustandspassiv.hkl')
        self.fsearchZustandspassiv2 = FASTsearch.FASTsearch('hkldb2Zustandspassiv.hkl')
        # self.fsearchZustandspassiv1.Gen_BoW_Model(20000, "word", punctuation = False)
        # self.fsearchZustandspassiv2.Gen_BoW_Model(20000, "word", punctuation = False)
        self.fsearchZustandspassiv1.Load_BoW_Model('bagofwordshkldb1Zustandspassiv.pkl', 'DataBaseOneZeroshkldb1Zustandspassiv.hkl')
        self.fsearchZustandspassiv2.Load_BoW_Model('bagofwordshkldb2Zustandspassiv.pkl', 'DataBaseOneZeroshkldb2Zustandspassiv.hkl')

        # Imported lazily, as in the original: these heavy modules are only
        # needed once the search indices exist.
        import GS_Utils
        self.gs = GS_Utils.GS_Utils('de_core_news_sm')

        from SentGlue import SentGlueMach
        self.sgm = SentGlueMach('trainedSGD.pkl', 'bagofwords.pkl')
        self.sgm.initialize()

    def replacePassivForms(self, sentences):
        """Rewrite every passive sentence in *sentences* into active voice.

        :param sentences: list of sentences, each a list of word strings.
        :return: list of result sentences (lists of words); sentences not
            recognised as passive, or raising an error, pass through
            unchanged.
        """
        endsentences = []
        for sentence in sentences:
            try:
                doc = self.nlp(' '.join(sentence))
                words = []          # token texts, parallel to 1-based positions
                verbs = []          # tokens whose tag starts with 'V'
                subjects = []       # texts of tokens with subject dependency
                subjectindex = []   # 1-based positions of the subjects
                undindex = []       # 1-based positions of 'und'
                position = 0
                for token in doc:
                    position += 1
                    words.append(token.text)
                    if token.dep_ == 'sb':
                        subjects.append(token.text)
                        subjectindex.append(position)
                    if token.text == 'und':
                        undindex.append(position)
                    if token.tag_[0] == 'V':
                        verbs.append(token.text)

                # A lone auxiliary is no passive: mask it so the database
                # lookup cannot match.  (The original compared against
                # ('wurde' or 'wird' or ...), which evaluates to just
                # 'wurde' -- fixed to a real membership test.)
                if len(verbs) == 1 and verbs[0] in PASSIVE_AUX_FORMS:
                    verbs[0] = 'bliblablubdudidu'

                # Database search keys look like "<verb forms> <char length>".
                verbstring = ' '.join(verbs)
                verbstring += ' ' + str(len(verbstring))

                _, match_z = self.fsearchZustandspassiv1.search_with_highest_multiplikation_Output(verbstring, 1)
                _, match_v = self.fsearchVorgangspassiv1.search_with_highest_multiplikation_Output(verbstring, 1)

                vorgang = 0
                zustand = 0
                workindex = None
                if (len(verbs) + 1) == match_v[1]:
                    workindex = match_v[0]
                    vorgang = 1
                if (len(verbs) + 1) == match_z[1]:
                    # NOTE: when both voices match, the Zustandspassiv index
                    # wins -- kept from the original code.
                    workindex = match_z[0]
                    zustand = 1

                # Reject a match whose stored form has a different word count
                # than the sentence's verb string.
                if vorgang == 1:
                    completeform = self.hkldbVorgangspassiv_All[workindex]
                    if len(verbstring.split()) != len(completeform[0][0].split()):
                        vorgang = 0
                if vorgang == 1:
                    endsentences.append(self._to_active(
                        sentence, self.hkldbVorgangspassiv_All, workindex,
                        verbstring, subjects, subjectindex, undindex, words))

                if zustand == 1:
                    completeform = self.hkldbZustandspassiv_All[workindex]
                    if len(verbstring.split()) != len(completeform[0][0].split()):
                        zustand = 0
                if zustand == 1:
                    endsentences.append(self._to_active(
                        sentence, self.hkldbZustandspassiv_All, workindex,
                        verbstring, subjects, subjectindex, undindex, words))

                if zustand == 0 and vorgang == 0:
                    # Not recognised as passive: pass through unchanged.
                    endsentences.append(sentence)
            except Exception:
                print('the sentence ' + str(sentence) + ' caused an error in the module passive2active')
                # Guard against an empty result list: the original indexed
                # endsentences[-1] unconditionally and could raise here.
                if not endsentences or endsentences[-1] != sentence:
                    endsentences.append(sentence)
        return endsentences

    def _to_active(self, sentence, passive_db, workindex, verbstring,
                   subjects, subjectindex, undindex, words):
        """Convert one matched passive *sentence* (mutated in place) to active
        voice using *passive_db* and return the best re-ordered word list.

        Replaces the two near-identical Vorgangs-/Zustandspassiv code paths of
        the original implementation.
        """
        # Last two words of the grammar description of the matched form
        # (e.g. tense/mood); look up the corresponding active
        # '3. Person Singular' conjugation.
        formToReplace = passive_db[workindex][1][0].split()[-2:]
        formToReplace = '3. Person Singular ' + ' '.join(formToReplace)
        activeindex = self.fsearchAktiv2.search_with_highest_multiplikation_Output(formToReplace, 1)[0]
        thirdPersonActive = ' '.join(self.hkldbAktiv_All[activeindex[0]][0][0].split()[:-1])

        # Remove the passive verb forms (trailing length token excluded).
        for verb in verbstring.split()[:-1]:
            sentence.remove(verb)

        self._subjects_to_accusative(sentence, subjects, subjectindex, undindex, words)
        sentence.append(thirdPersonActive)
        self._rewrite_agent_phrase(sentence)
        return self._pick_best_order(sentence)

    def _subjects_to_accusative(self, sentence, subjects, subjectindex, undindex, words):
        """Replace nominative subject pronouns in *sentence* (in place) by
        their accusative forms, appended at the end of the word list."""
        has_er = 0
        # A subject co-ordinated with 'er' via 'und' also forces 'ihn'.
        # (The original compared the integer position itself against the
        # string 'er', which can never be true; comparing the token text at
        # that position is the evident intent -- fixed, with bounds checks.)
        for index in subjectindex:
            for ind in undindex:
                if index - 1 == ind and index - 3 >= 0 and words[index - 3] in ('er', 'Er'):
                    has_er = 1
                if index + 1 == ind and index + 1 < len(words) and words[index + 1] in ('er', 'Er'):
                    has_er = 1
        if 'er' in subjects or 'Er' in subjects:
            has_er = 1
        if has_er == 1:
            try:
                sentence.remove('Er')
            except ValueError:
                sentence.remove('er')
            sentence.append('ihn')
        for nominative, accusative in (('ich', 'mich'), ('du', 'dich'),
                                       ('ihr', 'euch'), ('wir', 'uns')):
            if nominative in subjects:
                sentence.remove(nominative)
                sentence.append(accusative)

    def _rewrite_agent_phrase(self, sentence):
        """Turn a 'von X' / 'vom X' agent phrase into the new subject
        (mutating *sentence* in place); append 'jemand' when the sentence
        names no agent at all."""
        needs_jemand = True
        todelete = []
        for n in range(len(sentence) - 1):
            if sentence[n] == 'von':
                if sentence[n + 1] == 'ihr':
                    sentence[n + 1] = 'sie'
                    todelete.append(n)
                    needs_jemand = False
                if sentence[n + 1] == 'ihm':
                    sentence[n + 1] = 'er'
                    todelete.append(n)
                    needs_jemand = False
                # 'von' + noun: drop the 'von' and keep the noun as subject.
                # Reuses self.nlp instead of re-running spacy.load() for every
                # phrase, which the original did inside this loop.
                # NOTE(review): for n == 0 this inspects sentence[-1], the
                # last word -- behaviour kept from the original.
                before = self.nlp(sentence[n - 1])
                after = self.nlp(sentence[n + 1])
                for w1 in before:
                    if w1.tag_ != 'NN' and w1.tag_ != 'NE':
                        for w2 in after:
                            if w2.tag_ == 'NN' or w2.tag_ == 'NE':
                                todelete.append(n)
                                needs_jemand = False
                if sentence[n + 1] == 'dem' or sentence[n + 1] == 'einem':
                    for w in self.nlp(sentence[n - 1]):
                        if w.tag_ != 'NN' and w.tag_ != 'NE':
                            sentence[n + 1] = 'ein'
                            todelete.append(n)
                            needs_jemand = False
                if sentence[n + 1] == 'der' or sentence[n + 1] == 'einer':
                    for w in self.nlp(sentence[n - 1]):
                        if w.tag_ != 'NN' and w.tag_ != 'NE':
                            sentence[n + 1] = 'eine'
                            todelete.append(n)
                            needs_jemand = False
            if sentence[n] == 'vom':
                # 'vom X' -> 'ein X': keep the agent as indefinite subject.
                sentence[n] = 'ein'
                needs_jemand = False
        # Delete back-to-front so earlier indices stay valid.
        for idx in todelete[::-1]:
            del sentence[idx]
        if needs_jemand:
            sentence.append('jemand')

    def _pick_best_order(self, sentence):
        """Permute the grammar pieces of *sentence* and return the word list
        of the most grammatical permutation."""
        tuples, triples, quadruples = self.gs.GetTuplesinSentence(sentence)
        grammpieces = self.gs.createTupleofGrammarpieces(sentence, tuples, triples, quadruples)
        if len(grammpieces) > 7:
            print('A sentence is too long, too many permutations. \n piping wrong grammar..')
            return ' '.join(grammpieces).split()
        permutations = self.sgm.GeneratePermutationsOfSentence(grammpieces)
        candidates = [' '.join(permutation) for permutation in permutations]
        # The original passed the *last* permutation as the reference here
        # because its loop variable shadowed 'sentence'; the converted
        # sentence itself is the evident intent -- fixed.
        best = self.sgm.GetBestSentenceFromSentencesAccordingToGrammar(candidates, ' '.join(sentence))
        return best.split()


# Vorgangspassiv is mapped onto the same tense, 3rd person singular.
# Zustandspassiv: always one tense later, i.e.
# Praesens => Praeteritum, Praeteritum => Perfekt