import spacy
from nltk.stem.snowball import SnowballStemmer

import hickle as hkl
import FASTsearch


class FremdWB(object):

    def __init__(self, hklDatabaseDir_Fremd_WB, hklDatabaseDir_Fremd_WB_All):

        # Load the full dictionary database if a path is given
        # (hklDatabaseDir_Fremd_WB itself is currently unused).
        if hklDatabaseDir_Fremd_WB_All is not None:
            self.Fremd_WBDB_All = hkl.load(hklDatabaseDir_Fremd_WB_All)

        # Load the German spaCy model used for POS tagging.
        self.nlp = spacy.load('de_core_news_sm')

        # Load the German Snowball stemmer.
        self.stemmer = SnowballStemmer("german")
    
    
    def create_hklDB_from_csv(self, csvDbDir, StemOrNot):

        # Read the CSV line by line; each line is eval()ed into a pair of
        # tuples (headword, entry), see the format sketch below.
        # NOTE: eval() executes the file contents, so the CSV must be trusted.
        with open(csvDbDir) as lines:

            self.Fremd_WBDB_All = []

            for line in lines:
                self.Fremd_WBDB_All.append(list(eval(line)))

            # Split the database into two parallel lists: headwords and entries.
            self.hkldbFremd_WB1 = []
            self.hkldbFremd_WB2 = []

            counter = 0
            for n in range(len(self.Fremd_WBDB_All)):

                counter += 1
                if counter % 1000 == 0:
                    print(counter)

                self.hkldbFremd_WB1.append([self.Fremd_WBDB_All[n][0][0]])
                self.hkldbFremd_WB2.append([self.Fremd_WBDB_All[n][1][0]])

            print('creating the hkl dump of Fremd_WBDB All')
            hkl.dump(self.Fremd_WBDB_All, 'hkldbFremd_WB_All.hkl', mode='w', compression='lzf')
            print('done..')

            print('creating the hkl dump of Fremd_WBDB 1')
            hkl.dump(self.hkldbFremd_WB1, 'hkldbFremd_WB1.hkl', mode='w', compression='lzf')
            print('done..')

            print('creating the hkl dump of Fremd_WBDB 2')
            hkl.dump(self.hkldbFremd_WB2, 'hkldbFremd_WB2.hkl', mode='w', compression='lzf')
            print('done..')

        return 'done'
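
    # Format sketch for the CSV consumed above (hedged; the example entry is
    # invented for illustration). Each line must eval() to two 1-tuples,
    # headword first, dictionary entry second:
    #
    #   ('Quorum das',), ('Quorum: die zum Beschluss noetige Stimmenzahl.',)
    #
    # list(eval(line)) then yields [('Quorum das',), ('Quorum: ...',)], so the
    # indexing [n][0][0] and [n][1][0] above selects the two strings.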
    
    def load_DB_into_FASTsearch(self):

        # Load the headword and entry databases from their hkl dumps.
        self.hkldbFremd_WB1 = hkl.load('hkldbFremd_WB1.hkl')
        self.hkldbFremd_WB2 = hkl.load('hkldbFremd_WB2.hkl')

        # Build a FASTsearch index over each database.
        self.fsearch1 = FASTsearch.FASTsearch('hkldbFremd_WB1.hkl')
        self.fsearch2 = FASTsearch.FASTsearch('hkldbFremd_WB2.hkl')

        # Generate word-level bag-of-words models with a vocabulary of up to
        # 50000 terms, ignoring punctuation.
        self.fsearch1.Gen_BoW_Model(50000, "word", punctuation=False)
        self.fsearch2.Gen_BoW_Model(50000, "word", punctuation=False)

        # Load the generated vectorizers and their one/zero document matrices.
        self.fsearch1.Load_BoW_Model('bagofwordshkldbFremd_WB1.pkl', 'DataBaseOneZeroshkldbFremd_WB1.hkl')
        self.fsearch2.Load_BoW_Model('bagofwordshkldbFremd_WB2.pkl', 'DataBaseOneZeroshkldbFremd_WB2.hkl')
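    # Hedged example of how the indices built above are queried (this mirrors
    # the lookup in fremdEintragAppend below; the query word is invented):
    #
    #   bestmatches, matchindex = self.fsearch1.search_with_highest_multiplikation_Output('Quorum', 1)
    #   print(self.hkldbFremd_WB2[matchindex[0]][0])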
        
    def fremdEintragAppend(self, sentences, punctuations):

        outsentences = []
        sentencecount = 0
        alleeintraege = []

        for sentence in sentences:
            sentencecount += 1

            doc = self.nlp(' '.join(sentence))

            # Collect every token whose STTS tag starts with V, N or A
            # (verbs, nouns, adjectives and similar classes).
            fremds_of_sentence = []
            for word in doc:
                if word.tag_[0] in ('V', 'N', 'A'):
                    fremds_of_sentence.append(word.text)

            # Look each candidate up in the headword index; keep the entry
            # only when the best match actually starts with the word itself.
            fremdeintraege = []
            for word in fremds_of_sentence:

                bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(word, 1)

                fremd = self.hkldbFremd_WB1[matchindex2[0]][0].split()
                fremdeintrag = self.hkldbFremd_WB2[matchindex2[0]][0].split()

                if fremd[0] == word:
                    fremdeintraege.append(fremdeintrag)

            outsentences.append(sentence)

            # Append each entry not seen so far as a sentence of its own,
            # stripping a trailing period and registering a matching '.' in
            # the punctuation list.
            for eintrag in fremdeintraege:
                if eintrag[-1][-1] == '.':
                    eintrag[-1] = eintrag[-1][:-1]
                if eintrag not in alleeintraege:
                    outsentences.append(eintrag)
                    punctuations.insert(sentencecount, '.')
                alleeintraege.append(eintrag)

        return outsentences, punctuations
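

# Minimal usage sketch (hedged): it assumes the hkl dumps created by
# create_hklDB_from_csv already exist in the working directory, and it passes
# None for the first constructor argument, which this class does not use.
# The sample sentence is invented for illustration.
if __name__ == '__main__':

    fremd_wb = FremdWB(None, 'hkldbFremd_WB_All.hkl')
    fremd_wb.load_DB_into_FASTsearch()

    sentences = [['Das', 'Quorum', 'wurde', 'erreicht']]
    punctuations = ['.']

    outsentences, punctuations = fremd_wb.fremdEintragAppend(sentences, punctuations)
    for outsentence in outsentences:
        print(' '.join(outsentence))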