# Class to solve shortforms (abbreviation expansion); the data comes from
# Abkuerzungen.txt.

import ast

import hickle as hkl

import FASTsearch


class SolveShorts(object):
    """Expand German abbreviations ("shortforms") found in tokenized sentences.

    The abbreviation database is produced by create_hklDB_from_csv() and
    queried through FASTsearch; recognized abbreviations get their expansion
    inserted in brackets directly after them.
    """

    def __init__(self, hklDatabaseDir_Shorts, hklDatabaseDir_Shorts_All):
        # ShortsDB_All entries look like [['d.h.', n], ['das', 'heißt']]:
        # the abbreviation (plus a number) and its expansion word list.
        self.ShortsDB_All = hkl.load(hklDatabaseDir_Shorts_All)
        self.ShortsDB = hkl.load(hklDatabaseDir_Shorts)

    # Input:  csv file with the form ['d.h.', n] , ['das', 'heißt'] for each line
    # Output: hkl dump of array in form [[1],[d.h.],['das', 'heißt']]
    def create_hklDB_from_csv(self, csvDbDir):
        """Parse the csv-like database file at csvDbDir and dump it as
        'hkldbShorts_All.hkl' (full entries) and 'hkldbShorts.hkl'
        (abbreviation keys only, one single-element list per entry).

        Returns the string 'done' on success.
        """
        ShortsDB_All = []
        with open(csvDbDir) as lines:
            for line in lines:
                # BUGFIX: ast.literal_eval instead of eval -- each line is pure
                # literal data, so the database file must not be able to
                # execute arbitrary code.
                ShortsDB_All.append(list(ast.literal_eval(line)))

        # One single-element key list per entry: just the abbreviation itself.
        hkldbShorts = [[entry[0][0]] for entry in ShortsDB_All]

        hkl.dump(ShortsDB_All, 'hkldbShorts_All.hkl', mode='w', compression='gzip')
        hkl.dump(hkldbShorts, 'hkldbShorts.hkl', mode='w', compression='gzip')
        return 'done'

    def load_DB_into_FASTsearch(self):
        """Load the FASTsearch index, its bag-of-words model and the German
        spacy model used for the article check in the expansion pass."""
        self.fsearch1 = FASTsearch.FASTsearch('hkldbShorts.hkl')
        # The BoW model was generated once via
        # fsearch1.Gen_BoW_Model(3000, "word", punctuation=True); here it is
        # only loaded from disk.
        self.fsearch1.Load_BoW_Model('bagofwordshkldbShorts.pkl', 'DataBaseOneZeroshkldbShorts.hkl')
        import spacy
        self.nlp = spacy.load('de_core_news_sm')

    def ExplainShortsInSentencesWithBrackets(self, sentences):
        """For every tokenized sentence, insert each recognized abbreviation's
        expansion in brackets directly after it.

        sentences: iterable of token lists.
        Returns the list of processed sentences.
        NOTE: the input sentence lists are modified in place and the very same
        list objects are returned.
        """
        outsentences = []
        for sentence in sentences:
            explanationlist = []
            # Parsed BEFORE the multi-dot split below, so doc indices can
            # drift from sentence indices afterwards -- see the bounds guard
            # in the candidate filter.
            doc = self.nlp(' '.join(sentence))

            # --- Pass 1: split multi-dot tokens such as 'z.B.' into ['z.', 'B.'] ---
            newshorts = []
            wordcount = 0
            for oriword in sentence:
                wordcount += 1
                # The last token gets an artificial trailing dot so that a
                # final abbreviation like 'z.B' is still split and normalized.
                if wordcount == len(sentence):
                    word = oriword + '.'
                else:
                    word = oriword
                punctcount = list(word).count('.')
                if punctcount > 1:
                    # BUGFIX: use the token's actual position instead of
                    # sentence.index(oriword), which located only the FIRST
                    # occurrence of a duplicated word.
                    replaceindex = wordcount - 1
                    newshort = []
                    prenewshort = []
                    for letter in word:
                        prenewshort.append(letter)
                        if letter == '.':
                            newshort.append(''.join(prenewshort))
                            prenewshort = []
                    # Characters after the last dot are dropped on purpose
                    # (original behavior).
                    # BUGFIX: append exactly once per token; the original
                    # appended once more for every letter after the last dot,
                    # producing duplicate replacement entries.
                    newshorts.append([newshort, replaceindex])

            # Replace back to front so earlier indices stay valid.
            for newshort in newshorts[::-1]:
                del sentence[newshort[1]]
                for part in newshort[0][::-1]:
                    sentence.insert(newshort[1], part)

            # --- Pass 2: look every candidate token (or dot-span) up in the DB ---
            for n in range(len(sentence)):
                NhasToBeChecked = True
                # Skip tokens already covered by a recorded explanation span.
                for r in range(len(explanationlist)):
                    if explanationlist[r][3] <= n < explanationlist[r][1]:
                        NhasToBeChecked = False
                # Blacklist of words that would be falsely recognized,
                # e.g. 'er' must not be treated as an abbreviation.
                if sentence[n] in ['Er', 'er', 'ab', 'Ab', 'so', 'da', 'an', 'mit', 'Am', 'am']:
                    NhasToBeChecked = False
                # A dotless word is only considered right after an article.
                # NOTE(review): 'ART' looks like a POS tag, not a dependency
                # label -- confirm whether tag_ was meant instead of dep_.
                # BUGFIX: bounds guard, because sentence may have grown past
                # doc during the multi-dot split above (original raised
                # IndexError there).
                if n != 0 and sentence[n][-1] != '.' and (n - 1 >= len(doc) or doc[n - 1].dep_[:2] != 'ART'):
                    NhasToBeChecked = False

                if NhasToBeChecked:
                    bestmatches1, matchindex = self.fsearch1.search_with_highest_multiplikation_Output(sentence[n], 1)

                    # Determine how many following tokens also end with a dot,
                    # i.e. the extent of a multi-token abbreviation (max 5).
                    interestingindex = 0
                    if sentence[n][-1] == '.':
                        if len(sentence) - n > 5:
                            for m in range(5):
                                if sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.':
                                    interestingindex = m
                                    break
                        if len(sentence) - n <= 5 and n != len(sentence) - 1:
                            for m in range(len(sentence) - n):
                                if m == (len(sentence) - n) - 1:
                                    # Last token of the sentence: no successor
                                    # to inspect.
                                    if sentence[n + m][-1] == '.':
                                        interestingindex = m
                                        break
                                elif sentence[n + m][-1] == '.' and sentence[n + m + 1][-1] != '.':
                                    interestingindex = m
                                    break

                    if interestingindex == 0:
                        finalmatchindex = matchindex
                    else:
                        # Re-query with the whole dot-span joined by spaces.
                        thesentence = ' '.join(sentence[n:n + interestingindex + 1])
                        mbestmatches, mmatchindex = self.fsearch1.search_with_highest_multiplikation_Output(thesentence, 1)
                        finalmatchindex = mmatchindex

                    # A score of exactly 1 means a perfect match in the DB.
                    if finalmatchindex[1] == 1:
                        wordexplanation = self.ShortsDB_All[finalmatchindex[0]][1]
                        # Entry layout:
                        # [expansion words, insert position, span length, start index]
                        explanationlist.insert(0, [wordexplanation, n + interestingindex + 1, interestingindex, n])

            # Keep, per start index, only the entries with the longest span
            # (ties are all kept, matching the original comparisons).
            # BUGFIX: the original deleted from explanationlist while iterating
            # over index ranges computed up front, which could raise IndexError.
            explanationlist = [
                entry for entry in explanationlist
                if not any(other[3] == entry[3] and other[2] > entry[2]
                           for other in explanationlist)
            ]

            # explanationlist is ordered by descending position (entries were
            # prepended), so inserting front-to-back keeps the remaining
            # insert positions valid.
            for entry in explanationlist:
                sentence.insert(entry[1], '(' + ' '.join(entry[0]) + ')')

            outsentences.append(sentence)
        return outsentences