import hickle as hkl

import FASTsearch


class PluritonUpdater(object):
    """Create, load and query two parallel translation databases.

    The databases are stored as hickle files in the current working
    directory ('hkldbTranslations1.hkl' and 'hkldbTranslations2.hkl') and
    are searched through FASTsearch bag-of-words models.
    """

    def __init__(self):
        # NOTE(review): not read anywhere in this class; kept in case
        # external code relies on the attribute.
        self.ole = 1

    def create_hklDB_from_csv(self, csvDbDir):
        """Convert a text database into the three hickle dumps used here.

        Input: a file in which every line has the form
        ``['eine', 'schwere', 'Sprache'] , ['in', 'leicht']``.

        Output (written to the current working directory):
          * 'hkldbTranslations_All.hkl' - every parsed line, in the form
            ``[[['eine', 'schwere', 'Sprache'], ['in', 'leicht']], [..]]``
          * 'hkldbTranslations1.hkl' - per line, a one-element list holding
            the first item of the first list
          * 'hkldbTranslations2.hkl' - per line, a one-element list holding
            the first item of the second list

        Args:
            csvDbDir: path of the input file.

        Returns:
            The string 'done'.
        """
        translations_all = []
        with open(csvDbDir) as lines:
            for line in lines:
                # Blank lines (e.g. trailing newlines at the end of the
                # file) cannot be evaluated and would raise SyntaxError.
                if not line.strip():
                    continue
                # NOTE(review): eval() executes arbitrary code found in the
                # input file. Only feed trusted files; consider switching to
                # ast.literal_eval if every line is a plain literal.
                translations_all.append(list(eval(line)))

        # Only the first element of each side is kept for the search DBs.
        first_side = [[entry[0][0]] for entry in translations_all]
        second_side = [[entry[1][0]] for entry in translations_all]

        hkl.dump(translations_all, 'hkldbTranslations_All.hkl', mode='w', compression='gzip')
        hkl.dump(first_side, 'hkldbTranslations1.hkl', mode='w', compression='gzip')
        hkl.dump(second_side, 'hkldbTranslations2.hkl', mode='w', compression='gzip')

        return 'done'

    def _load_databases(self):
        """Load both hickle databases and open a FASTsearch instance on each."""
        print('loading the hkldbTranslations1...')
        self.hkldbTranslations1 = hkl.load('hkldbTranslations1.hkl')
        print('done')

        print('loading the hkldbTranslations2...')
        self.hkldbTranslations2 = hkl.load('hkldbTranslations2.hkl')
        print('done')

        print('loading hkldbTranslations 1 into FASTsearch..')
        self.fsearch1 = FASTsearch.FASTsearch('hkldbTranslations1.hkl')
        print('done')

        print('loading hkldbTranslations 2 into FASTsearch..')
        self.fsearch2 = FASTsearch.FASTsearch('hkldbTranslations2.hkl')
        print('done')

    def load_DB_into_FASTsearch_and_generate_BOW(self):
        """Load both databases and generate a bag-of-words model for each.

        Returns:
            The string 'done'.
        """
        self._load_databases()

        # NOTE(review): 50000 is presumably the vocabulary size limit of the
        # model - confirm against the FASTsearch documentation.
        print('generating BoW Model 1..')
        self.fsearch1.Gen_BoW_Model(50000, "word", punctuation=False)
        print('done')

        print('generating BoW Model 2..')
        self.fsearch2.Gen_BoW_Model(50000, "word", punctuation=False)
        print('done')

        return 'done'

    def loadModels(self):
        """Load both databases and their stored bag-of-words models.

        NOTE(review): the model files are presumably the ones written by
        load_DB_into_FASTsearch_and_generate_BOW - confirm.

        Returns:
            The string 'done'.
        """
        self._load_databases()

        print('loading the bow model 1')
        self.fsearch1.Load_BoW_Model('bagofwordshkldbTranslations1.pkl', 'DataBaseOneZeroshkldbTranslations1.hkl')
        print('done')

        print('loading the bow model 2')
        self.fsearch2.Load_BoW_Model('bagofwordshkldbTranslations2.pkl', 'DataBaseOneZeroshkldbTranslations2.hkl')
        print('done')

        return 'done'

    def searchNearest2Translate(self, text):
        """Look up `text` in database 1 and return the matching entry pair.

        Requires loadModels() or load_DB_into_FASTsearch_and_generate_BOW()
        to have been called first, since it reads self.fsearch1 and both
        loaded databases.

        Args:
            text: query passed unchanged to FASTsearch.

        Returns:
            A tuple (DifficultText, LeichterText): the whitespace-split
            first item of the matched entry in database 1 and of the entry
            at the same index in database 2.
        """
        # NOTE(review): the second argument (1) presumably limits the search
        # to the single best match - confirm against FASTsearch.
        _bestmatches, matchindex = self.fsearch1.search_with_highest_multiplikation_Output(text, 1)
        best = matchindex[0]

        DifficultText = self.hkldbTranslations1[best][0].split()
        LeichterText = self.hkldbTranslations2[best][0].split()

        return DifficultText, LeichterText