# alpcentaur / Cyberlaywer

import hickle as hkl
import FASTsearch

class PluritonUpdater(object):
    """Build, load and query two parallel translation databases.

    The databases are stored as hickle files in the current working
    directory ('hkldbTranslations1.hkl' and 'hkldbTranslations2.hkl').
    Row ``i`` of the first database belongs to row ``i`` of the second one.
    ``searchNearest2Translate`` searches the first database and returns the
    matching rows of both.
    """

    def __init__(self):
        # Not read by any method of this class; kept for backward
        # compatibility with code that may access the attribute.
        self.ole = 1

    def create_hklDB_from_csv(self, csvDbDir):
        """Convert a text database into the three hickle dumps.

        Every non-blank line of the input must be a Python expression that
        evaluates to a pair of lists, e.g. ``['...'] , ['...']``.

        Three files are written to the current working directory:

        * 'hkldbTranslations_All.hkl' - every parsed row, as a list of
          ``[first_list, second_list]``.
        * 'hkldbTranslations1.hkl' - ``[first_list[0]]`` for every row.
        * 'hkldbTranslations2.hkl' - ``[second_list[0]]`` for every row.

        NOTE(review): the original comment described the row format as
        ``['eine', 'schwere', 'Sprache'] , ['in', 'leicht']``, but only
        element 0 of each list is stored in databases 1 and 2, and
        ``searchNearest2Translate`` later splits that element on whitespace.
        Presumably each list is expected to hold one whole phrase - confirm
        against the real data.

        Args:
            csvDbDir: Path of the input file (despite the name, a file path,
                not a directory).

        Returns:
            The string 'done'.

        Raises:
            IndexError: If a row has fewer than two lists or one of the two
                lists is empty.
        """
        translations_all = []
        with open(csvDbDir) as lines:
            for line in lines:
                # Blank lines (e.g. a trailing empty line) are not valid
                # expressions and used to abort the import with SyntaxError.
                if not line.strip():
                    continue
                # SECURITY: eval() executes arbitrary code found in the file.
                # Only use this on database files you trust; if all rows are
                # plain literals, ast.literal_eval is the safe replacement.
                translations_all.append(list(eval(line)))

        # One single-element row per entry: the first item of each list.
        first_db = [[row[0][0]] for row in translations_all]
        second_db = [[row[1][0]] for row in translations_all]

        hkl.dump(translations_all, 'hkldbTranslations_All.hkl', mode='w', compression='gzip')
        hkl.dump(first_db, 'hkldbTranslations1.hkl', mode='w', compression='gzip')
        hkl.dump(second_db, 'hkldbTranslations2.hkl', mode='w', compression='gzip')

        return 'done'

    def _load_databases(self):
        """Load both hickle databases and create a FASTsearch object for each.

        Sets ``self.hkldbTranslations1``, ``self.hkldbTranslations2``,
        ``self.fsearch1`` and ``self.fsearch2``.
        """
        print('loading the hkldbTranslations1...')
        self.hkldbTranslations1 = hkl.load('hkldbTranslations1.hkl')
        print('done')

        print('loading the hkldbTranslations2...')
        self.hkldbTranslations2 = hkl.load('hkldbTranslations2.hkl')
        print('done')

        print('loading hkldbTranslations 1 into FASTsearch..')
        self.fsearch1 = FASTsearch.FASTsearch('hkldbTranslations1.hkl')
        print('done')

        print('loading hkldbTranslations 2 into FASTsearch..')
        self.fsearch2 = FASTsearch.FASTsearch('hkldbTranslations2.hkl')
        print('done')

    def load_DB_into_FASTsearch_and_generate_BOW(self):
        """Load both databases and generate a bag-of-words model for each.

        Requires the hickle files written by ``create_hklDB_from_csv`` to
        exist in the current working directory.

        Returns:
            The string 'done'.
        """
        self._load_databases()

        print('generating BoW Model 1..')
        self.fsearch1.Gen_BoW_Model(50000, "word", punctuation = False)
        print('done')

        print('generating BoW Model 2..')
        self.fsearch2.Gen_BoW_Model(50000, "word", punctuation = False)
        print('done')

        return 'done'

    def loadModels(self):
        """Load both databases and their previously stored bag-of-words models.

        The model files are presumably the ones produced by
        ``load_DB_into_FASTsearch_and_generate_BOW`` - TODO confirm against
        FASTsearch.

        Returns:
            The string 'done'.
        """
        self._load_databases()

        print('loading the bow model 1')
        self.fsearch1.Load_BoW_Model('bagofwordshkldbTranslations1.pkl', 'DataBaseOneZeroshkldbTranslations1.hkl')
        print('done')

        print('loading the bow model 2')
        self.fsearch2.Load_BoW_Model('bagofwordshkldbTranslations2.pkl', 'DataBaseOneZeroshkldbTranslations2.hkl')
        print('done')

        return 'done'

    def searchNearest2Translate(self, text):
        """Return the database entry pair that best matches ``text``.

        The first database is searched for a single best match; the row with
        that index is then read from both databases. ``loadModels`` or
        ``load_DB_into_FASTsearch_and_generate_BOW`` must have been called
        before.

        Args:
            text: The query passed to the FASTsearch object of database 1.

        Returns:
            A tuple ``(DifficultText, LeichterText)``: the whitespace-split
            words of the matched row in database 1 and in database 2.
        """
        # Only the index list is needed; the first return value is unused.
        _best_matches, match_indices = self.fsearch1.search_with_highest_multiplikation_Output(text, 1)
        best_index = match_indices[0]

        difficult_text = self.hkldbTranslations1[best_index][0].split()
        easier_text = self.hkldbTranslations2[best_index][0].split()

        return difficult_text, easier_text