alpcentaur
/
PluriTon


								import hickle as hkl


								import FASTsearch


								class PluritonUpdater(object):
								    """Build, load and query a two-sided translation database.

								    The class turns a line-based source file into hickle dumps, loads
								    those dumps into two ``FASTsearch`` objects and returns the stored
								    pair at the index that ``FASTsearch`` reports for a query text.
								    """

								    # File names of the hickle dumps, relative to the working directory.
								    _DB_ALL_FILE = 'hkldbTranslations_All.hkl'
								    _DB1_FILE = 'hkldbTranslations1.hkl'
								    _DB2_FILE = 'hkldbTranslations2.hkl'

								    def __init__(self):
								        # Not read anywhere in this class; kept for backward compatibility.
								        self.ole = 1

								    @staticmethod
								    def _parse_translation_line(line):
								        """Parse one source line into a list of its literal elements.

								        Only Python literals are accepted, so a line can never execute
								        code (the previous implementation used ``eval``).

								        Raises:
								            ValueError or SyntaxError: if the line is not a valid literal.
								        """
								        # Local import keeps this block independent of the file's imports.
								        import ast

								        return list(ast.literal_eval(line.strip()))

								    def create_hklDB_from_csv(self, csvDbDir):
								        """Convert the source file into three hickle dumps.

								        Input: a file whose lines each have the form
								        ``['eine', 'schwere', 'Sprache'] , ['in', 'leicht']``.
								        Output: ``hkldbTranslations_All.hkl`` holding every parsed line
								        (``[[[...], [...]], ...]``), plus ``hkldbTranslations1.hkl`` and
								        ``hkldbTranslations2.hkl`` holding, per line, a one-element list
								        with the first element of the first / second list respectively.

								        Blank lines are skipped.

								        Args:
								            csvDbDir: path of the source file.

								        Returns:
								            The string ``'done'``.
								        """
								        # NOTE(review): the file is read with the platform default encoding,
								        # as before -- confirm whether it should be pinned to UTF-8.
								        with open(csvDbDir) as lines:
								            TranslationsDB_All = [
								                self._parse_translation_line(line)
								                for line in lines
								                if line.strip()
								            ]

								        # Build both sides before dumping so that a malformed entry aborts
								        # the conversion without writing any file.
								        hkldbTranslations1 = [[entry[0][0]] for entry in TranslationsDB_All]
								        hkldbTranslations2 = [[entry[1][0]] for entry in TranslationsDB_All]

								        hkl.dump(TranslationsDB_All, self._DB_ALL_FILE, mode='w', compression='gzip')
								        hkl.dump(hkldbTranslations1, self._DB1_FILE, mode='w', compression='gzip')
								        hkl.dump(hkldbTranslations2, self._DB2_FILE, mode='w', compression='gzip')

								        return 'done'

								    def _load_databases_into_fastsearch(self):
								        """Load both dumps and create one ``FASTsearch`` object per dump.

								        Sets ``hkldbTranslations1``, ``hkldbTranslations2``, ``fsearch1``
								        and ``fsearch2`` on the instance.
								        """
								        print('loading the hkldbTranslations1...')
								        self.hkldbTranslations1 = hkl.load(self._DB1_FILE)
								        print('done')

								        print('loading the hkldbTranslations2...')
								        self.hkldbTranslations2 = hkl.load(self._DB2_FILE)
								        print('done')

								        print('loading hkldbTranslations 1 into FASTsearch..')
								        self.fsearch1 = FASTsearch.FASTsearch(self._DB1_FILE)
								        print('done')

								        print('loading hkldbTranslations 2 into FASTsearch..')
								        self.fsearch2 = FASTsearch.FASTsearch(self._DB2_FILE)
								        print('done')

								    def load_DB_into_FASTsearch_and_generate_BOW(self):
								        """Load both dumps and generate a BoW model for each of them.

								        Returns:
								            The string ``'done'``.
								        """
								        self._load_databases_into_fastsearch()

								        print('generating BoW Model 1..')
								        self.fsearch1.Gen_BoW_Model(50000, "word", punctuation = False)
								        print('done')

								        print('generating BoW Model 2..')
								        self.fsearch2.Gen_BoW_Model(50000, "word", punctuation = False)
								        print('done')

								        return 'done'

								    def loadModels(self):
								        """Load both dumps and their previously stored BoW models.

								        Presumably the model files read here are the ones written by
								        ``Gen_BoW_Model`` -- verify against FASTsearch.

								        Returns:
								            The string ``'done'``.
								        """
								        self._load_databases_into_fastsearch()

								        print('loading the bow model 1')
								        self.fsearch1.Load_BoW_Model('bagofwordshkldbTranslations1.pkl', 'DataBaseOneZeroshkldbTranslations1.hkl')
								        print('done')

								        print('loading the bow model 2')
								        self.fsearch2.Load_BoW_Model('bagofwordshkldbTranslations2.pkl', 'DataBaseOneZeroshkldbTranslations2.hkl')
								        print('done')

								        return 'done'

								    def searchNearest2Translate(self, text):
								        """Return both sides of the entry that ``fsearch1`` reports for *text*.

								        Requires ``loadModels`` or
								        ``load_DB_into_FASTsearch_and_generate_BOW`` to have run first,
								        because it reads ``fsearch1`` and both loaded databases.

								        Args:
								            text: query passed unchanged to the ``FASTsearch`` search call.

								        Returns:
								            A tuple ``(DifficultText, LeichterText)``: the whitespace-split
								            words of the matched entry in database 1 and of the entry at
								            the same index in database 2.
								        """
								        # Only the index list is used; the first returned value is ignored.
								        _, match_indices = self.fsearch1.search_with_highest_multiplikation_Output(text, 1)
								        best_index = match_indices[0]

								        DifficultText = self.hkldbTranslations1[best_index][0].split()
								        LeichterText = self.hkldbTranslations2[best_index][0].split()

								        return DifficultText, LeichterText