PluriTon/build/tfgpu-pluriton/updateDatabase.py

import hickle as hkl

import FASTsearch


class PluritonUpdater(object):
    """Build, load and query the translation databases used by Pluriton.

    The class converts a line-based text database into hickle (``.hkl``)
    dumps, loads those dumps into two ``FASTsearch`` instances (one per
    column of the database) and looks up the entry nearest to a query text.

    All database files are read from / written to the current working
    directory under fixed names (``hkldbTranslations1.hkl``,
    ``hkldbTranslations2.hkl``, ``hkldbTranslations_All.hkl``).
    """

    def __init__(self):

        self.ole = 1

    @staticmethod
    def _parse_translation_lines(lines):
        """Evaluate every non-blank line of *lines* into a list.

        Each line is expected to be a comma separated pair of Python list
        literals, e.g. ``['schwerer Satz'], ['leichter Satz']``, which
        evaluates to a tuple and is stored as ``[[...], [...]]``.

        Blank or whitespace-only lines (such as a trailing newline at the
        end of the file) are skipped instead of raising ``SyntaxError``.
        """
        parsed = []
        for line in lines:
            if not line.strip():
                continue
            # NOTE(security): eval() executes arbitrary Python expressions
            # found in the file. Only feed trusted database files to this
            # method; ast.literal_eval would be the safer choice if every
            # line is guaranteed to consist of plain literals.
            parsed.append(list(eval(line)))
        return parsed

    def create_hklDB_from_csv(self, csvDbDir):
        """Convert the text database at *csvDbDir* into three hkl dumps.

        Input: a file in which each line holds two list literals separated
        by a comma (first list = source side, second list = target side).
        NOTE(review): only the first element of each list is kept for the
        per-side databases, and ``searchNearest2Translate`` later splits
        that element on whitespace, so each list presumably holds one
        whole sentence as a single string -- confirm against the data.

        Output (written to the current working directory):
          * ``hkldbTranslations_All.hkl`` -- every parsed line,
            ``[[first_list, second_list], ...]``
          * ``hkldbTranslations1.hkl`` -- ``[[first_list[0]], ...]``
          * ``hkldbTranslations2.hkl`` -- ``[[second_list[0]], ...]``

        Returns the string ``'done'``.
        """
        # Read and parse first so the input file is closed before dumping.
        with open(csvDbDir) as lines:
            TranslationsDB_All = self._parse_translation_lines(lines)

        # Build both per-side databases before writing anything, so a
        # malformed entry aborts the run without leaving partial dumps.
        hkldbTranslations1 = [[entry[0][0]] for entry in TranslationsDB_All]
        hkldbTranslations2 = [[entry[1][0]] for entry in TranslationsDB_All]

        hkl.dump(TranslationsDB_All, 'hkldbTranslations_All.hkl', mode='w', compression='gzip')

        hkl.dump(hkldbTranslations1, 'hkldbTranslations1.hkl', mode='w', compression='gzip')
        hkl.dump(hkldbTranslations2, 'hkldbTranslations2.hkl', mode='w', compression='gzip')

        return 'done'

    def _load_databases_into_FASTsearch(self):
        """Load both hkl databases and wrap each one in a FASTsearch object.

        Sets ``self.hkldbTranslations1``, ``self.hkldbTranslations2``,
        ``self.fsearch1`` and ``self.fsearch2``. Shared by
        ``load_DB_into_FASTsearch_and_generate_BOW`` and ``loadModels``.
        """
        print('loading the hkldbTranslations1...')
        self.hkldbTranslations1 = hkl.load('hkldbTranslations1.hkl')
        print('done')

        print('loading the hkldbTranslations2...')
        self.hkldbTranslations2 = hkl.load('hkldbTranslations2.hkl')
        print('done')

        print('loading hkldbTranslations 1 into FASTsearch..')
        self.fsearch1 = FASTsearch.FASTsearch('hkldbTranslations1.hkl')
        print('done')

        print('loading hkldbTranslations 2 into FASTsearch..')
        self.fsearch2 = FASTsearch.FASTsearch('hkldbTranslations2.hkl')
        print('done')

    def load_DB_into_FASTsearch_and_generate_BOW(self):
        """Load both databases and generate a new BoW model for each.

        Requires the hkl dumps written by ``create_hklDB_from_csv`` to exist
        in the current working directory. The arguments passed to
        ``Gen_BoW_Model`` (``50000, "word", punctuation=False``) are
        forwarded unchanged; see FASTsearch for their meaning.

        Returns the string ``'done'``.
        """
        self._load_databases_into_FASTsearch()

        print('generating BoW Model 1..')
        self.fsearch1.Gen_BoW_Model(50000, "word", punctuation = False)
        print('done')

        print('generating BoW Model 2..')
        self.fsearch2.Gen_BoW_Model(50000, "word", punctuation = False)
        print('done')

        return 'done'

    def loadModels(self):
        """Load both databases and their already generated BoW models.

        The BoW model files are read from the current working directory.
        NOTE(review): these files are presumably the ones produced by
        ``load_DB_into_FASTsearch_and_generate_BOW`` -- confirm in
        FASTsearch.

        Returns the string ``'done'``.
        """
        self._load_databases_into_FASTsearch()

        print('loading the bow model 1')
        self.fsearch1.Load_BoW_Model('bagofwordshkldbTranslations1.pkl', 'DataBaseOneZeroshkldbTranslations1.hkl')
        print('done')

        print('loading the bow model 2')
        self.fsearch2.Load_BoW_Model('bagofwordshkldbTranslations2.pkl', 'DataBaseOneZeroshkldbTranslations2.hkl')
        print('done')

        return 'done'

    def searchNearest2Translate(self, text):
        """Return the database pair whose first side best matches *text*.

        Queries ``self.fsearch1`` for a single result and uses the first
        returned index to pick the entry from both databases. One of the
        loading methods must have been called beforehand so that
        ``fsearch1``, ``hkldbTranslations1`` and ``hkldbTranslations2``
        are set.

        Returns a tuple ``(DifficultText, LeichterText)``: the matched
        entry of database 1 and the entry at the same index of database 2,
        each split on whitespace into a list of words.
        """
        # Only the indices are needed; the match scores are ignored.
        _, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(text, 1)

        best_index = matchindex2[0]
        DifficultText = self.hkldbTranslations1[best_index][0].split()
        LeichterText = self.hkldbTranslations2[best_index][0].split()

        return DifficultText, LeichterText
docker+rust 2021-10-18 18:22:03 +02:00			`import hickle as hkl`

			`import FASTsearch`


			`class PluritonUpdater(object):`

			`def __init__(self):`

			`self.ole = 1`

			`# Input: csv file with the form ['eine', 'schwere', 'Sprache'] , ['in', 'leicht'] for each line`
			`# Output: hkl dump of array in form [[['eine', 'schwere', 'Sprache'],['in', 'leicht']],[..]]`

			`def create_hklDB_from_csv(self, csvDbDir):`

			`with open(csvDbDir) as lines:`

			`TranslationsDB_All = []`

			`for line in lines:`

			`TranslationsDB_All.append(list(eval(line)))`


			`#print(ShortsDB_All)`

			`#print(ShortsDB_All[0][0])`


			`hkldbTranslations1 = []`
			`hkldbTranslations2 = []`
			`counter = 0`
			`for n in range(len(TranslationsDB_All)):`

			`counter += 1`
			`#if counter % 1000 == 0:`
			`#print(counter)`

			`hkldbTranslations1.append([TranslationsDB_All[n][0][0]])`
			`hkldbTranslations2.append([TranslationsDB_All[n][1][0]])`



			`#print(hkldbTranslations1, TranslationsDB_All)`
			`#print('creating the hkl dump of TranslationsDBAll')`
			`hkl.dump(TranslationsDB_All, 'hkldbTranslations_All.hkl', mode='w', compression='gzip')`
			`#print('done..')`

			`#print('Creating the hkl dump of TranslationsDB')`
			`hkl.dump(hkldbTranslations1, 'hkldbTranslations1.hkl', mode='w', compression='gzip')`
			`hkl.dump(hkldbTranslations2, 'hkldbTranslations2.hkl', mode='w', compression='gzip')`
			`#print('done..')`

			`return 'done'`


			`def load_DB_into_FASTsearch_and_generate_BOW(self):`

			`print('loading the hkldbTranslations1...')`
			`self.hkldbTranslations1 = hkl.load('hkldbTranslations1.hkl')`
			`print('done')`

			`print('loading the hkldbTranslations2...')`
			`self.hkldbTranslations2 = hkl.load('hkldbTranslations2.hkl')`
			`print('done')`

			`print('loading hkldbTranslations 1 into FASTsearch..')`
			`self.fsearch1 = FASTsearch.FASTsearch('hkldbTranslations1.hkl')`
			`print('done')`

			`print('loading hkldbTranslations 2 into FASTsearch..')`
			`self.fsearch2 = FASTsearch.FASTsearch('hkldbTranslations2.hkl')`
			`print('done')`

			`print('generating BoW Model 1..')`
			`self.fsearch1.Gen_BoW_Model(50000, "word", punctuation = False)`
			`print('done')`

			`print('generating BoW Model 2..')`
			`self.fsearch2.Gen_BoW_Model(50000, "word", punctuation = False)`
			`print('done')`

			`return 'done'`

			`def loadModels(self):`

			`print('loading the hkldbTranslations1...')`
			`self.hkldbTranslations1 = hkl.load('hkldbTranslations1.hkl')`
			`print('done')`

			`print('loading the hkldbTranslations2...')`
			`self.hkldbTranslations2 = hkl.load('hkldbTranslations2.hkl')`
			`print('done')`

			`print('loading hkldbTranslations 1 into FASTsearch..')`
			`self.fsearch1 = FASTsearch.FASTsearch('hkldbTranslations1.hkl')`
			`print('done')`

			`print('loading hkldbTranslations 2 into FASTsearch..')`
			`self.fsearch2 = FASTsearch.FASTsearch('hkldbTranslations2.hkl')`
			`print('done')`

			`print('loading the bow model 1')`
			`self.fsearch1.Load_BoW_Model('bagofwordshkldbTranslations1.pkl', 'DataBaseOneZeroshkldbTranslations1.hkl')`
			`print('done')`

			`print('loading the bow model 2')`
			`self.fsearch2.Load_BoW_Model('bagofwordshkldbTranslations2.pkl', 'DataBaseOneZeroshkldbTranslations2.hkl')`
			`print('done')`


			`return 'done'`

			`def searchNearest2Translate(self, text):`


			`bestmatches2, matchindex2 = self.fsearch1.search_with_highest_multiplikation_Output(text, 1)`



			`DifficultText = self.hkldbTranslations1[matchindex2[0]][0].split()`
			`LeichterText = self.hkldbTranslations2[matchindex2[0]][0].split()`

			`return DifficultText, LeichterText`