# alpcentaur / basabuuka_prototyp

import ipywidgets as widgets
from IPython.display import display, HTML
# Maps the toggle state to the jQuery call applied to the notebook's
# "div.input" elements (False hides them, True shows them).
javascript_functions = {False: "hide()", True: "show()"}

# Maps the toggle state to the (German) caption shown on the toggle button.
button_descriptions = {False: "Code anzeigen", True: "Code verstecken"}


class oi(object):
    """Sentence-splitting helpers plus small Jupyter notebook utilities.

    The splitting methods cut whitespace-separated text into sentences at
    words that end in one of the characters in ``punktuation_list`` and
    return the sentences (as lists of words) together with the punctuation
    character that terminated each of them.  The remaining methods toggle
    the visibility of notebook code cells and render a progress bar.
    """

    # spaCy tags the original code treats as "first word is a noun"; such
    # words keep their capitalisation, every other first word is lower-cased.
    _NOUN_TAGS = ('NN', 'NE')

    def __init__(self):
        # Characters that terminate a sentence.
        self.punktuation_list = ['.', '?', '!', ';', ':']
        # Lazily loaded spaCy pipeline, see _load_spacy_model().
        self._nlp = None

    def _split_words(self, words, standalone_terminates=True):
        """Group an iterable of words into sentences.

        A word terminates the current sentence when it is longer than two
        characters and ends in a punctuation character; the word is stored
        without that last character.  Words of one or two characters (e.g.
        ``"5."``) are kept unchanged as ordinary words.

        Args:
            words: Iterable of non-empty strings (as produced by
                ``str.split()``).
            standalone_terminates: If true, a word that consists only of a
                punctuation character also terminates the sentence; because
                the last character is stripped, an empty string is appended
                as the final word in that case.  If false, such a word is
                kept as an ordinary word.

        Returns:
            Tuple ``(splitsentences, punctuations)`` of equal length.  Words
            after the last terminating word are discarded.
        """
        splitsentences = []
        punctuations = []
        current = []
        for word in words:
            ends_with_mark = (
                word[-1] in self.punktuation_list and len(word) > 2
            )
            is_mark = standalone_terminates and word in self.punktuation_list
            if ends_with_mark or is_mark:
                current.append(word[:-1])
                splitsentences.append(current)
                punctuations.append(word[-1])
                current = []
            else:
                current.append(word)
        return splitsentences, punctuations

    def _load_spacy_model(self):
        """Return the spaCy pipeline 'de_core_news_sm', loading it once."""
        nlp = getattr(self, '_nlp', None)
        if nlp is None:
            # Imported lazily so the rest of the class works without spaCy.
            import spacy
            nlp = spacy.load('de_core_news_sm')
            self._nlp = nlp
        return nlp

    def ReadDoc2Sent(self, document):
        """Read a text file and split its content into sentences.

        Sentences may span several lines of the file.  The current line
        number is printed every 1000 lines as a progress indication.

        Note: unlike ``CellInputText2Splitsentences``, a word consisting
        only of a punctuation character does NOT end a sentence here; it is
        kept as an ordinary word (this mirrors the original condition).

        Args:
            document: Path of the text file to read.

        Returns:
            Tuple ``(splitsentences, punctuations)``; see ``_split_words``.
        """
        def words_of_document():
            with open(document) as lines:
                for counter, line in enumerate(lines, 1):
                    if counter % 1000 == 0:
                        print(counter)
                    yield from line.split()

        return self._split_words(
            words_of_document(), standalone_terminates=False
        )

    def PrintSplitSentencesToTextFile(self, punctuations, sentences, document):
        """Append the sentences to a text file, one sentence per line.

        Each line consists of the words joined by single spaces, directly
        followed by the punctuation character with the same index.

        Args:
            punctuations: Sequence with one punctuation string per sentence.
            sentences: Sequence of sentences, each a list of words.
            document: Path of the file to append to.

        Returns:
            The string ``'OK'``.
        """
        with open(document, 'a') as doc:
            for n, sentence in enumerate(sentences):
                doc.write(' '.join(sentence) + punctuations[n] + '\n')

        return 'OK'

    def CellInputText2Splitsentences(self, sentences):
        """Split a text string into sentences.

        Args:
            sentences: The text to split.

        Returns:
            Tuple ``(splitsentences, punctuations)``; see ``_split_words``.
        """
        return self._split_words(sentences.split())

    def CellInputText2SplitsentencesWithspacy(self, sentences):
        """Split a text into sentences using spaCy's sentence segmentation.

        The first word of every sentence is lower-cased unless spaCy tags it
        'NN' or 'NE'.  Punctuation characters found in the last word of a
        sentence are collected and the word is cut just before the last of
        them.  If this does not yield exactly one punctuation character per
        sentence, the result is discarded and the text is split with the
        plain whitespace-based rule instead (again lower-casing first words
        that occur in the text with a tag other than 'NN'/'NE').

        Args:
            sentences: The text to split.

        Returns:
            Tuple ``(splitsentences, punctuations)``.
        """
        nlp = self._load_spacy_model()
        spacysentences = nlp(sentences)

        punctuations = []
        splitsentences = []
        for sent in spacysentences.sents:
            splitsent = sent.text.split()
            if not splitsent:
                # Whitespace-only sentence: nothing to split or index.
                continue
            lastword = splitsent[-1]

            if sent[0].tag_ not in self._NOUN_TAGS:
                splitsent[0] = splitsent[0].lower()

            # 1-based position of the last punctuation character in the
            # last word; stays 1 when the word contains none, which empties
            # the word and triggers the fallback below.
            indextocutlastword = 1
            for count, letter in enumerate(lastword, 1):
                if letter in self.punktuation_list:
                    punctuations.append(letter)
                    indextocutlastword = count

            splitsent[-1] = splitsent[-1][:indextocutlastword - 1]
            splitsentences.append(splitsent)

        if len(splitsentences) != len(punctuations):
            # spaCy's segmentation and the punctuation found do not line up;
            # fall back to the whitespace-based splitting.
            splitsentences, punctuations = self._split_words(
                sentences.split()
            )
            for splitsentence in splitsentences:
                first = splitsentence[0]
                occurs_as_non_noun = any(
                    token.text == first
                    and token.tag_ not in self._NOUN_TAGS
                    for token in spacysentences
                )
                if occurs_as_non_noun:
                    splitsentence[0] = first.lower()

        return splitsentences, punctuations

    def printSplitsentences2Text(self, punctuations, splitsentences):
        """Print every sentence followed by its punctuation character.

        Each sentence is printed with an additional trailing newline, so the
        sentences are separated by blank lines.

        Args:
            punctuations: Sequence with one punctuation string per sentence.
            splitsentences: Sequence of sentences, each a list of words.

        Returns:
            The string ``'done'``.
        """
        for n, sentence in enumerate(splitsentences):
            print(' '.join(sentence) + punctuations[n] + '\n')

        return 'done'

    def toggle_code(self, state):
        """
        Toggles the JavaScript show()/hide() function on the div.input element.

        Args:
            state: Key into ``javascript_functions`` (True shows the
                elements, False hides them).
        """
        output_string = "<script>$(\"div.input\").{}</script>"
        output = output_string.format(javascript_functions[state])
        display(HTML(output))

    def button_action(self, value):
        """
        Calls the toggle_code function and updates the button description.

        Args:
            value: Change object providing ``new`` (the new toggle state)
                and ``owner`` (the object whose ``description`` is updated);
                presumably the argument an ipywidgets observer receives.
        """
        state = value.new
        self.toggle_code(state)
        value.owner.description = button_descriptions[state]

    def log_progress(self, sequence, every=None, size=None, name='Items'):
        """Yield the items of ``sequence`` while displaying a progress bar.

        Args:
            sequence: Iterable to pass through.
            every: Update the display every ``every`` items.  Defaults to 1
                for up to 200 items, otherwise to ``size / 200`` (0.5 %).
                Must be given when the size of ``sequence`` is unknown.
            size: Number of items; defaults to ``len(sequence)``.
            name: Label shown in front of the counter.

        Yields:
            The items of ``sequence``, unchanged and in order.

        Raises:
            AssertionError: If the size is unknown and ``every`` is None.
        """
        # Local import: HTML here is the ipywidgets widget, not the
        # IPython.display class used by toggle_code().
        from ipywidgets import IntProgress, HTML, VBox
        from IPython.display import display

        is_iterator = False
        if size is None:
            try:
                size = len(sequence)
            except TypeError:
                is_iterator = True
        if size is not None:
            if every is None:
                if size <= 200:
                    every = 1
                else:
                    every = int(size / 200)     # every 0.5%
        else:
            assert every is not None, 'sequence is iterator, set every'

        if is_iterator:
            # Unknown length: show a full bar in 'info' style.
            progress = IntProgress(min=0, max=1, value=1)
            progress.bar_style = 'info'
        else:
            progress = IntProgress(min=0, max=size, value=0)
        label = HTML()
        box = VBox(children=[label, progress])
        display(box)

        index = 0
        try:
            for index, record in enumerate(sequence, 1):
                if index == 1 or index % every == 0:
                    if is_iterator:
                        label.value = '{name}: {index} / ?'.format(
                            name=name,
                            index=index
                        )
                    else:
                        progress.value = index
                        label.value = u'{name}: {index} / {size}'.format(
                            name=name,
                            index=index,
                            size=size
                        )
                yield record
        except BaseException:
            # Any interruption (including closing the generator early) marks
            # the bar as failed; the exception is always re-raised.
            progress.bar_style = 'danger'
            raise
        else:
            progress.bar_style = 'success'
            progress.value = index
            label.value = "{name}: {index}".format(
                name=name,
                index=str(index or '?')
            )