-
- # split sentences
-
- # the lists are still missing 'sondern' (and a few more things..)
-
-
- # The following conjunctions need no sentence transformation:
- # woraufhin, zudem, zumal, umso - desto,
-
- # 'sondern' is hard to solve.. best to remove 'sondern' and glue afterwards with SentGlue
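-
- # A minimal sketch of the idea above (a hypothetical helper, not wired into
- # the class below): drop the conjunction itself and leave rejoining the two
- # clause halves to a SentGlue-style step afterwards.
- def drop_sondern(words):
-     # e.g. ['Er', 'kam', 'nicht,', 'sondern', 'blieb'] -> ['Er', 'kam', 'nicht,', 'blieb']
-     # tokens with attached punctuation ('sondern,') are deliberately left alone
-     return [w for w in words if w.lower() != 'sondern']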
-
-
-
- class SentSeg(object):
-
- def __init__(self, language):
-
- self.language = language
-
- self.punktuation_list = ['.', '?', '!', ';', ':']
-
- self.wrappunktuation_list = [',', '-']
-
- self.adversativ_list = ['wohingegen', 'Wohingegen', 'aber', 'Aber', 'wobei', 'Wobei', 'hingegen']
-
- self.final_list = ['damit','Damit', 'um', 'Um']
-
- self.kausal_list = ['weil', 'Weil', 'da', 'Da', 'denn', 'falls', 'Falls' ]
-
- self.konditional_list = ['wenn', 'Wenn', 'sobald', 'Sobald', 'als', 'falls']
-
- self.konsekutiv_list = ['dass', 'Dass']
-
- self.konzessiv_list = ['obwohl', 'Obwohl', 'obgleich', 'Obgleich', 'trotzdem', 'Trotzdem', 'wenngleich', 'doch']
-
- self.lokal_list = ['wo', 'Wo']
-
- self.temporal_list_vor = ['bevor', 'Bevor']
-
- self.temporal_list_nach = ['nachdem', 'Nachdem']
-
- self.instrumental_list = ['indem', 'Indem']
-
- self.indirectspeech_list = ['ob', 'Ob', 'wann', 'Wann', 'wer', 'Wer', 'wie', 'Wie', 'warum', 'Warum', 'weshalb', 'Weshalb', 'wieso', 'Wieso']
- self.firstwordlist = []
- #self.firstwordlist = ['wann', 'Wann', 'wer', 'Wer', 'wie', 'Wie', 'warum', 'Warum', 'weshalb', 'Weshalb', 'wieso', 'Wieso', 'dies', 'dann', 'jedoch', 'deswegen', 'trotzdem', 'danach', 'davor', 'wenn', 'sobald']
-
- self.full_list = self.adversativ_list + self.final_list + self.kausal_list + self.konditional_list + self.konsekutiv_list + self.konzessiv_list + self.lokal_list + self.temporal_list_nach + self.temporal_list_vor + self.instrumental_list + self.indirectspeech_list
-
- def ReadDoc2Sent(self, document):
-
- splitsentences = []
- splitsentence = []
-
- with open(document) as sentences:
- counter = 0
- for sentence in sentences:
-
- counter += 1
- if counter % 1000 == 0:
- print(counter)
-
- words = sentence.split()
-
-
-
- for word in words:
-
- splitsentence.append(word)
-
-
-
-
-
-
- if (word[-1] in self.punktuation_list or word in self.punktuation_list) and len(word) > 2:
-
- splitsentences.append([splitsentence])
-
- splitsentence = []
-
- return splitsentences
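-
- # Usage sketch for ReadDoc2Sent ('input.txt' is a placeholder; sentence-final
- # punctuation stays attached to the last word):
- #
- #   seg = SentSeg('de')
- #   sentences = seg.ReadDoc2Sent('input.txt')
- #   # -> [[['Der', 'Hund', 'bellt.']], [['Es', 'regnet.']], ...]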
-
-
-
-
-
- def AndOrSolver(self, sentences, punctuations):
-
- for n in range(len(punctuations)):
- if punctuations[n] == ':' or punctuations[n] == '-':
- punctuations[n] = '.'
-
-
- #print(sentences, punctuations)
-
- splitsentences = []
-
- counter = 0
-
- newsentences = []
- for sentence in sentences:
- newpunctuationsindexes = []
- utterancenumber = sentence[2]
- commainfo = sentence[1]
- commaornot = commainfo[0]
- sentence = sentence[0]
-
-
- counter += 1
- doc = self.nlp(' '.join(sentence))
-
- subjectcount = 0
- separationwords = []
- subjectcounts = []
- doccounter = 0
- subjectindex = []
- rcornot = 0
- for word in doc:
- doccounter += 1
- if word.dep_ == 'sb' or word.dep_ == 'ep':
- subjectcount += 1
- subjectindex.append(doccounter - 1)
- if word.dep_ == 'rc':
- rcornot = 1
-
-
- if word.tag_ == '$,':
-
- subjectcounts.append([subjectcount, doccounter - 2, subjectindex, rcornot])
- subjectindex = []
- subjectcount = 0
- #print('aleaole',sentence[doccounter - 2])
- if len(sentence[doccounter - 2]) > 1:
-
- doccounter -= 1
-
- if word.text in ('und', 'also', 'oder', 'schon', 'bald', 'doch', 'jedoch', 'sondern'):
- separationwords.append(doccounter - 1)
-
- #print('separationwords', separationwords)
- #print('subjectcounts', subjectcounts)
-
-
-
- separationwordstocut = []
- listofownsentencessubjectindexes = []
- for n in range(len(subjectcounts) - 1):
- if subjectcounts[n][0] > 0 and subjectcounts[n + 1][0] > 0 and subjectcounts[n + 1][3] == 0:
- listofownsentencessubjectindexes.append(subjectcounts[n])
- for m in range(len(separationwords)):
- if subjectcounts[n][1] < separationwords[m] < subjectcounts[n + 1][1]:
- #print(subjectcounts[n + 1], separationwords[m])
- if subjectcounts[n + 1][0] > 1:
- if subjectcounts[n + 1][2][0] < separationwords[m] <= subjectcounts[n + 1][2][-1]:
- separationwordstocut.append(separationwords[m])
-
- processed = 0
-
- #print('oioioi')
- #print(listofownsentencessubjectindexes)
- #print(separationwordstocut)
-
- if len(listofownsentencessubjectindexes) > 0:
- for n in range(len(listofownsentencessubjectindexes)):
-
- sentence[listofownsentencessubjectindexes[n][1]] = sentence[listofownsentencessubjectindexes[n][1]] + 'alohaseparator'
- newpunctuationsindexes.append([punctuations[counter - 1], counter - 1])
- #print('a new punctuation1')
- processed = 1
- if len(separationwordstocut) > 0:
- for n in range(len(separationwordstocut)):
- sentence[separationwordstocut[n] - 1] = sentence[separationwordstocut[n] - 1] + 'alohaseparator'
- #print('a new punctuation2')
- newpunctuationsindexes.append([punctuations[counter - 1], counter - 1])
- processed = 1
-
- if processed == 0:
- newsentences.append([sentence])
-
- if processed == 1:
- #print(sentence)
- splitsentence = []
- for word in sentence:
- splitsentence.append(word)
- if word[-14:] == 'alohaseparator':
- if splitsentence[-1][-15] == ',':
- splitsentence[-1] = splitsentence[-1][:-15]
- else:
- splitsentence[-1] = splitsentence[-1][:-14]
- newsentences.append([splitsentence])
- splitsentence = []
- newsentences.append([splitsentence])
-
- #print(newpunctuationsindexes)
- newpunctuationsindexes = newpunctuationsindexes[::-1]
- for n in range(len(newpunctuationsindexes)):
- punctuations.insert(newpunctuationsindexes[n][1], newpunctuationsindexes[n][0])
-
- #print(newsentences, punctuations)
- return newsentences, punctuations
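-
- # Sketch of what AndOrSolver does: around coordinating words ('und', 'oder',
- # 'jedoch', ...) it checks, via the spaCy 'sb'/'ep' dependencies collected at
- # the commas, whether both sides carry their own subject, tags the cut points
- # with an 'alohaseparator' marker, splits there, and duplicates the sentence
- # punctuation for every new piece. LoadSentGlueSGDandGSUtils() must have been
- # called first, since it sets self.nlp.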
-
-
-
- def LoadBoWModelAndDatabaseOnesZeros(self):
-
-
- import FASTsearch
-
- #print('loading the tag hkl db..')
- self.fsearch1 = FASTsearch.FASTsearch('GS_DB_word.tag_.hkl')
- #print('done')
-
- #print('generating BoW Model..')
- self.fsearch1.Gen_BoW_Model(1000, "word")
- #print('done')
-
- #print('loading the bow model')
- self.fsearch1.Load_BoW_Model('bagofwordsGS_DB_word.tag_.pkl', 'DataBaseOneZerosGS_DB_word.tag_.hkl')
- #print('done')
-
- #print('loading the dep hkl db..')
- self.fsearch2 = FASTsearch.FASTsearch('GS_DB_word.dep_.hkl')
- #print('done')
-
- #print('generating BoW Model..')
- self.fsearch2.Gen_BoW_Model(1000, "word")
- #print('done')
-
- #print('loading the bow model')
- self.fsearch2.Load_BoW_Model('bagofwordsGS_DB_word.dep_.pkl', 'DataBaseOneZerosGS_DB_word.dep_.hkl')
- #print('done')
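-
- # Usage sketch (the .hkl/.pkl files named above are precomputed artifacts of
- # the project-local FASTsearch module and must exist in the working directory):
- #
- #   seg = SentSeg('de')
- #   seg.LoadBoWModelAndDatabaseOnesZeros()   # sets self.fsearch1 / self.fsearch2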
-
- def LoadSentGlueSGDandGSUtils(self):
-
- import GS_Utils
- #print('initializing the gs utils..')
- self.gs = GS_Utils.GS_Utils('de_core_news_sm')
- #print('done')
-
-
- from SentGlue import SentGlueMach
- #print('loading the Stochastic Gradient models..')
- self.sgm = SentGlueMach('trainedSGD_twolabel.pkl', 'bagofwordstwolabel.pkl')
- #print('done')
- #print('initializing the SGM..')
- self.sgm.initialize()
- #print('done')
-
- #print('importing spacy..')
- import spacy
- #print('done')
-
- #print('importing german model..')
- self.nlp = spacy.load('de_core_news_sm')
- #print('done')
-
- return 'done'
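-
- # Typical initialization before using the splitting methods below (a sketch;
- # the pickle filenames are the ones hard-coded above):
- #
- #   seg = SentSeg('de')
- #   seg.LoadSentGlueSGDandGSUtils()   # sets self.gs, self.sgm and self.nlp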
-
- def CommaSentenceOrNot(self, sentences):
-
- nlp = self.nlp
-
- commasentences = []
- counter = 0
-
- #print('creating array of comma or not..')
- for sentence in sentences:
-
- doc = nlp(' '.join(sentence[0]))
-
- #print(doc)
- counter += 1
- #if counter % 100 == 0:
- #print(counter)
-
-
- n = 0
- firstone = 0
- token = []
- nextword = 0
- for word in doc:
- #print(word.tag_)
- # word.pos_ is suitable here for noun and verb, word.dep_ for sb/pd, and possibly tag
-
- if firstone == 0:
- token.append(word.text)
-
- firstone = 1
-
-
- if nextword == 1:
- token.append(word.text)
-
- nextword = 0
-
- if word.tag_ == '$,':
- n += 1
- nextword = 1
-
- sentence.append([n, token])
-
- commasentences.append(sentence)
-
- #print('done')
- return commasentences
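-
- # Sketch: CommaSentenceOrNot appends [comma_count, token] to every sentence,
- # where token holds the first word plus the word right after each comma
- # (self.nlp must be loaded):
- #
- #   sents = seg.CommaSentenceOrNot([[['Ich', 'gehe', ',', 'weil', 'es', 'regnet', '.']]])
- #   # sents[0][1] == [1, ['Ich', 'weil']]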
-
- def EnumerationSolver(self, sentences):
-
-
- gs = self.gs
-
-
- nlp = self.nlp
-
- sgm = self.sgm
-
-
- enumerationsentences = []
- counter = 0
- NOTenumerations = []
- #print('processing enumerations..')
- for sentence in sentences:
-
- doc = nlp(' '.join(sentence[0]))
-
- #print(doc)
- counter += 1
- #if counter % 100 == 0:
- #print(counter)
-
- n = 0
- firstone = 0
- token = []
- nextword = 0
- enumeration = False
-
- splitsentence = []
- splitsentence_deps = []
- splitsentence_tags = []
- splitsentences = []
- splitsentences_deps = []
- splitsentences_tags = []
-
-
-
- for word in doc:
- #print(word.tag_)
- # word.pos_ is suitable here for noun and verb, word.dep_ for sb/pd, and possibly tag
-
-
-
- nextword = 0
-
- if word.tag_ == '$,':
- n += 1
- nextword = 1
-
- if (word.text == 'und' or word.text == 'oder') and n >= 1:
- enumeration = True
- break
-
-
- output = []
- if enumeration == True:
-
- for word in doc:
-
- #print(word.text)
-
- if word.text != ',' and word.text != '.' and word.text != 'und':
-
- splitsentence.append(word.text)
- splitsentence_deps.append(word.dep_)
- splitsentence_tags.append(word.tag_)
-
- if word.text == ',' or word.text == 'und':
-
- #print('oi')
-
- splitsentences.append(splitsentence)
- splitsentences_deps.append(splitsentence_deps)
- splitsentences_tags.append(splitsentence_tags)
- splitsentence = []
- splitsentence_deps = []
- splitsentence_tags = []
-
- splitsentences.append(splitsentence)
- splitsentences_deps.append(splitsentence_deps)
- splitsentences_tags.append(splitsentence_tags)
-
- #print( 'splitsentences', splitsentences)
-
- token = []
- enumerations = []
- enumerationsSPOs = []
- NOTenumerations = []
-
- for sentence in splitsentences:
- token.append(sentence[0])
-
-
- if sentence[0] not in self.full_list:
- enumerations.append(sentence)
- enumerationsSPOs.append(gs.checkSPO(sentence, 0))
- else:
- NOTenumerations.append(sentence)
-
- #print(enumerationsSPOs)
-
-
- #print('enumerations', enumerations)
- biggest = []
- for i in range(len(enumerationsSPOs)):
- biggest.append([i, sum(enumerationsSPOs[i])])
-
-
- sortedbiggest = sorted(biggest[::-1], key=lambda tup: tup[1], reverse=True)
-
- for i in range(len(sortedbiggest)):
- if sortedbiggest[i][0] == 0:
- mainsentenceIndex = sortedbiggest[i][0]
- lastornot = 0
- break
-
- if sortedbiggest[i][0] == len(biggest) - 1:
- mainsentenceIndex = sortedbiggest[i][0]
- lastornot = 1
- break
-
-
- # The case 'Er, sie und der Beamte LACHTEN den Clown aus' still needs to be caught here using the database of cases, i.e. for an enumeration with SPO 1 0 0 the plural verb must become singular, depending on the articles.
- #print('enumerations', enumerations)
- mainsentence = enumerations[mainsentenceIndex]
- #print('main', mainsentence)
- probablemainsentences = []
- for i in range(len(enumerations)):
- if i != mainsentenceIndex:
- iprobablemainsentences = []
- probablemainsentence = []
- if lastornot == 0:
- for j in range(1, len(mainsentence)):
- probablemainsentence = mainsentence[0:j] + enumerations[i]
- #print(probablemainsentence)
- iprobablemainsentences.append(' '.join(probablemainsentence))
- if lastornot == 1:
- for j in range(1, len(mainsentence)):
- probablemainsentence = enumerations[i] + mainsentence[-j:]
- iprobablemainsentences.append(' '.join(probablemainsentence))
- probablemainsentences.append(iprobablemainsentences)
-
-
- # here we still check for the annotation, but in this case it is more important that a tuple does not show up torn apart. CHANGE !!!!
-
- #print('probablemainsentences', probablemainsentences)
- tuplesToCheck = []
- tuples = [['ART', 'NN'], ['APPR','NN'], ['ART', 'CARD']]
- for tupl in tuples:
-
-
- checktupleindex, tupleInWords = gs.checkForAnnotationTuple(mainsentence, tupl , 'word.tag_', 'None')
- if checktupleindex == 2:
- tuplesToCheck.append([tupl, tupleInWords])
- triplesToCheck = []
- triples = [['ART','ADJA','NN'], ['APPR', 'ART', 'NN'], ['KOKOM', 'ART', 'NN']]
- for tripl in triples:
- checktripleindex, tripleInWords = gs.checkForAnnotationTriple(mainsentence, tripl, 'word.tag_', 'None')
- if checktripleindex == 3:
- triplesToCheck.append([tripl, tripleInWords])
-
- #print('tuples to check', tuplesToCheck)
- #print('triples to check', triplesToCheck)
- #print('probablemainsentences', probablemainsentences)
- for probsentences in probablemainsentences:
-
- checktripleindexes = []
- checktupleindexes = []
- #print(probsentences)
- filteredprobsentences = []
- for sentence in probsentences:
- tuplchecked = 0
- triplchecked = 0
- #print('sentence and tuples to check', sentence, tuplesToCheck)
- for tupl in tuplesToCheck:
-
- checkedsecondtime, tupleinWords = gs.checkForAnnotationTuple(sentence.split(), tupl[0], 'word.tag_', tupl[1])
-
- #print(sentence, checkedsecondtime)
- if checkedsecondtime == 1:
-
- tuplchecked = 0
- if checkedsecondtime == 2:
-
- tuplchecked = 1
-
- for tripl in triplesToCheck:
- checkedsecondtime, tripleinWords = gs.checkForAnnotationTriple(sentence.split(), tripl[0], 'word.tag_', tripl[1])
- if checkedsecondtime == 1 or checkedsecondtime == 2:
-
- triplchecked = 0
- if checkedsecondtime == 3:
-
- triplchecked = 1
-
-
-
-
- if triplchecked == 1 or tuplchecked == 1:
- filteredprobsentences.append(sentence)
-
- #print('filteredprobsentences', filteredprobsentences)
- if len(filteredprobsentences) == 0:
- filteredprobsentences = probsentences
- # there is still the problem here that these are lists of words instead of proper sentences..
- #print('filteredprobsentences', filteredprobsentences)
- probsMatrix = sgm.predictprobsOnSentenceList(filteredprobsentences, filteredprobsentences)
-
- #print(probsMatrix)
-
- for i in range(len(probsMatrix)):
- probsMatrix[i][0] = i
-
- #print(probsMatrix)
-
- sortedprobsMatrix = sorted(probsMatrix[::-1], key=lambda tup: tup[1], reverse=True)
-
- #print(sortedprobsMatrix)
-
- bestindex = sortedprobsMatrix[0][0]
-
- #print(bestindex)
- #print('probablemainsentences', filteredprobsentences)
- probablemainsentence = filteredprobsentences[int(bestindex)]
- #print('oi', probablemainsentence)
-
- #print('probablemainsentence', probablemainsentence)
- enumerationsentences.append([probablemainsentence])
-
-
- enumerationsentences.append([' '.join(mainsentence)])
-
- for notenum in NOTenumerations:
- #print(enumerationsentences)
- #print(enumerationsentences[-1])
- #print('enum no1', enumerationsentences)
- #print('notenum', notenum)
- enumerationsentences[-1].append(' '.join(notenum))
- #print('enumsentences',enumerationsentences[-1])
- enumerationsentences[-1] = [', '.join(enumerationsentences[-1])]
-
-
- else:
- enumerationsentences.append([sentence])
-
-
-
- output.append(enumerationsentences)
-
-
- for n in range(len(output[0])):
- #print('out',output[0][n])
- try:
- output[0][n] = [output[0][n][0].split()]
- except:
- output[0][n] = [output[0][n][0][0]]
-
-
- #print('done')
- return output[0]
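-
- # Sketch: for an enumeration the clause with the strongest SPO evidence is
- # taken as the main sentence and every other item is expanded into its own
- # candidate sentence, ranked by the SentGlue SGD model (self.sgm):
- #
- #   out = seg.EnumerationSolver([[['Anna', ',', 'Ben', 'und', 'Clara', 'lachen', '.']]])
- #   # yields roughly 'Anna lachen' / 'Ben lachen' / 'Clara lachen'; the
- #   # plural/singular repair is still open, see the note further up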
-
-
- def GetUtteranceNumber(self, sentences):
-
- nlp = self.nlp
-
- uttersentences = []
-
- for sentence in sentences:
-
- doc = nlp(' '.join(sentence[0]))
-
- subjectcount = 0
-
- for word in doc:
-
- if word.dep_ == 'sb' or word.dep_ == 'ep':
- subjectcount += 1
-
- sentence.append(subjectcount)
- uttersentences.append(sentence)
-
- return uttersentences
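-
- # Sketch: counts subject/expletive dependencies ('sb'/'ep') per sentence and
- # appends the count; a coordinated two-clause sentence typically gets 2
- # (depending on the spaCy parse):
- #
- #   sents = seg.GetUtteranceNumber([[['Anna', 'singt', 'und', 'Ben', 'tanzt']]])
- #   # sents[0][-1] == 2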
-
- def GetQuestionOrNot(self, sentences):
-
- nlp = self.nlp
-
- uttersentences = []
- for sentence in sentences:
- # reset per sentence so one question mark does not flag all later sentences
- questionmark = 0
-
- doc = nlp(' '.join(sentence[0]))
-
-
- count = 0
- for word in doc:
-
-
- count += 1
-
- if word.text == '?':
- questionmark = 1
-
- sentence.append(questionmark)
- uttersentences.append(sentence)
-
- return uttersentences
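-
- # Sketch: appends 1 if the sentence contains a question mark, else 0:
-
- #   sents = seg.GetQuestionOrNot([[['Kommst', 'du', '?']], [['Ja', '.']]])
- #   # sents[0][-1] == 1, sents[1][-1] == 0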
-
- def SplitSentencesIntoHauptNebenTuple(self, sentences, punctuations):
-
-
-
- oldsplitsentences = []
- #print('hauptneben inputsentences', sentences)
-
- gs = self.gs
-
- #print('importing spacy..')
- import spacy
- #print('done')
-
- nlp = self.nlp
-
- outputsentences = []
- sentencesThatAreOutoutput = []
- outsentences = []
- for generalindex in range(len(sentences)):
- presentence = sentences[generalindex]
-
- splitsentence = []
- splitsentence_deps = []
- splitsentence_tags = []
- splitsentences = []
- splitsentences_deps = []
- splitsentences_tags = []
- commainfo = presentence[1]
- outputsentence = []
-
-
- token = commainfo[1]
-
- commaornot = commainfo[0]
-
- numberutterances = presentence[2]
-
- sentence = presentence[0]
-
- oldsentence = presentence[0]
-
- #print(commaornot)
- if commaornot >= 2:
- #print('nla')
-
- sentence[0] = sentence[0].title()
-
- doc = nlp(' '.join(sentence))
-
-
- for word in doc:
-
- #print(word.text)
-
- if word.text != ',' and word.text != '.':
-
- splitsentence.append(word.text)
- splitsentence_deps.append(word.dep_)
- splitsentence_tags.append(word.tag_)
-
- if word.text == ',':
-
- #print('oi')
-
- splitsentences.append(splitsentence)
- splitsentences_deps.append(splitsentence_deps)
- splitsentences_tags.append(splitsentence_tags)
- splitsentence = []
- splitsentence_deps = []
- splitsentence_tags = []
-
-
- splitsentences.append(splitsentence)
- splitsentences[0][0] = splitsentences[0][0].lower()
- splitsentences_deps.append(splitsentence_deps)
- splitsentences_tags.append(splitsentence_tags)
- oldsplitsentences = splitsentences
- #print(splitsentences)
- #print(splitsentences_tags)
- #print(splitsentences_deps)
- spo = []
-
- for n in range(len(splitsentences)):
- prespo = []
- prespo = gs.checkSPO(splitsentences_deps[n], 1)
- prespo.append( gs.checkForAnnotation(splitsentences[n], 'VVINF', 'word.tag_'))
- prespo.append(gs.checkForAnnotation(splitsentences[n], 'VAFIN', 'word.tag_'))
- prespo.append(gs.checkForAnnotation(splitsentences[n], 'VVFIN', 'word.tag_'))
- prespo.append(gs.checkForAnnotation(splitsentences[n], 'VMFIN', 'word.tag_'))
-
-
-
-
-
- spo.append(prespo)
- #print(splitsentences_deps)
- #print(splitsentences)
- #print(spo)
-
- indexSPO = []
- lastm = len(splitsentences)
- for o in range(len(splitsentences)):
-
- m = len(splitsentences) - 1 - o
- for n in range(len(splitsentences)):
-
-
-
- if m < n - 1 and n < lastm:
-
- #print('spo s',spo[m], spo[n])
- sb = spo[m][0] + spo[n][0]
- Vafin = 1
- if spo[m][3] == 1 or spo[n][3] == 1:
- Vafin = spo[m][3] + spo[n][3]
- Vvinf = 1
- if spo[m][4] == 1 or spo[n][4] == 1:
- Vvinf = spo[m][4] + spo[n][4]
- Vvfin = 1
- if spo[m][5] == 1 or spo[n][5] == 1:
- Vvfin = spo[m][5] + spo[n][5]
- Vmfin = 1
- if spo[m][6] == 1 or spo[n][6] == 1:
- Vmfin = spo[m][6] + spo[n][6]
- #wrapped = 0
- #for n in range(len(indexSPO)):
- #if n == indexSPO[n][0] + 1 and n == indexSPO[n][1] - 1:
- #wrapped = 1
- #print(sb, Vafin, Vvinf, Vvfin, Vmfin, 'm n', m, n)
- if sb == 1 and Vafin == 1 and Vvinf == 1 and (Vvfin == 1 or Vmfin == 1):
-
- indexSPO.append([m,n])
- #print([m,n])
- lastm = m
- #print('lastm',lastm)
-
-
-
- #print(splitsentences)
- Hauptsentences = []
- for n in range(len(indexSPO)):
- if indexSPO[n][0] > indexSPO[n][1]:
- i = 1
- j = 0
- else:
- i = 0
- j = 1
- Hauptsentences.append([splitsentences[indexSPO[n][i]] + splitsentences[indexSPO[n][j]] , indexSPO[n][i], indexSPO[n][j] ])
-
- HauptSentences = []
- for n in range(len(Hauptsentences)):
- m = len(Hauptsentences) - 1 - n
- HauptSentences.append(Hauptsentences[m])
-
- #print('Hauptsentences', Hauptsentences)
- #print('HauptSentences', HauptSentences)
- sentencesThatAreOut =[]
-
- for n in range(len(HauptSentences)):
- index = HauptSentences[n][1]
- finish = 0
- #print('Oi',HauptSentences[n])
- if n == len(HauptSentences) - 1:
-
- #print('lenHauptsentences', len(HauptSentences))
-
- stopindex = len(splitsentences)
- finish = 1
- else:
- stopindex = HauptSentences[n + 1][1]
- #print('stopindex', stopindex)
- vvfinisthere = 0
- if finish == 0:
- if splitsentences_tags[stopindex][0] == 'VVFIN':
- stopindex -= 1
- vvfinisthere = 1
-
- if splitsentences_tags[index][0] == 'VVFIN':
- vvfinisthere = 1
-
- if vvfinisthere == 1:
-
-
- HNTuple = HauptSentences[n][0] + [','] + splitsentences[index - 1]
- outputsentence.append(HNTuple)
- sentencesThatAreOut.append(index - 1)
- sentencesThatAreOut.append(Hauptsentences[n][1])
- sentencesThatAreOut.append(Hauptsentences[n][2])
-
- for m in range(index + 1, stopindex ):
- if m != HauptSentences[n][2]:
- HNTuple = HauptSentences[n][0] + [','] + splitsentences[m]
- #print('check', HauptSentences[n], n)
- #print('check', splitsentences[m], m)
- #print('double', HNTuple)
- outputsentence.append(HNTuple)
-
-
-
- sentencesThatAreOut.append(m)
- sentencesThatAreOut.append(Hauptsentences[n][1])
- sentencesThatAreOut.append(Hauptsentences[n][2])
-
- sentencesThatAreOutoutput.append(sentencesThatAreOut)
-
-
- cpOrNots = []
- rcOrNots = []
- for splitsentence in splitsentences_deps:
- cpOrNot = gs.checkForAnnotationInTokenizedSentence(splitsentence, 'cp')
- cpOrNots.append(cpOrNot)
- rcOrNot = gs.checkForAnnotationInTokenizedSentence(splitsentence, 'rc')
- rcOrNots.append(rcOrNot)
-
- #print('Laenge splitsentences', len(splitsentences))
- #print('laenge cpOrNots', len(cpOrNots))
- #print(cpOrNots)
- #print('rc or nots', rcOrNots)
- pairs = []
- for n in range(len(cpOrNots)):
- index = len(cpOrNots) - 1 - n
- done = 0
- if rcOrNots[index] == 1:
- pairs.append([index, index - 1])
- done = 1
-
-
- if done == 0 and cpOrNots[index] == 1:
- try:
- if splitsentences_tags[index + 1][0] == 'VVFIN':
- pairs.append([index, index + 1])
- done = 1
- except:
- pass
- try:
- if done == 0 and rcOrNots[index - 1] == 0:
- pairs.append([index, index - 1])
- done = 1
- except:
- pass
- try:
- if done == 0 and rcOrNots[index - 1] == 1:
- if rcOrNots[index - 2] == 0:
- pairs.append([index, index - 2])
- except:
- pass
-
- for pair in pairs[::-1]:
- if pair[0] not in set(sentencesThatAreOut) or pair[1] not in set(sentencesThatAreOut):
- outputsentence.append(splitsentences[pair[1]] + [','] + splitsentences[pair[0]])
- #print('hnhn',sentences)
- sentences[generalindex][0] = outputsentence
-
- #print('outputsentence hntuple',outputsentence)
- #outputsentences.append([outputsentence , i])
-
- #print('Oio', outputsentences)
- #print(sentencesThatAreOutoutput)
- #print(splitsentences)
- #print('oioioioioioioio',sentences)
-
- #print(sentences[0][0])
-
-
- #print('oioi',sentences[n])
- #print('malatesta', sentences[n][0][0])
- #print('generalindex sentences index 0', sentences[generalindex][0])
- try:
- if type(sentences[generalindex][0][0]) == str:
- sentences[generalindex][0] = [sentences[generalindex][0]]
- except:
- pass
- #print('generalindex sentences index 0', sentences[generalindex][0])
- #print('oldsentence', oldsentence)
- newgeneratedsentences = len(sentences[generalindex][0])
- if newgeneratedsentences > 1:
- #print('goti t')
- for sentence in sentences[generalindex][0]:
- punctuations.insert(generalindex, punctuations[generalindex])
- outsentences.append(sentence)
- del punctuations[generalindex]
- if newgeneratedsentences == 1:
- if len(sentences[generalindex][0][0]) > 1:
- outsentences.append(sentences[generalindex][0][0])
- else:
- outsentences.append(oldsentence)
- if newgeneratedsentences == 0:
- #print('case oldsentence', oldsentence)
- outsentences.append(oldsentence)
- #print('oioi', sentences[n])
- # attach free-standing commas to the word before
- #print('theoutsentences', outsentences)
- for outsentence in outsentences:
- todelete = []
- for n in range(len(outsentence)):
- if outsentence[n] == ',':
- todelete.append(n)
- outsentence[n-1] = outsentence[n-1] + ','
- for deleteindex in todelete[::-1]:
- del outsentence[deleteindex]
-
- for index in range(len(outsentences)):
- outsentences[index] = [outsentences[index]]
- #print('theoutsentences', outsentences)
-
- # removing duplicates
- doubledsentences = []
- for o in range(len(outsentences)):
- sentence = outsentences[o][0]
- for m in range(len(outsentences)):
- if m != o:
- count = 0
- for n in range(len(sentence)):
- if sentence[n] in outsentences[m][0] or sentence[n][:-1] in outsentences[m][0]:
- count += 1
- if count == len(sentence):
- doubledsentences.append(sentence)
- punctdeleteindex = []
- tmp = set()
- for sentence in doubledsentences:
- tmp.add(tuple(sentence))
- #print(list(tmp))
- doubledsentences = []
- for tup in tmp:
- doubledsentences.append([list(tup)])
- #print('doubledsentences',doubledsentences)
- punctdeleteindexes = []
- for double in doubledsentences:
- if double in outsentences:
- punctdeleteindex = outsentences[::-1].index(double)
- del outsentences[len(outsentences) - 1 - punctdeleteindex]
- punctdeleteindexes.append(punctdeleteindex)
-
- for index in punctdeleteindexes[::-1]:
- del punctuations[len(outsentences) - 1 - index]
-
- #print('oldsplit',oldsplitsentences)
- #print('outsents',outsentences)
-
- for o in range(len(oldsplitsentences)):
- for m in range(len(outsentences)):
- counter = 0
- for n in range(len(oldsplitsentences[o])):
- if oldsplitsentences[o][n] in outsentences[m][0] or oldsplitsentences[o][n] + ',' in outsentences[m][0]:
- counter += 1
- if counter >= len(oldsplitsentences[o]):
- break
- if m == len(outsentences) - 1 and counter < len(oldsplitsentences[o]):
- if o == 0:
- outsentences.insert(0,[oldsplitsentences[o]])
- punctuations.insert(0, punctuations[0])
- else:
- newones = []
- for i in range(len(outsentences)):
- if outsentences[i][0][-1] == oldsplitsentences[o - 1][-1]:
- if len(outsentences[i][0]) > 2 and len(oldsplitsentences[o - 1]) > 2:
- if outsentences[i][0][-2] == oldsplitsentences[o - 1][-2]:
- if outsentences[i][0][-3] == oldsplitsentences[o - 1][-3]:
- newones.append([i + 1, [oldsplitsentences[o]]])
- for newone in newones[::-1]:
- #print(newones)
- outsentences.insert(newone[0], newone[1])
- punctuations.insert(newone[0], punctuations[newone[0] - 1])
-
-
-
-
-
- #print('outsentences at the very end ', outsentences, punctuations)
- return outsentences, punctuations
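-
- # Sketch: for sentences with two or more commas this method finds the main
- # clause via subject + finite-verb evidence (the SPO checks above) and pairs
- # it with each dependent clause, so roughly:
-
- #   'Er sagt, dass sie kommt, weil es regnet.'
- #   -> 'Er sagt, dass sie kommt' and 'Er sagt, weil es regnet'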
-
-
- # Note: the input here must always be pairs of main clause/subordinate clause, i.e. a further upstream class is required.
-
- def SplitCommatas(self, Inputsentences, punctuations):
-
- gs = self.gs
-
- nlp = self.nlp
-
- gramcorr_splitsentences = []
- counter = 0
- newpunctuationsindex = []
- for Inputsentence in Inputsentences:
-
- counter += 1
-
-
- commainfo = Inputsentence[1]
-
-
- token = commainfo[1]
-
- commaornot = commainfo[0]
-
- numberutterances = Inputsentence[2]
-
-
- if commaornot == 0:
- gramcorr_splitsentences.append(Inputsentence[0])
-
- if commaornot > 1:
- gramcorr_splitsentences.append(Inputsentence[0])
-
- if commaornot == 1:
- oldsentence = Inputsentence[0]
- Inputsentence = [[Inputsentence[0]]]
-
-
-
-
- for sentence in Inputsentence[0]:
-
- splitsentence = []
-
- splitsentences = []
-
-
-
-
- processed = 0
- wasNotInAnyList = 0
- try:
- for n in range(len(token)):
-
- if token[n] in self.final_list:
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
-
-
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- if n == 1:
-
-
- if token[n] == 'um' or token[n] == 'Um':
-
- splitsentences[n].insert(0,'dies')
- splitsentences[n].insert(0,'um')
- else:
- splitsentences[n].insert(0,'dann')
-
-
-
- if n == 0:
-
- if token[n] == 'um' or token[n] == 'Um':
- splitsentences[n].insert(0,'dies')
- splitsentences[n].insert(0,'um')
- splitsentences = splitsentences[::-1]
- else:
- splitsentences[n].insert(0,'dann')
-
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
- generalrules = [['ADV','VAFIN'], ['ADV', 'VVFIN']]
- processed = 1
-
- if token[n] in self.adversativ_list:
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- splitsentences[n].append('jedoch')
-
-
- generalrules = [['ADV','VAFIN'], ['ADV', 'VVFIN']]
- processed = 1
-
- if token[n] in self.kausal_list:
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- # Since 'deswegen' is attached to the other clause, the input to commasentences must always be TWO sentences.
- #print('splitsentences in kausal', splitsentences)
- if n == 1:
- splitsentences[n - 1].insert(0,'deswegen')
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
-
-
- if n == 0:
- splitsentences[n + 1].insert(0,'deswegen')
-
-
-
-
- #print('splitsentences in kausal', splitsentences)
-
-
- generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]
- processed = 1
-
- # from here on consecutive clauses (Konsekutivsätze) are handled; they have to be split according to https://www.deutschplus.net/pages/Konsekutivsatz
- if token[n] in self.konsekutiv_list:
- #print('oi konsekutiv')
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- generalrules = [['KOUS','PPER']]
- processed = 1
-
-
- if token[n] in self.konditional_list:
- splitsentence = []
- for word in sentence:
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 1:
-
- spoCount = gs.checkSPO(splitsentences[n], 0)
-
- spoCount = sum(spoCount)
-
- if spoCount == 2:
- thereisanes = 0
- for word in splitsentences[n]:
- if word == 'es' or word == 'Es':
- thereisanes = 1
- if thereisanes == 0:
- splitsentences[n].append('es')
-
-
- if n == 0:
-
-
- spoCount = gs.checkSPO(splitsentences[n], 0)
-
- spoCount = sum(spoCount)
-
- if spoCount == 2:
-
- thereisanes = 0
- for word in splitsentences[n]:
- if word == 'es' or word == 'Es':
- thereisanes = 1
- if thereisanes == 0:
- splitsentences[n].append('es')
-
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
- generalrules = [['KOUS','PPER']]
- processed = 1
-
- if token[n] in self.konzessiv_list:
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 1:
- splitsentences[n - 1].insert(0,'trotzdem')
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
-
-
- if n == 0:
- splitsentences[n + 1].insert(0,'trotzdem')
-
-
- generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]
- processed = 1
-
- if token[n] in self.lokal_list:
- #print('lokal ole ole ')
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 1:
- splitsentences[n - 1].insert(0,'dort')
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
-
-
- if n == 0:
- splitsentences[n + 1].insert(0,'dort')
-
-
- generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]
- processed = 1
-
- if token[n] in self.instrumental_list:
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 1:
- splitsentences[n - 1].insert(0,'so')
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
-
-
- if n == 0:
- splitsentences[n + 1].insert(0,'so')
-
-
- generalrules = [['ADV','VAFIN'], ['ADV', 'VVFIN']]
- processed = 1
-
- if token[n] in self.temporal_list_vor:
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 1:
- splitsentences[n].insert(0,'danach')
-
-
-
-
- if n == 0:
- splitsentences[n].insert(0,'danach')
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
- generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]
- processed = 1
-
- if token[n] in self.temporal_list_nach:
- splitsentence = []
- for word in sentence:
-
- if word != token[n]:
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 1:
- splitsentences[n].insert(0,'davor')
-
-
-
-
- if n == 0:
- splitsentences[n].insert(0,'davor')
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
- generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]
- processed = 1
-
- #print(token[n])
- if token[n] == 'der' or token[n] == 'welcher':
-
- tokens = self.nlp(' '.join(sentence))
- for word in tokens:
- if word.dep_ == 'rc':
- wordwithrc = word.text
-
- rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')
-
- oldsplitsentences = splitsentences
- splitsentences = []
-
- if rcORnot == 1:
- splitsentence = []
- for word in sentence:
-
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- # the swap is done here because otherwise spacy does not read 'dieser' as PDS.. analogous in the other cases.
-
- if wordwithrc in splitsentences[n]:
-
- splitsentences[n][0] = 'dieser'
-
- verb = splitsentences[n][-1]
-
- splitsentences[n] = splitsentences[n][:-1]
- splitsentences[n].insert(1, verb)
-
-
- #print('Vorsicht', splitsentences)
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
- else:
- splitsentences = oldsplitsentences
- splitsentence = []
-
- if token[n] == 'die' or token[n] == 'welche':
-
-
- tokens = self.nlp(' '.join(sentence))
- for word in tokens:
- if word.dep_ == 'rc':
- wordwithrc = word.text
-
-
-
- rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')
-
-
- oldsplitsentences = splitsentences
- splitsentences = []
-
-
- if rcORnot == 1:
- #print('it went to rcornot in case die')
-
-
- splitsentence = []
- for word in sentence:
-
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- if wordwithrc in splitsentences[n]:
- #print('wordwithrc was in sentence')
- #print(wordwithrc)
- #print(splitsentences[n])
- #print('wordwithrcend')
- splitsentences[n][0] = 'diese'
-
- verb = splitsentences[n][-1]
-
- splitsentences[n] = splitsentences[n][:-1]
- splitsentences[n].insert(1, verb)
-
-
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
- else:
-
- splitsentences = oldsplitsentences
- splitsentence = []
-
- if token[n] == 'dem':
-
- tokens = self.nlp(' '.join(sentence))
- for word in tokens:
- if word.dep_ == 'rc':
- wordwithrc = word.text
-
- rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')
-
- oldsplitsentences = splitsentences
- splitsentences = []
-
- if rcORnot == 1:
- splitsentence = []
- for word in sentence:
-
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',' and word[-1] != '.':
- splitsentence.append(word)
-
- if word[-1] == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if wordwithrc in splitsentences[n]:
-
- splitsentences[n][0] = 'diesem'
-
- verb = splitsentences[n][-1]
-
- splitsentences[n] = splitsentences[n][:-1]
- splitsentences[n].insert(1, verb)
-
-
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
- else:
- splitsentences = oldsplitsentences
- splitsentence = []
-
- if token[n] == 'das' or token[n] == 'welches':
-
- tokens = self.nlp(' '.join(sentence))
- for word in tokens:
- if word.dep_ == 'rc':
- wordwithrc = word.text
-
-
- rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')
-
- #print('Oeeee',rcORnot)
- oldsplitsentences = splitsentences
- splitsentences = []
- if rcORnot == 1:
- splitsentence = []
- for word in sentence:
-
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
- #print('splitsentence in das rc', splitsentences)
- if wordwithrc in splitsentences[n]:
-
- splitsentences[n][0] = 'dieses'
-
- verb = splitsentences[n][-1]
- #print('verb',verb)
- splitsentences[n] = splitsentences[n][:-1]
- splitsentences[n].insert(1, verb)
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
- else:
- splitsentences = oldsplitsentences
- splitsentence = []
-
- if token[n] == 'dessen' or token[n] == 'wessen':
-
- tokens = self.nlp(' '.join(sentence))
- for word in tokens:
- if word.dep_ == 'rc':
- wordwithrc = word.text
-
-
- rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')
-
- oldsplitsentences = splitsentences
- splitsentences = []
-
- if rcORnot == 1:
- splitsentence = []
- for word in sentence:
-
-
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- if wordwithrc in splitsentences[n]:
- verb = splitsentences[n][-1]
-
- splitsentences[n] = splitsentences[n][:-1]
- splitsentences[n].insert(1, verb)
-
-
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
- else:
- splitsentences = oldsplitsentences
- splitsentence = []
-
- if token[n] == 'den' or token[n] == 'welchen':
-
- tokens = self.nlp(' '.join(sentence))
- for word in tokens:
- if word.dep_ == 'rc':
- wordwithrc = word.text
-
-
- rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')
-
- oldsplitsentences = splitsentences
- splitsentences = []
-
- if rcORnot == 1:
- splitsentence = []
- for word in sentence:
-
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if wordwithrc in splitsentences[n]:
-
- splitsentences[n][0] = 'diesen'
-
- verb = splitsentences[n][-1]
-
- splitsentences[n] = splitsentences[n][:-1]
- splitsentences[n].insert(1, verb)
-
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
- else:
- splitsentences = oldsplitsentences
- splitsentence = []
-
-
- if token[n] == 'wem' or token[n] == 'Wem' or token[n] == 'welchem':
-
- daORnot = gs.checkForAnnotation(sentence, 'da', 'word.dep_')
-
- oaORnot = gs.checkForAnnotation(sentence, 'oa', 'word.dep_')
-
- reORnot = gs.checkForAnnotation(sentence, 're', 'word.dep_')
-
- oldsplitsentences = splitsentences
- splitsentences = []
-
- for word in sentence:
-
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 0:
- index = 1
- if n == 1:
- index = 0
-
- if reORnot == 1:
- pass
- if daORnot == 1 and reORnot == 0:
- splitsentences[index].insert(1, 'das')
-
- if oaORnot == 1 and reORnot == 0:
- splitsentences[index].insert(1, 'dem')
-
- if n == 1:
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
-
- if token[n] in self.indirectspeech_list and token[1] not in self.konsekutiv_list:
-
-
- reORnot = gs.checkForAnnotation(sentence, 're', 'word.dep_')
- oldsplitsentences = splitsentences
- splitsentences = []
- splitsentence = []
- for word in sentence:
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',':
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
-
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
-
- if n == 0:
- index = 1
- if n == 1:
- index = 0
-
- if reORnot == 0:
- if splitsentences[index][0] != 'was':
- splitsentences[index].insert(1, 'das')
-
- if n == 1:
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
-
-
- generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]
- processed = 1
-
- if processed == 0 and n == 1:
-
- ZUVINFTupelORnot = gs.checkForAnnotationTuple(sentence, ['PTKZU', 'VVINF'], 'word.tag_', 'None')
- if ZUVINFTupelORnot == 0:
- ZUVINFTupelORnot = gs.checkForAnnotationTuple(sentence, ['PTKZU', 'VAINF'], 'word.tag_', 'None')
-
- if ZUVINFTupelORnot == 1:
-
- reORnot = gs.checkForAnnotation(sentence, 're', 'word.dep_')
- splitsentence = []
- for word in sentence:
-
-
- if word[-1] == ',':
- splitsentence.append(word[:-1])
- if word == ',':
- pass
- if word[-1] != ',' :
- splitsentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- splitsentences.append(splitsentence)
- processed = 1
- splitsentence = []
-
- splitsentences.append(splitsentence)
-
- for m in range(2):
- ZUINForNOT = gs.checkForAnnotationTuple(splitsentences[m], ['PTKZU', 'VVINF'], 'word.tag_','None')
-
- if ZUINForNOT == 0:
- ZUINForNOT = gs.checkForAnnotationTuple(splitsentences[m], ['PTKZU', 'VAINF'], 'word.tag_','None')
-
-
- if ZUINForNOT == 1:
- r = m
- ZUINForNOT = 0
-
-
-
- if r == 0:
- index = 1
- if r == 1:
- index = 0
-
- objectORnot = gs.checkForAnnotation(splitsentences[index] , 'oa', 'word.dep_')
-
- if reORnot == 0 and objectORnot == 0:
- splitsentences[index].insert(1, 'das')
-
- if r == 1:
- splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]
- else:
- processed = 2
-
-
- except:
- wasNotInAnyList = 1
-
-
- #rules = [['ART','ADJA','NN'], ['ART','ADJA','NE'], ['ART', 'NN'], ['ART', 'NE'], ['APPR','NN'], ['APPR','NE'], ['APPR', 'ART', 'NN'], ['APPR', 'ART', 'NE'], ['APPR','ART','NN','ADJA','NN'], ['APPR','ART','NN','ADJA','NE'], ['KOKOM', 'ART', 'NN'], ['KOKOM', 'ART', 'NE'], ['PPOSAT', 'NN'], ['PPOSAT', 'NE'], ['ADV', 'ADJD']]
-
- #print('B',splitsentences)
- endsentences = []
- if (processed == 2 or processed == 0) and n == 1:
- wasNotInAnyList = 1
-
-
- try:
- if wasNotInAnyList == 0:
- newpunctuationsindex.insert(0,[counter-1,punctuations[counter-1]])
- #print('splitsentencee', splitsentences)
- if len(splitsentences) > 2:
- splitsentences = splitsentences[:2]
-
- #print('splitsentenceeeees', splitsentences)
-
- for splitsentence in splitsentences:
-
- #print('splitsentenceeeeeeeeeeee!!',splitsentence)
- wordtoputfirst = 'nada'
- for word in self.firstwordlist:
- if word == splitsentence[0]:
- wordtoputfirst = word
- splitsentence.remove(word)
-
-
-
- #print('get the tuples and triples to check..')
- tuplesTocheck, triplesTocheck, quadruplesTocheck = self.gs.GetTuplesinSentence(splitsentence)
- #print('done')
- #print(tuplesTocheck, 'ole', triplesTocheck ,'aiai', quadruplesTocheck)
- #print('1')
- grammpiecessentence = self.gs.createTupleofGrammarpieces( splitsentence, tuplesTocheck, triplesTocheck, quadruplesTocheck)
-
- #print('grammpiece',grammpiecessentence)
- #print('2')
- if len(grammpiecessentence) > 7:
- print('A sentence is too long, too many permutations. \n passing the grammar through uncorrected..')
- endsentence = ' '.join(grammpiecessentence)
-
- else:
- #print('genrating the permutations')
- permutations = self.sgm.GeneratePermutationsOfSentence(grammpiecessentence)
- #print('done')
- #print(permutations)
- #print('3')
- firstwordwithverblist = ['deswegen', 'danach']
- permutationstodelete = []
- for permutation in permutations:
- #print('4')
- if permutation[0] in firstwordwithverblist:
- #print('4.1')
- count = 1
- for word in self.nlp(permutation[1]):
- #print('4.2')
- if word.tag_[0] != 'V':
- #print('4.3')
- permutationstodelete.append(permutation)
- break
- else:
- break
- #for word in self.nlp(permutation[0]):
- #print('4.2')
- #if word.tag_[0] != 'V':
- #print('4.3')
- #permutationstodelete.append(permutation)
- #break
- #else:
- #break
- for delperm in permutationstodelete:
- try:
- permutations.remove(delperm)
- except:
-
- pass
- #print('5')
-
- sentencesToCheck = []
- if wordtoputfirst in self.firstwordlist:
- for sentence in permutations:
- sentencesToCheck.append(wordtoputfirst + ' ' + ' '.join(sentence))
- else:
- for sentence in permutations:
- sentencesToCheck.append(' '.join(sentence))
-
- endsentence = self.sgm.GetBestSentenceFromSentencesAccordingToGrammar(sentencesToCheck, ' '.join(splitsentence))
- #print('done')
- #print('endsent',endsentence)
- endsentences.append(endsentence)
- except:
- #print('there was an error')
- wasNotInAnyList = 1
- endsentences = []
- todelete = []
- for index in range(len(newpunctuationsindex)):
- if newpunctuationsindex[index][0] == counter - 1:
- todelete.append(index)
- for todel in todelete[::-1]:
- del newpunctuationsindex[todel]
-
-
- if wasNotInAnyList == 1:
- #print('was not in any list')
- #print(oldsentence)
- endsplisentences = []
- splisentence = []
- for word in oldsentence:
-
-
- if word[-1] == ',':
- splisentence.append(word[:-1])
-
- if word == ',':
- pass
- if word[-1] != ',':
- splisentence.append(word)
-
- if word[-1] == ',' or word == ',':
-
- endsplisentences.append(splisentence)
-
- splisentence = []
-
- endsplisentences.append(splisentence)
-
- newpunctuationsindex.insert(0,[counter-1,punctuations[counter-1]])
-
- #print('endsplisentences',endsplisentences)
- for splsentence in endsplisentences:
-
- endsentences.append(' '.join(splsentence))
-
-
-
-
-
- '''
-
-
-
- fsearch1 = self.fsearch1
- spacyclass1 = 'word.tag_'
-
-
- gs_sentence1 = gs.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass1)
-
-
- print('searchPatternMatch for tags')
- bestmatches1 = fsearch1.searchPatternMatch(' '.join(gs_sentence1), 1)
- print('done')
-
- #print('oioi', bestmatches1)
-
- #print(len(fsearch1.database))
- right_gs_tupel1 = []
-
- if len(bestmatches1) < 10:
- bestndocs1 = len(bestmatches1)
- else:
- bestndocs1 = 10
-
- for m in range(bestndocs1):
- right_gs_tupel1.append(fsearch1.database[bestmatches1[m][0]])
-
-
- statistically_correct_sentences1 = gs.Sentence2RightGrammarTupel(' '.join(splitsentence), gs_sentence1, right_gs_tupel1)
-
-
- fsearch2 = self.fsearch2
-
- spacyclass2 = 'word.dep_'
-
- gs_sentence2 = gs.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass2)
-
- print('searchPatternMatch for deps')
- bestmatches2 = fsearch2.searchPatternMatch(' '.join(gs_sentence2), 1)
- print('done')
-
- right_gs_tupel2 = []
-
-
- if len(bestmatches2) < 10:
- bestndocs2 = len(bestmatches2)
- else:
- bestndocs2 = 10
-
-
- for m in range(bestndocs2):
- right_gs_tupel2.append(fsearch2.database[bestmatches2[m][0]])
-
- #print(' '.join(splitsentence))
-
- statistically_correct_sentences2 = gs.Sentence2RightGrammarTupel(' '.join(splitsentence), gs_sentence2, right_gs_tupel2)
-
-
- print(splitsentence)
-
-
- Rightsentence = gs.GetBestgsAccordingRules(' '.join(splitsentence) , gs_sentence1, right_gs_tupel1, right_gs_tupel2, statistically_correct_sentences1, statistically_correct_sentences2, rules, generalrules)
-
-
- '''
- for endsentence in endsentences:
- gramcorr_splitsentences.append(endsentence.split())
-
- for index in newpunctuationsindex:
- punctuations.insert(index[0], index[1])
-
- return gramcorr_splitsentences, punctuations
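-
- # Sketch: a single-comma sentence with a known conjunction is cut at the
- # comma, the conjunction dropped, a connective adverb inserted, and the word
- # order of each half repaired via permutation scoring, roughly:
-
- #   'Ich bleibe, weil es regnet.' -> 'Es regnet. Deswegen bleibe ich.'
- #   (illustrative; the actual pick depends on the SGD grammar model)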
-
-
-
- def putAppendixesIntoOwnSentences(self, sentences, punctuations):
-
- gs = self.gs
- #triples = [['NN', 'ART', 'NN'], ['NE', 'ART', 'NN'], ['NN', 'ART', 'NN'], ['NE', 'ART', 'NE']]
- quadruples = [['NN', 'APPR', 'NE', 'NN'], ['NN', 'APPR', 'NE', 'NN'], ['NN', 'APPR', 'ART', 'NN'], ['NE', 'APPR', 'ART', 'NN'], ['NN', 'APPR', 'ART', 'NE'], ['NE', 'APPR', 'ART', 'NE']]
- quadruplestochange = []
- triplestochange = []
- newsentences = []
- newpunctuations = []
- Whatisofnouns = []
- oldsentences = sentences
- oldpunctuations = punctuations
- for hauptindex in range(len(sentences)):
-
- sentence = sentences[hauptindex]
- try:
- #for triple in triples:
- # AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')
- # for tripleinwor in tripleInWords:
- # triplestochange.append([triple, tripleinwor])
-
- for quadruple in quadruples:
- AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')
- #print('quadinwords', quadrupleInWords)
- #print('ANNOORNOT', AnnoOrNot)
- for quadrupleInWo in quadrupleInWords:
- quadruplestochange.append([quadruple, quadrupleInWo])
-
- #print('quadstochange',quadruplestochange)
- for quad in quadruplestochange:
- for n in range(len(sentence) - 4):
- if sentence[n] == quad[1][0]:
- if sentence[n + 1] == quad[1][1]:
- if sentence[n + 2] == quad[1][2]:
- artword = None
- longerWhatisnoun = 0
- for m in range(2):
- for word in self.nlp(sentence[n - m]):
- if word.tag_ == 'ART':
- Nounthatis = sentence[n - m:n + 1]
- # note: re-importing and reloading the model for every match is costly;
- # self.nlp already holds the same model
- import spacy
- nlp = spacy.load('de_core_news_sm')
- token3 = nlp(sentence[n+4])
- counter = 0
- Whatisnoun = sentence[n + 1:n + 4]
- for wor in token3:
- counter += 1
- if wor.tag_ == 'NN' or wor.tag_ == 'NE':
- if counter == 1:
- Whatisnoun = sentence[n + 1:n + 5]
- longerWhatisnoun = 1
- if counter == 2:
- Whatisnoun = sentence[n + 1:n + 4]
-
-
-
- artword = word.text
- #print(sentence[n - 1],'oi')
- if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):
-
- if artword == 'der':
- Nounthatis[0] = 'die'
-
- donothing = 0
- if sentence[n + 1] == 'mit':
- if sentence[n + 2] == 'den':
- verb = ' hat die '
- Whatisnoun = Whatisnoun[2:]
- if sentence[n + 2] == 'der':
- verb = ' hat eine '
- Whatisnoun = Whatisnoun[2:]
- if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':
- donothing = 1
- else:
- verb = ' ist '
- if donothing == 0:
- newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)
-
-
- newsentences.append([hauptindex + 1, newsentence.split()])
- newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])
- if longerWhatisnoun == 0:
- Whatisofnouns.append([n + 1, n + 4, hauptindex])
- else:
- Whatisofnouns.append([n + 1, n + 5, hauptindex])
- except:
- print('Could not process ' + str(sentence) + ' in the per-sentence characterization..')
- try:
- for whatis in Whatisofnouns[::-1]:
- thereisacomma = 0
- #print(sentences[whatis[2]][whatis[1] - 1])
- if sentences[whatis[2]][whatis[1] - 1][-1] == ',':
-
- thereisacomma = 1
- if thereisacomma == 1:
- #print(sentences[whatis[2]][whatis[0] - 1])
- sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','
- del sentences[whatis[2]][whatis[0]:whatis[1]]
- for newsent in newsentences[::-1]:
- sentences.insert(newsent[0], newsent[1])
- for newpunct in newpunctuations[::-1]:
- punctuations.insert(newpunct[0], newpunct[1])
- for sentence in sentences:
- if sentence[-1][-1] == ',':
- sentence[-1] = sentence[-1][:-1]
- except:
- print('could not process the collected characterizations')
- sentences = oldsentences
- punctuations = oldpunctuations
-
-
-
- return sentences, punctuations
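-
- # Sketch: appositions matching tag patterns such as NN APPR ART NN are lifted
- # into their own sentence, roughly:
-
- #   'Sie spricht von der Frau mit den Büchern.'
- #   -> 'Sie spricht von der Frau.' + 'die Frau hat die Büchern.'
- #   (illustrative; articles and endings are carried over verbatim)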
-
-
-
-
-
-
-
-