alpcentaur
/
basabuuka_prototyp



								# split sentences


								# in den Listen fehlt noch sondern ( und noch weitere Dinge..)


								# Folgende Konjunktionen brauchen keine Satzumformungen:

								    # Woraufhin, zudem, zumal, umso - desto,


								# sondern ist schwierig zu lösen.. am besten mit sondern weg, und anschließend SentGlue


								class SentSeg(object):


								    def __init__(self, language):


								        self.language = language


								        self.punktuation_list = ['.', '?', '!', ';', ':']


								        self.wrappunktuation_list = [',', '-']


								        self.adversativ_list = ['wohingegen', 'Wohingegen', 'aber', 'Aber', 'wobei', 'Wobei', 'hingegen']


								        self.final_list = ['damit','Damit', 'um', 'Um']


								        self.kausal_list = ['weil', 'Weil', 'da', 'Da', 'denn', 'falls', 'Falls' ]


								        self.konditional_list = ['wenn', 'Wenn', 'sobald', 'Sobald', 'als', 'falls']


								        self.konsekutiv_list = ['dass', 'Dass']


								        self.konzessiv_list = ['obwohl', 'Obwohl', 'obgleich', 'Obgleich', 'trotzdem', 'Trotzdem', 'wenngleich', 'doch']


								        self.lokal_list = ['wo', 'Wo']


								        self.temporal_list_vor = ['bevor', 'Bevor']


								        self.temporal_list_nach = ['nachdem', 'Nachdem']


								        self.instrumental_list = ['indem', 'Indem']


								        self.indirectspeech_list = ['ob', 'Ob', 'wann', 'Wann', 'wer', 'Wer', 'wie', 'Wie', 'warum', 'Warum', 'weshalb', 'Weshalb', 'wieso', 'Wieso']

								        self.firstwordlist = []

								        #self.firstwordlist = ['wann', 'Wann', 'wer', 'Wer', 'wie', 'Wie', 'warum', 'Warum', 'weshalb', 'Weshalb', 'wieso', 'Wieso', 'dies', 'dann', 'jedoch', 'deswegen', 'trotzdem', 'danach', 'davor', 'wenn', 'sobald']


								        self.full_list = self.adversativ_list + self.final_list + self.kausal_list + self.konditional_list + self.konsekutiv_list + self.konzessiv_list + self.lokal_list + self.temporal_list_nach + self.temporal_list_vor + self.instrumental_list + self.indirectspeech_list


								    def ReadDoc2Sent(self, document):


								        splitsentences = []

								        splitsentence = []


								        with open(document) as sentences:

								            counter = 0

								            for sentence in sentences:


								                counter += 1

								                if counter % 1000 == 0:

								                    print(counter)


								                words = sentence.split()


								                for word in words:


								                    splitsentence.append(word)


								                    if(word[-1] in self.punktuation_list or word in self.punktuation_list) and len(word) > 2:


								                        splitsentences.append([splitsentence])


								                        splitsentence = []


								        return splitsentences


								    def AndOrSolver(self, sentences, punctuations):


								        for n in range(len(punctuations)):

								            if punctuations[n] == ':' or punctuations[n] == '-':

								                punctuations[n] = '.'


								        #print(sentences, punctuations)


								        splitsentences = []


								        counter = 0


								        newsentences = []

								        for sentence in sentences:

								            newpunctuationsindexes = []

								            utterancenumber = sentence[2]

								            commainfo = sentence[1]

								            commaornot = commainfo[0]

								            sentence = sentence[0]


								            counter += 1

								            doc = self.nlp(' '.join(sentence))


								            subjectcount = 0

								            separationwords = []

								            subjectcounts = []

								            doccounter = 0

								            subjectindex = []

								            rcornot = 0

								            for word in doc:

								                doccounter += 1

								                if word.dep_ == 'sb' or word.dep_ == 'ep':

								                    subjectcount += 1

								                    subjectindex.append(doccounter - 1)

								                if word.dep_ == 'rc':

								                    rcornot = 1


								                if word.tag_ == '$,':


								                    subjectcounts.append([subjectcount, doccounter - 2, subjectindex, rcornot])

								                    subjectindex = []

								                    subjectcount = 0

								                    #print('aleaole',sentence[doccounter - 2])

								                    if len(sentence[doccounter - 2]) > 1:


								                        doccounter -= 1


								                if word.text == 'und' or word.text == 'also' or word.text == 'oder' or word.text == 'schon' or word.text == 'bald' or word.text == 'doch' or word.text == 'jedoch' or word.text == 'sondern':

								                    separationwords.append(doccounter - 1)


								            #print('separationwords', separationwords)

								            #print('subjectcounts', subjectcounts)


								            separationwordstocut = []

								            listofownsentencessubjectindexes = []

								            for n in range(len(subjectcounts) - 1):

								                if subjectcounts[n][0] > 0 and subjectcounts[n + 1][0] > 0 and subjectcounts[n + 1][3] == 0:

								                    listofownsentencessubjectindexes.append(subjectcounts[n])

								                for m in range(len(separationwords)):

								                    if subjectcounts[n][1] < separationwords[m] < subjectcounts[n + 1][1]:

								                        #print(subjectcounts[n + 1],  separationwords[m])

								                        if subjectcounts[n + 1][0] > 1:

								                            if subjectcounts[n + 1][2][0] < separationwords[m] <= subjectcounts[n + 1][2][-1]:

								                                separationwordstocut.append(separationwords[m])


								            processed = 0


								            #print('oioioi')

								            #print(listofownsentencessubjectindexes)

								            #print(separationwordstocut)


								            if len(listofownsentencessubjectindexes) > 0:

								                for n in range(len(listofownsentencessubjectindexes)):


								                    sentence[listofownsentencessubjectindexes[n][1]] = sentence[listofownsentencessubjectindexes[n][1]] + 'alohaseparator'

								                    newpunctuationsindexes.append([punctuations[counter - 1], counter - 1])

								                    #print('a new punctuation1')

								                processed = 1

								            if len(separationwordstocut) > 0:

								                for n in range(len(separationwordstocut)):

								                    sentence[separationwordstocut[n] - 1] = sentence[separationwordstocut[n] - 1] + 'alohaseparator'

								                    #print('a new punctuation2')

								                    newpunctuationsindexes.append([punctuations[counter - 1], counter - 1])

								                    processed = 1


								            if processed == 0:

								                newsentences.append([sentence])


								            if processed == 1:

								                #print(sentence)

								                splitsentence = []

								                for word in sentence:

								                    splitsentence.append(word)

								                    if word[-14:] == 'alohaseparator':

								                        if splitsentence[-1][-15] == ',':

								                            splitsentence[-1] = splitsentence[-1][:-15]

								                        else:

								                            splitsentence[-1] = splitsentence[-1][:-14]

								                        newsentences.append([splitsentence])

								                        splitsentence = []

								                newsentences.append([splitsentence])


								            #print(newpunctuationsindexes)

								            newpunctuationsindexes = newpunctuationsindexes[::-1]

								            for n in range(len(newpunctuationsindexes)):

								                punctuations.insert(newpunctuationsindexes[n][1], newpunctuationsindexes[n][0])


								        #print(newsentences, punctuations)

								        return newsentences, punctuations


								    def LoadBoWModelAndDatabaseOnesZeros(self):


								        import FASTsearch


								        #print('loading the tag hkl db..')

								        self.fsearch1 = FASTsearch.FASTsearch('GS_DB_word.tag_.hkl')

								        #print('done')


								        #print('generating BoW Model..')

								        self.fsearch1.Gen_BoW_Model(1000, "word")

								        #print('done')


								        #print('loading the bow model')

								        self.fsearch1.Load_BoW_Model('bagofwordsGS_DB_word.tag_.pkl', 'DataBaseOneZerosGS_DB_word.tag_.hkl')

								        #print('done')


								        #print('loading the dep hkl db..')

								        self.fsearch2 = FASTsearch.FASTsearch('GS_DB_word.dep_.hkl')

								        #print('done')


								        #print('generating BoW Model..')

								        self.fsearch2.Gen_BoW_Model(1000, "word")

								        #print('done')


								        #print('loading the bow model')

								        self.fsearch2.Load_BoW_Model('bagofwordsGS_DB_word.dep_.pkl', 'DataBaseOneZerosGS_DB_word.dep_.hkl')

								        #print('done')


								    def LoadSentGlueSGDandGSUtils(self):


								        import GS_Utils

								        #print('initializing the gs utils..')

								        self.gs = GS_Utils.GS_Utils('de_core_news_sm')

								        #print('done')


								        from SentGlue import SentGlueMach

								        #print('loading the Stochastic Gradient models..')

								        self.sgm = SentGlueMach('trainedSGD_twolabel.pkl', 'bagofwordstwolabel.pkl')

								        #print('done')

								        #print('initializing the SGM..')

								        self.sgm.initialize()

								        #print('done')


								        #print('importing spacy..')

								        import spacy

								        #print('done')


								        #print('importing german model..')

								        self.nlp = spacy.load('de_core_news_sm')

								        #print('done')


								        return 'done'


								    def CommaSentenceOrNot(self, sentences):


								        nlp = self.nlp


								        commasentences = []

								        counter = 0


								        #print('creating array of comma or not..')

								        for sentence in sentences:


								            doc = nlp(' '.join(sentence[0]))


								            #print(doc)

								            counter += 1

								            #if counter % 100 == 0:

								                #print(counter)


								            n = 0

								            firstone = 0

								            token = []

								            nextword = 0

								            for word in doc:

								                #print(word.tag_)

								                # es eignet sich hierbei word.pos_  fuer noun und verb, word.dep_ fuer sb pd, und evtl tag


								                if firstone == 0:

								                    token.append(word.text)


								                firstone = 1


								                if nextword == 1:

								                    token.append(word.text)


								                nextword = 0


								                if word.tag_ == '$,':

								                    n += 1

								                    nextword = 1


								            sentence.append([n, token])


								            commasentences.append(sentence)


								        #print('done')

								        return commasentences


								    def EnumerationSolver(self, sentences):


								        gs = self.gs


								        nlp = self.nlp


								        sgm = self.sgm


								        enumerationsentences = []

								        counter = 0

								        NOTenumerations = []

								        #print('processing enumerations..')

								        for sentence in sentences:


								            doc = nlp(' '.join(sentence[0]))


								            #print(doc)

								            counter += 1

								            #if counter % 100 == 0:

								                #print(counter)


								            n = 0

								            firstone = 0

								            token = []

								            nextword = 0

								            enumeration = False


								            splitsentence = []

								            splitsentence_deps = []

								            splitsentence_tags = []

								            splitsentences = []

								            splitsentences_deps = []

								            splitsentences_tags = []


								            for word in doc:

								                #print(word.tag_)

								                # es eignet sich hierbei word.pos_  fuer noun und verb, word.dep_ fuer sb pd, und evtl tag


								                nextword = 0


								                if word.tag_ == '$,':

								                    n += 1

								                    nextword = 1


								                if (word.text == 'und' or word.text == 'oder') and n >= 1:

								                    enumeration = True

								                    break


								            output = []

								            if enumeration == True:


								                for word in doc:


								                    #print(word.text)


								                    if word.text != ',' and word.text != '.' and word.text != 'und':


								                        splitsentence.append(word.text)

								                        splitsentence_deps.append(word.dep_)

								                        splitsentence_tags.append(word.tag_)


								                    if word.text == ',' or word.text == 'und':


								                        #print('oi')


								                        splitsentences.append(splitsentence)

								                        splitsentences_deps.append(splitsentence_deps)

								                        splitsentences_tags.append(splitsentence_tags)

								                        splitsentence = []

								                        splitsentence_deps = []

								                        splitsentence_tags = []


								                splitsentences.append(splitsentence)

								                splitsentences_deps.append(splitsentence_deps)

								                splitsentences_tags.append(splitsentence_tags)


								                #print( 'splitsentences', splitsentences)


								                token = []

								                enumerations = []

								                enumerationsSPOs = []

								                NOTenumerations = []


								                for sentence in splitsentences:

								                    token.append(sentence[0])


								                    if sentence[0] not in self.full_list:

								                        enumerations.append(sentence)

								                        enumerationsSPOs.append(gs.checkSPO(sentence, 0))

								                    else:

								                        NOTenumerations.append(sentence)


								                #print(enumerationsSPOs)


								                #print('enumerations', enumerations)

								                biggest = []

								                for i in range(len(enumerationsSPOs)):

								                    biggest.append([i, sum(enumerationsSPOs[i])])


								                sortedbiggest = sorted(biggest[::-1], key=lambda tup: tup[1], reverse=True)


								                for i in range(len(sortedbiggest)):

								                    if sortedbiggest[i][0] == 0:

								                        mainsentenceIndex = sortedbiggest[i][0]

								                        lastornot = 0

								                        break


								                    if sortedbiggest[i][0] == len(biggest) - 1:

								                        mainsentenceIndex = sortedbiggest[i][0]

								                        lastornot = 1

								                        break


								                # Hier muss noch für den Fall Er, sie und der Beamte LACHTEN den Clown aus --> das lachten abgefangen werden mit der Datenbank der Fälle, sprich enumeration im spo 1 0 0 + plural muss dann zu singular werden abhängig von den artikeln.

								                #print('enumerations', enumerations)

								                mainsentence = enumerations[mainsentenceIndex]

								                #print('main', mainsentence)

								                probablemainsentences = []

								                for i in range(len(enumerations)):

								                    if i != mainsentenceIndex:

								                        iprobablemainsentences = []

								                        probablemainsentence = []

								                        if lastornot == 0:

								                            for j in range(1, len(mainsentence)):

								                                probablemainsentence = mainsentence[0:j] + enumerations[i]

								                                #print(probablemainsentence)

								                                iprobablemainsentences.append(' '.join(probablemainsentence))

								                        if lastornot == 1:

								                            for j in range(1, len(mainsentence)):

								                                probablemainsentence = enumerations[i] + mainsentence[-j:]

								                                iprobablemainsentences.append(' '.join(probablemainsentence))

								                        probablemainsentences.append(iprobablemainsentences)


								                # hier wird auf noch da geprüft, aber es ist wichtiger in diesem fall, dass ein tuple nicht zerissen vorkommt AENDERN !!!!


								                #print('probablemainsentences', probablemainsentences)

								                tuplesToCheck = []

								                tuples = [['ART', 'NN'], ['APPR','NN'], ['ART', 'CARD']]

								                for tupl in tuples:


								                    checktupleindex, tupleInWords = gs.checkForAnnotationTuple(mainsentence, tupl , 'word.tag_', 'None')

								                    if checktupleindex == 2:

								                        tuplesToCheck.append([tupl, tupleInWords])

								                triplesToCheck = []

								                triples = [['ART','ADJA','NN'], ['APPR', 'ART', 'NN'], ['KOKOM', 'ART', 'NN']]

								                for tripl in triples:

								                    checktripleindex, tripleInWords = gs.checkForAnnotationTriple(mainsentence, tripl, 'word.tag_', 'None')

								                    if checktripleindex == 3:

								                        triplesToCheck.append([tripl, tripleInWords])


								                #print('tuples to check', tuplesToCheck)

								                #print('triples to check', triplesToCheck)

								                #print('probablemainsentences', probablemainsentences)

								                for probsentences in probablemainsentences:


								                    checktripleindexes = []

								                    checktupleindexes = []

								                    #print(probsentences)

								                    filteredprobsentences = []

								                    for sentence in probsentences:

								                        tuplchecked = 0

								                        triplchecked = 0

								                        #print('sentence and tuples to check', sentence, tuplesToCheck)

								                        for tupl in tuplesToCheck:


								                            checkedsecondtime, tupleinWords = gs.checkForAnnotationTuple(sentence.split(), tupl[0], 'word.tag_', tupl[1])


								                            #print(sentence, checkedsecondtime)

								                            if checkedsecondtime == 1:


								                                tuplchecked = 0

								                            if checkedsecondtime == 2:


								                                tuplchecked = 1


								                        for tripl in triplesToCheck:

								                            checkedsecondtime, tripleinWords = gs.checkForAnnotationTriple(sentence.split(), tripl[0], 'word.tag_', tripl[1])

								                            if checkedsecondtime == 1 or checkedsecondtime == 2:


								                                triplchecked = 0

								                            if checkedsecondtime == 3:


								                                triplchecked = 1


								                        if triplchecked == 1 or tuplchecked == 1:

								                            filteredprobsentences.append(sentence)


								                    #print('filteredprobsentences', filteredprobsentences)

								                    if len(filteredprobsentences) == 0:

								                        filteredprobsentences = probsentences

								                    # here is still the problem, that there are lists of words instead of proper sentences..

								                    #print('filteredprobsentences', filteredprobsentences)

								                    probsMatrix = sgm.predictprobsOnSentenceList(filteredprobsentences, filteredprobsentences)


								                    #print(probsMatrix)


								                    for i in range(len(probsMatrix)):

								                        probsMatrix[i][0] = i


								                    #print(probsMatrix)


								                    sortedprobsMatrix = sorted(probsMatrix[::-1], key=lambda tup: tup[1], reverse=True)


								                    #print(sortedprobsMatrix)


								                    bestindex = sortedprobsMatrix[0][0]


								                    #print(bestindex)

								                    #print('probablemainsentences', filteredprobsentences)

								                    probablemainsentence = filteredprobsentences[int(bestindex)]

								                    #print('oi', probablemainsentence)


								                    #print('probablemainsentence', probablemainsentence)

								                    enumerationsentences.append([probablemainsentence])


								                enumerationsentences.append([' '.join(mainsentence)])


								                for notenum in NOTenumerations:

								                    #print(enumerationsentences)

								                    #print(enumerationsentences[-1])

								                    #print('enum no1', enumerationsentences)

								                    #print('notenum', notenum)

								                    enumerationsentences[-1].append(' '.join(notenum))

								                    #print('enumsentences',enumerationsentences[-1])

								                    enumerationsentences[-1] = [', '.join(enumerationsentences[-1])]


								            else:

								                enumerationsentences.append([sentence])


								            output.append(enumerationsentences)


								        for n in range(len(output[0])):

								            #print('out',output[0][n])

								            try:

								                output[0][n] = [output[0][n][0].split()]

								            except:

								                output[0][n] = [output[0][n][0][0]]


								        #print('done')

								        return output[0]


								    def GetUtteranceNumber(self, sentences):


								        nlp = self.nlp


								        uttersentences = []


								        for sentence in sentences:


								            doc = nlp(' '.join(sentence[0]))


								            subjectcount = 0


								            for word in doc:


								                if word.dep_ == 'sb' or word.dep_ == 'ep':

								                    subjectcount += 1


								            sentence.append(subjectcount)

								            uttersentences.append(sentence)


								        return uttersentences


								    def GetQuestionOrNot(self, sentences):


								        nlp = self.nlp


								        uttersentences = []

								        questionmark = 0

								        for sentence in sentences:


								            doc = nlp(' '.join(sentence[0]))


								            count = 0

								            for word in doc:


								                count += 1


								                if word.text == '?':

								                    questionmark = 1


								            sentence.append(questionmark)

								            uttersentences.append(sentence)


								        return uttersentences


								    def SplitSentencesIntoHauptNebenTuple(self, sentences, punctuations):


								        oldsplitsentences = []

								        #print('hauptneben inputsentences', sentences)


								        gs = self.gs


								        #print('importing spacy..')

								        import spacy

								        #print('done')


								        nlp = self.nlp


								        outputsentences = []

								        sentencesThatAreOutoutput = []

								        outsentences = []

								        for generalindex in range(len(sentences)):

								            presentence = sentences[generalindex]


								            splitsentence = []

								            splitsentence_deps = []

								            splitsentence_tags = []

								            splitsentences = []

								            splitsentences_deps = []

								            splitsentences_tags = []

								            commainfo = presentence[1]

								            outputsentence = []


								            token = commainfo[1]


								            commaornot = commainfo[0]


								            numberutterances = presentence[2]


								            sentence = presentence[0]


								            oldsentence = presentence[0]


								            #print(commaornot)

								            if commaornot >= 2:

								                #print('nla')


								                sentence[0] = sentence[0].title()


								                doc = nlp(' '.join(sentence))


								                for word in doc:


								                    #print(word.text)


								                    if word.text != ',' and word.text != '.':


								                        splitsentence.append(word.text)

								                        splitsentence_deps.append(word.dep_)

								                        splitsentence_tags.append(word.tag_)


								                    if word.text == ',':


								                        #print('oi')


								                        splitsentences.append(splitsentence)

								                        splitsentences_deps.append(splitsentence_deps)

								                        splitsentences_tags.append(splitsentence_tags)

								                        splitsentence = []

								                        splitsentence_deps = []

								                        splitsentence_tags = []


								                splitsentences.append(splitsentence)

								                splitsentences[0][0] = splitsentences[0][0].lower()

								                splitsentences_deps.append(splitsentence_deps)

								                splitsentences_tags.append(splitsentence_tags)

								                oldsplitsentences = splitsentences

								                #print(splitsentences)

								                #print(splitsentences_tags)

								                #print(splitsentences_deps)

								                spo = []


								                for n in range(len(splitsentences)):

								                    prespo = []

								                    prespo = gs.checkSPO(splitsentences_deps[n], 1)

								                    prespo.append( gs.checkForAnnotation(splitsentences[n], 'VVINF', 'word.tag_'))

								                    prespo.append(gs.checkForAnnotation(splitsentences[n], 'VAFIN', 'word.tag_'))

								                    prespo.append(gs.checkForAnnotation(splitsentences[n], 'VVFIN', 'word.tag_'))

								                    prespo.append(gs.checkForAnnotation(splitsentences[n], 'VMFIN', 'word.tag_'))


								                    spo.append(prespo)

								                #print(splitsentences_deps)

								                #print(splitsentences)

								                #print(spo)


								                indexSPO = []

								                lastm = len(splitsentences)

								                for o in range(len(splitsentences)):


								                    m = len(splitsentences) - 1 - o

								                    for n in range(len(splitsentences)):


								                        if m < n - 1 and n < lastm:


								                            #print('spo s',spo[m], spo[n])

								                            sb = spo[m][0] + spo[n][0]

								                            Vafin = 1

								                            if spo[m][3] == 1 or spo[n][3] == 1:

								                                Vafin = spo[m][3] + spo[n][3]

								                            Vvinf = 1

								                            if spo[m][4] == 1 or spo[n][4] == 1:

								                                Vvinf = spo[m][4] + spo[n][4]

								                            Vvfin = 1

								                            if spo[m][5] == 1 or spo[n][5] == 1:

								                                Vvfin = spo[m][5] + spo[n][5]

								                            Vmfin = 1

								                            if spo[m][6] == 1 or spo[n][6] == 1:

								                                Vmfin == spo[m][6] + spo[n][6]

								                            #wrapped = 0

								                            #for n in range(len(indexSPO)):

								                                #if n == indexSPO[n][0] + 1 and n == indexSPO[n][1] - 1:

								                                    #wrapped = 1

								                            #print(sb, Vafin, Vvinf, Vvfin, Vmfin, 'm n', m, n)

								                            if sb == 1 and Vafin == 1 and Vvinf == 1 and (Vvfin == 1 or Vmfin == 1):


								                                indexSPO.append([m,n])

								                                #print([m,n])

								                                lastm = m

								                                #print('lastm',lastm)


								                #print(splitsentences)

								                Hauptsentences = []

								                for n in range(len(indexSPO)):

								                    if indexSPO[n][0] > indexSPO[n][1]:

								                        i = 1

								                        j = 0

								                    else:

								                        i = 0

								                        j = 1

								                    Hauptsentences.append([splitsentences[indexSPO[n][i]] + splitsentences[indexSPO[n][j]] , indexSPO[n][i], indexSPO[n][j] ])


								                HauptSentences = []

								                for n in range(len(Hauptsentences)):

								                    m = len(Hauptsentences) - 1 - n

								                    HauptSentences.append(Hauptsentences[m])


								                #print('Hauptsentences', Hauptsentences)

								                #print('HauptSentences', HauptSentences)

								                sentencesThatAreOut =[]


								                for n in range(len(HauptSentences)):

								                    index = HauptSentences[n][1]

								                    finish = 0

								                    #print('Oi',HauptSentences[n])

								                    if n == len(HauptSentences) - 1:


								                        #print('lenHauptsentences', len(HauptSentences))


								                        stopindex = len(splitsentences)

								                        finish = 1

								                    else:

								                        stopindex = HauptSentences[n + 1][1]

								                    #print('stopindex', stopindex)

								                    vvfinisthere = 0

								                    if finish == 0:

								                        if splitsentences_tags[stopindex][0] == 'VVFIN':

								                            stopindex -= 1

								                            vvfinisthere = 1


								                    if splitsentences_tags[index][0] == 'VVFIN':

								                        vvfinisthere = 1


								                    if vvfinisthere == 1:


								                        HNTuple = HauptSentences[n][0] + [','] + splitsentences[index - 1]

								                        outputsentence.append(HNTuple)

								                        sentencesThatAreOut.append(index - 1)

								                        sentencesThatAreOut.append(Hauptsentences[n][1])

								                        sentencesThatAreOut.append(Hauptsentences[n][2])


								                    for m in range(index + 1, stopindex ):

								                        if m != HauptSentences[n][2]:

								                            HNTuple = HauptSentences[n][0] + [','] + splitsentences[m]

								                            #print('check', HauptSentences[n], n)

								                            #print('check', splitsentences[m], m)

								                            #print('double', HNTuple)

								                            outputsentence.append(HNTuple)


								                            sentencesThatAreOut.append(m)

								                            sentencesThatAreOut.append(Hauptsentences[n][1])

								                            sentencesThatAreOut.append(Hauptsentences[n][2])


								                sentencesThatAreOutoutput.append(sentencesThatAreOut)


								                cpOrNots = []

								                rcOrNots = []

								                for splitsentence in splitsentences_deps:

								                    cpOrNot = gs.checkForAnnotationInTokenizedSentence(splitsentence, 'cp')

								                    cpOrNots.append(cpOrNot)

								                    rcOrNot = gs.checkForAnnotationInTokenizedSentence(splitsentence, 'rc')

								                    rcOrNots.append(rcOrNot)


								                #print('Laenge splitsentences', len(splitsentences))

								                #print('laenge cpOrNots', len(cpOrNots))

								                #print(cpOrNots)

								                #print('rc or nots', rcOrNots)

								                pairs = []

								                for n in range(len(cpOrNots)):

								                    index = len(cpOrNots) - 1 - n

								                    done = 0

								                    if rcOrNots[index] == 1:

								                        pairs.append([index, index - 1])

								                        done = 1


								                    if done == 0 and cpOrNots[index] == 1:

								                        try:

								                            if splitsentences_tags[index + 1][0] == 'VVFIN':

								                                pairs.append([index, index + 1])

								                                done = 1

								                        except:

								                            pass

								                        try:

								                            if done == 0 and rcOrNots[index - 1] == 0:

								                                pairs.append([index, index - 1])

								                                done = 1

								                        except:

								                            pass

								                        try:

								                            if done == 0 and rcOrNots[index - 1] == 1:

								                                if rcOrNots[index - 2] == 0:

								                                    pairs.append([index, index - 2])

								                        except:

								                            pass


								                for pair in pairs[::-1]:

								                    if pair[0] not in set(sentencesThatAreOut) or pair[1] not in set(sentencesThatAreOut):

								                        outputsentence.append(splitsentences[pair[1]] + [','] + splitsentences[pair[0]])

								                #print('hnhn',sentences)

								                sentences[generalindex][0] = outputsentence


								                #print('outputsentence hntuple',outputsentence)

								                #outputsentences.append([outputsentence , i])


								            #print('Oio', outputsentences)

								            #print(sentencesThatAreOutoutput)

								            #print(splitsentences)

								            #print('oioioioioioioio',sentences)


								            #print(sentences[0][0])


								            #print('oioi',sentences[n])

								            #print('malatesta', sentences[n][0][0])

								            #print('generalindex sentences index 0', sentences[generalindex][0])

								            try:

								                if type(sentences[generalindex][0][0]) == str:

								                    sentences[generalindex][0] = [sentences[generalindex][0]]

								            except:

								                pass

								            #print('generalindex sentences index 0', sentences[generalindex][0])

								            #print('oldsentence', oldsentence)

								            newgeneratedsentences = len(sentences[generalindex][0])

								            if newgeneratedsentences > 1:

								                #print('goti t')

								                for sentence in sentences[generalindex][0]:

								                    punctuations.insert(generalindex, punctuations[generalindex])

								                    outsentences.append(sentence)

								                del punctuations[generalindex]

								            if newgeneratedsentences == 1:

								                if len(sentences[generalindex][0][0]) > 1:

								                    outsentences.append(sentences[generalindex][0][0])

								                else:

								                    outsentences.append(oldsentence)

								            if newgeneratedsentences == 0:

								                #print('case oldsentence', oldsentence)

								                outsentences.append(oldsentence)

								            #print('oioi', sentences[n])

								        # connect alonestanding commatas with the word before

								        #print('theoutsentences', outsentences)

								        for outsentence in outsentences:

								            todelete = []

								            for n in range(len(outsentence)):

								                if outsentence[n] == ',':

								                    todelete.append(n)

								                    outsentence[n-1] = outsentence[n-1] + ','

								            for deleteindex in todelete[::-1]:

								                del outsentence[deleteindex]


								        for index in range(len(outsentences)):

								            outsentences[index] = [outsentences[index]]

								        #print('theoutsentences', outsentences)


								        #removing doubles

								        doubledsentences = []

								        for o in range(len(outsentences)):

								            sentence = outsentences[o][0]

								            for m in range(len(outsentences)):

								                if m != o:

								                    count = 0

								                    for n in range(len(sentence)):

								                        if sentence[n] in outsentences[m][0] or sentence[n][:-1] in outsentences[m][0]:

								                            count += 1

								                        if count == len(sentence):

								                            doubledsentences.append(sentence)

								        punctdeleteindex = []

								        tmp = set()

								        for sentence in doubledsentences:

								            tmp.add(tuple(sentence))

								        #print(list(tmp))

								        doubledsentences = []

								        for tup in tmp:

								            doubledsentences.append([list(tup)])

								        #print('doubledsentences',doubledsentences)

								        punctdeleteindexes = []

								        for double in doubledsentences:

								            if double in outsentences:

								                punctdeleteindex = outsentences[::-1].index(double)

								                del outsentences[len(outsentences) - 1 - punctdeleteindex]

								                punctdeleteindexes.append(punctdeleteindex)


								        for index in punctdeleteindexes[::-1]:

								            del punctuations[len(outsentences) - 1 - index]


								        #print('oldsplit',oldsplitsentences)

								        #print('outsents',outsentences)


								        for o in range(len(oldsplitsentences)):

								            for m in range(len(outsentences)):

								                counter = 0

								                for n in range(len(oldsplitsentences[o])):

								                    if oldsplitsentences[o][n] in outsentences[m][0] or oldsplitsentences[o][n] + ',' in outsentences[m][0]:

								                        counter += 1

								                if counter >= len(oldsplitsentences[o]):

								                    break

								                if m == len(outsentences) - 1 and counter < len(oldsplitsentences[o]):

								                    if o == 0:

								                        outsentences.insert(0,[oldsplitsentences[o]])

								                        punctuations.insert(0, punctuations[0])

								                    else:

								                        newones = []

								                        for i in range(len(outsentences)):

								                            if outsentences[i][0][-1] == oldsplitsentences[o - 1][-1]:

								                                if len(outsentences[i][0]) > 2 and len(oldsplitsentences[o - 1]) > 2:

								                                    if outsentences[i][0][-2] == oldsplitsentences[o - 1][-2]:

								                                        if outsentences[i][0][-3] == oldsplitsentences[o - 1][-3]:

								                                            newones.append([i + 1, [oldsplitsentences[o]]])

								                        for newone in newones[::-1]:

								                            #print(newones)

								                            outsentences.insert(newone[0], newone[1])

								                            punctuations.insert(newone[0], punctuations[newone[0] - 1])


								        #print('outsentences at the very end ', outsentences, punctuations)

								        return outsentences, punctuations


								    # Notiz: Hier muss der Input immer Paare sein, von Hauptsatz/Nebensatz. D.h. eine weitere vorgeschaltete Klasse ist von Nöten.


								    def SplitCommatas(self, Inputsentences, punctuations):


								        gs = self.gs


								        nlp = self.nlp


								        gramcorr_splitsentences = []

								        counter = 0

								        newpunctuationsindex = []

								        for Inputsentence in Inputsentences:


								            counter += 1


								            commainfo = Inputsentence[1]


								            token = commainfo[1]


								            commaornot = commainfo[0]


								            numberutterances = Inputsentence[2]


								            if commaornot == 0:

								                gramcorr_splitsentences.append(Inputsentence[0])


								            if commaornot > 1:

								                gramcorr_splitsentences.append(Inputsentence[0])


								            if commaornot == 1:

								                oldsentence = Inputsentence[0]

								                Inputsentence = [[Inputsentence[0]]]


								                for sentence in Inputsentence[0]:


								                    splitsentence = []


								                    splitsentences = []


								                    processed = 0

								                    wasNotInAnyList = 0

								                    try:

								                        for n in range(len(token)):


								                            if token[n] in self.final_list:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])


								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 1:


								                                    if token[n] == 'um' or token[n] == 'Um':


								                                        splitsentences[n].insert(0,'dies')

								                                        splitsentences[n].insert(0,'um')

								                                    else:

								                                        splitsentences[n].insert(0,'dann')


								                                if n == 0:


								                                    if token[n] == 'um' or token[n] == 'Um':

								                                        splitsentences[n].insert(0,'dies')

								                                        splitsentences[n].insert(0,'um')

								                                        splitsentences = splitsentences[::-1]

								                                    else:

								                                        splitsentences[n].insert(0,'dann')


								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                generalrules = [['ADV','VAFIN'], ['ADV', 'VVFIN']]

								                                processed = 1


								                            if token[n] in self.adversativ_list:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                splitsentences[n].append('jedoch')


								                                generalrules = [['ADV','VAFIN'], ['ADV', 'VVFIN']]

								                                processed = 1


								                            if token[n] in self.kausal_list:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                # Da deswegen an den anderen Satz gehaengt wird, muss der input zu commasentences immer ZWEI sentences sein.

								                                #print('splitsentences in kausal', splitsentences)

								                                if n == 1:

								                                    splitsentences[n - 1].insert(0,'deswegen')

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                if n == 0:

								                                    splitsentences[n + 1].insert(0,'deswegen')


								                                #print('splitsentences in kausal', splitsentences)


								                                generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]

								                                processed = 1


								                            # from here come konsekutiv sentences, they have to be split according https://www.deutschplus.net/pages/Konsekutivsatz

								                            if token[n] in self.konsekutiv_list:

								                                #print('oi konsekutiv')

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                generalrules = [['KOUS','PPER']]

								                                processed = 1


								                            if token[n] in self.konditional_list:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word[-1] == ',':

								                                        splitsentence.append(word[:-1])

								                                    if word == ',':

								                                        pass

								                                    if word[-1] != ',':

								                                        splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 1:


								                                    spoCount = gs.checkSPO(splitsentences[n], 0)


								                                    spoCount = sum(spoCount)


								                                    if spoCount == 2:

								                                        thereisanes = 0

								                                        for word in splitsentences[n]:

								                                            if word == 'es' or word == 'Es':

								                                                thereisanes = 1

								                                        if thereisanes == 0:

								                                            splitsentences[n].append('es')


								                                if n == 0:


								                                    spoCount = gs.checkSPO(splitsentences[n], 0)


								                                    spoCount = sum(spoCount)


								                                    if spoCount == 2:


								                                        thereisanes = 0

								                                        for word in splitsentences[n]:

								                                            if word == 'es' or word == 'Es':

								                                                thereisanes = 1

								                                        if thereisanes == 0:

								                                            splitsentences[n].append('es')


								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                generalrules = [['KOUS','PPER']]

								                                processed = 1


								                            if token[n] in self.konzessiv_list:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 1:

								                                    splitsentences[n - 1].insert(0,'trotzdem')

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                if n == 0:

								                                    splitsentences[n + 1].insert(0,'trotzdem')


								                                generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]

								                                processed = 1


								                            if token[n] in self.lokal_list:

								                                #print('lokal ole ole ')

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 1:

								                                    splitsentences[n - 1].insert(0,'dort')

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                if n == 0:

								                                    splitsentences[n + 1].insert(0,'dort')


								                                generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]

								                                processed = 1


								                            if token[n] in self.instrumental_list:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 1:

								                                    splitsentences[n - 1].insert(0,'so')

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                if n == 0:

								                                    splitsentences[n + 1].insert(0,'so')


								                                generalrules = [['ADV','VAFIN'], ['ADV', 'VVFIN']]

								                                processed = 1


								                            if token[n] in self.temporal_list_vor:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 1:

								                                    splitsentences[n].insert(0,'danach')


								                                if n == 0:

								                                    splitsentences[n].insert(0,'danach')

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]

								                                processed = 1


								                            if token[n] in self.temporal_list_nach:

								                                splitsentence = []

								                                for word in sentence:


								                                    if word != token[n]:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 1:

								                                    splitsentences[n].insert(0,'davor')


								                                if n == 0:

								                                    splitsentences[n].insert(0,'davor')

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                generalrules = [['PROAV','VAFIN'], ['PROAV', 'VVFIN']]

								                                processed = 1


								                            #print(token[n])

								                            if token[n] == 'der' or token[n] == 'welcher':


								                                tokens = self.nlp(' '.join(sentence))

								                                for word in tokens:

								                                    if word.dep_ == 'rc':

								                                        wordwithrc = word.text


								                                rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')


								                                oldsplitsentences = splitsentences

								                                splitsentences = []


								                                if rcORnot == 1:

								                                    splitsentence = []

								                                    for word in sentence:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                        if word[-1] == ',' or word == ',':


								                                            splitsentences.append(splitsentence)


								                                            splitsentence = []


								                                    splitsentences.append(splitsentence)


								                                    # das umtauschen wird hier vollzogen, da ansonsten spacy dieser nicht als PDS einliest.. analog in den anderen.


								                                    if wordwithrc in splitsentences[n]:


								                                        splitsentences[n][0] = 'dieser'


								                                        verb = splitsentences[n][-1]


								                                        splitsentences[n] = splitsentences[n][:-1]

								                                        splitsentences[n].insert(1, verb)


								                                        #print('Vorsicht', splitsentences)


								                                        generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                        processed = 1

								                                    else:

								                                        splitsentences = oldsplitsentences

								                                        splitsentence = []


								                            if token[n] == 'die' or token[n] == 'welche':


								                                tokens = self.nlp(' '.join(sentence))

								                                for word in tokens:

								                                    if word.dep_ == 'rc':

								                                        wordwithrc = word.text


								                                rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')


								                                oldsplitsentences = splitsentences

								                                splitsentences = []


								                                if rcORnot == 1:

								                                    #print('it went to rcornot in case die')


								                                    splitsentence = []

								                                    for word in sentence:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                        if word[-1] == ',' or word == ',':


								                                            splitsentences.append(splitsentence)


								                                            splitsentence = []


								                                    splitsentences.append(splitsentence)


								                                    if wordwithrc in splitsentences[n]:

								                                        #print('wordwithrc was in sentence')

								                                        #print(wordwithrc)

								                                        #print(splitsentences[n])

								                                        #print('wordwithrcend')

								                                        splitsentences[n][0] = 'diese'


								                                        verb = splitsentences[n][-1]


								                                        splitsentences[n] = splitsentences[n][:-1]

								                                        splitsentences[n].insert(1, verb)


								                                        generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                        processed = 1

								                                    else:


								                                        splitsentences = oldsplitsentences

								                                        splitsentence = []


								                            if token[n] == 'dem':


								                                tokens = self.nlp(' '.join(sentence))

								                                for word in tokens:

								                                    if word.dep_ == 'rc':

								                                        wordwithrc = word.text


								                                rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')


								                                oldsplitsentences = splitsentences

								                                splitsentences = []


								                                if rcORnot == 1:

								                                    splitsentence = []

								                                    for word in sentence:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',' and word[-1] != '.':

								                                            splitsentence.append(word)


								                                        if word[-1] == ',':


								                                            splitsentences.append(splitsentence)


								                                            splitsentence = []


								                                    splitsentences.append(splitsentence)


								                                    if wordwithrc in splitsentences[n]:


								                                        splitsentences[n][0] = 'diesem'


								                                        verb = splitsentences[n][-1]


								                                        splitsentences[n] = splitsentences[n][:-1]

								                                        splitsentences[n].insert(1, verb)


								                                        generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                        processed = 1

								                                    else:

								                                        splitsentences = oldsplitsentences

								                                        splitsentence = []


								                            if token[n] == 'das' or token[n] == 'welches':


								                                tokens = self.nlp(' '.join(sentence))

								                                for word in tokens:

								                                    if word.dep_ == 'rc':

								                                        wordwithrc = word.text


								                                rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')


								                                #print('Oeeee',rcORnot)

								                                oldsplitsentences = splitsentences

								                                splitsentences = []

								                                if rcORnot == 1:

								                                    splitsentence = []

								                                    for word in sentence:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                        if word[-1] == ',' or word == ',':


								                                            splitsentences.append(splitsentence)


								                                            splitsentence = []


								                                    splitsentences.append(splitsentence)

								                                    #print('splitsentence in das rc', splitsentences)

								                                    if wordwithrc in splitsentences[n]:


								                                        splitsentences[n][0] = 'dieses'


								                                        verb = splitsentences[n][-1]

								                                        #print('verb',verb)

								                                        splitsentences[n] = splitsentences[n][:-1]

								                                        splitsentences[n].insert(1, verb)


								                                        generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                        processed = 1

								                                    else:

								                                        splitsentences = oldsplitsentences

								                                        splitsentence = []


								                            if token[n] == 'dessen' or token[n] == 'wessen':


								                                tokens = self.nlp(' '.join(sentence))

								                                for word in tokens:

								                                    if word.dep_ == 'rc':

								                                        wordwithrc = word.text


								                                rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')


								                                oldsplitsentences = splitsentences

								                                splitsentences = []


								                                if rcORnot == 1:

								                                    splitsentence = []

								                                    for word in sentence:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                        if word[-1] == ',' or word == ',':


								                                            splitsentences.append(splitsentence)


								                                            splitsentence = []


								                                    splitsentences.append(splitsentence)


								                                    if wordwithrc in splitsentences[n]:

								                                        verb = splitsentences[n][-1]


								                                        splitsentences[n] = splitsentences[n][:-1]

								                                        splitsentences[n].insert(1, verb)


								                                        generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                        processed = 1

								                                    else:

								                                        splitsentences = oldsplitsentences

								                                        splitsentence = []


								                            if token[n] == 'den' or token[n] == 'welchen':


								                                tokens = self.nlp(' '.join(sentence))

								                                for word in tokens:

								                                    if word.dep_ == 'rc':

								                                        wordwithrc = word.text


								                                rcORnot = gs.checkForAnnotation(sentence, 'rc', 'word.dep_')


								                                oldsplitsentences = splitsentences

								                                splitsentences = []


								                                if rcORnot == 1:

								                                    splitsentence = []

								                                    for word in sentence:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',':

								                                            splitsentence.append(word)


								                                        if word[-1] == ',' or word == ',':


								                                            splitsentences.append(splitsentence)


								                                            splitsentence = []


								                                    splitsentences.append(splitsentence)


								                                    if wordwithrc in splitsentences[n]:


								                                        splitsentences[n][0] = 'diesen'


								                                        verb = splitsentences[n][-1]


								                                        splitsentences[n] = splitsentences[n][:-1]

								                                        splitsentences[n].insert(1, verb)


								                                        generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                        processed = 1

								                                    else:

								                                        splitsentences = oldsplitsentences

								                                        splitsentence = []


								                            if token[n] == 'wem' or token[n] == 'Wem' or token[n] == 'welchem':


								                                daORnot = gs.checkForAnnotation(sentence, 'da', 'word.dep_')


								                                oaORnot = gs.checkForAnnotation(sentence, 'oa', 'word.dep_')


								                                reORnot = gs.checkForAnnotation(sentence, 're', 'word.dep_')


								                                oldsplitsentences = splitsentences

								                                splitsentences = []


								                                for word in sentence:


								                                    if word[-1] == ',':

								                                        splitsentence.append(word[:-1])

								                                    if word == ',':

								                                        pass

								                                    if word[-1] != ',':

								                                        splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 0:

								                                    index = 1

								                                if n == 1:

								                                    index = 0


								                                if reORnot == 1:

								                                    pass

								                                if daORnot == 1 and reORnot == 0:

								                                    splitsentences[index].insert(1, 'das')


								                                if oaORnot == 1 and reORnot == 0:

								                                    splitsentences[index].insert(1, 'dem')


								                                if n == 1:

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                processed = 1


								                            if token[n] in self.indirectspeech_list and token[1] not in self.konsekutiv_list:


								                                reORnot = gs.checkForAnnotation(sentence, 're', 'word.dep_')

								                                oldsplitsentences = splitsentences

								                                splitsentences = []

								                                splitsentence = []

								                                for word in sentence:


								                                    if word[-1] == ',':

								                                        splitsentence.append(word[:-1])

								                                    if word == ',':

								                                        pass

								                                    if word[-1] != ',':

								                                        splitsentence.append(word)


								                                    if word[-1] == ',' or word == ',':


								                                        splitsentences.append(splitsentence)


								                                        splitsentence = []


								                                splitsentences.append(splitsentence)


								                                if n == 0:

								                                    index = 1

								                                if n == 1:

								                                    index = 0


								                                if reORnot == 0:

								                                    if splitsentences[index][0] != 'was':

								                                        splitsentences[index].insert(1, 'das')


								                                if n == 1:

								                                    splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]


								                                generalrules = [['PDS','VAFIN'], ['PDS', 'VVFIN']]

								                                processed = 1


								                            if processed == 0 and n == 1:


								                                ZUVINFTupelORnot = gs.checkForAnnotationTuple(sentence, ['PTKZU', 'VVINF'], 'word.tag_', 'None')

								                                if ZUVINFTupelORnot == 0:

								                                    ZUVINFTupelORnot = gs.checkForAnnotationTuple(sentence, ['PTKZU', 'VAINF'], 'word.tag_', 'None')


								                                if ZUVINFTupelORnot == 1:


								                                    reORnot = gs.checkForAnnotation(sentence, 're', 'word.dep_')

								                                    splitsentence = []

								                                    for word in sentence:


								                                        if word[-1] == ',':

								                                            splitsentence.append(word[:-1])

								                                        if word == ',':

								                                            pass

								                                        if word[-1] != ',' :

								                                            splitsentence.append(word)


								                                        if word[-1] == ',' or word == ',':


								                                            splitsentences.append(splitsentence)

								                                            processed = 1

								                                            splitsentence = []


								                                    splitsentences.append(splitsentence)


								                                    for m in range(2):

								                                        ZUINForNOT = gs.checkForAnnotationTuple(splitsentences[m], ['PTKZU', 'VVINF'], 'word.tag_','None')


								                                        if ZUINForNOT == 0:

								                                            ZUINForNOT = gs.checkForAnnotationTuple(splitsentences[m], ['PTKZU', 'VAINF'], 'word.tag_','None')


								                                        if ZUINForNOT == 1:

								                                            r = m

								                                            ZUINForNOT = 0


								                                    if r == 0:

								                                        index = 1

								                                    if r == 1:

								                                        index = 0


								                                    objectORnot = gs.checkForAnnotation(splitsentences[index] , 'oa', 'word.dep_')


								                                    if reORnot == 0 and objectORnot == 0:

								                                        splitsentences[index].insert(1, 'das')


								                                    if r == 1:

								                                        splitsentences[0], splitsentences[1] = splitsentences[1] , splitsentences[0]

								                                else:

								                                    processed == 2


								                    except:

								                        wasNotInAnyList = 1


								                    #rules = [['ART','ADJA','NN'], ['ART','ADJA','NE'], ['ART', 'NN'], ['ART', 'NE'], ['APPR','NN'], ['APPR','NE'], ['APPR', 'ART', 'NN'], ['APPR', 'ART', 'NE'], ['APPR','ART','NN','ADJA','NN'], ['APPR','ART','NN','ADJA','NE'], ['KOKOM', 'ART', 'NN'], ['KOKOM', 'ART', 'NE'], ['PPOSAT', 'NN'], ['PPOSAT', 'NE'], ['ADV', 'ADJD']]


								                    #print('B',splitsentences)

								                    endsentences = []

								                    if (processed == 2 or processed == 0) and n == 1:

								                        wasNotInAnyList = 1


								                    try:

								                        if wasNotInAnyList == 0:

								                            newpunctuationsindex.insert(0,[counter-1,punctuations[counter-1]])

								                            #print('splitsentencee', splitsentences)

								                            if len(splitsentences) > 2:

								                                splitsentences = splitsentences[:2]


								                            #print('splitsentenceeeees', splitsentences)


								                            for splitsentence in splitsentences:


								                                #print('splitsentenceeeeeeeeeeee!!',splitsentence)

								                                wordtoputfirst = 'nada'

								                                for word in self.firstwordlist:

								                                    if word == splitsentence[0]:

								                                        wordtoputfirst = word

								                                        splitsentence.remove(word)


								                                #print('get the tuples and triples to check..')

								                                tuplesTocheck, triplesTocheck, quadruplesTocheck = self.gs.GetTuplesinSentence(splitsentence)

								                                #print('done')

								                                #print(tuplesTocheck, 'ole', triplesTocheck ,'aiai', quadruplesTocheck)

								                                #print('1')

								                                grammpiecessentence = self.gs.createTupleofGrammarpieces( splitsentence, tuplesTocheck, triplesTocheck, quadruplesTocheck)


								                                #print('grammpiece',grammpiecessentence)

								                                #print('2')

								                                if len(grammpiecessentence) > 7:

								                                    print('A sentence is too long, too many permutations. \n piping wrong grammar..')

								                                    endsentence = ' '.join(grammpiecessentence)


								                                else:

								                                    #print('genrating the permutations')

								                                    permutations = self.sgm.GeneratePermutationsOfSentence(grammpiecessentence)

								                                    #print('done')

								                                    #print(permutations)

								                                    #print('3')

								                                    firstwordwithverblist = ['deswegen', 'danach']

								                                    permutationstodelete = []

								                                    for permutation in permutations:

								                                        #print('4')

								                                        if permutation[0] in firstwordwithverblist:

								                                            #print('4.1')

								                                            count = 1

								                                            for word in self.nlp(permutation[1]):

								                                                #print('4.2')

								                                                if word.tag_[0] != 'V':

								                                                    #print('4.3')

								                                                    permutationstodelete.append(permutation)

								                                                    break

								                                                else:

								                                                    break

								                                            #for word in self.nlp(permutation[0]):

								                                                #print('4.2')

								                                                #if word.tag_[0] != 'V':

								                                                    #print('4.3')

								                                                    #permutationstodelete.append(permutation)

								                                                    #break

								                                                #else:

								                                                    #break

								                                    for delperm in permutationstodelete:

								                                        try:

								                                            permutations.remove(delperm)

								                                        except:


								                                            pass

								                                    #print('5')


								                                    sentencesToCheck = []

								                                    if wordtoputfirst in self.firstwordlist:

								                                        for sentence in permutations:

								                                            sentencesToCheck.append(wordtoputfirst + ' ' + ' '.join(sentence))

								                                    else:

								                                        for sentence in permutations:

								                                            sentencesToCheck.append(' '.join(sentence))


								                                    endsentence = self.sgm.GetBestSentenceFromSentencesAccordingToGrammar(sentencesToCheck, ' '.join(splitsentence))

								                                    #print('done')

								                                    #print('endsent',endsentence)

								                                endsentences.append(endsentence)

								                    except:

								                        #print('there was an error')

								                        wasNotInAnyList = 1

								                        endsentences = []

								                        todelete = []

								                        for index in range(len(newpunctuationsindex)):

								                            if newpunctuationsindex[index][0] == counter - 1:

								                                todelete.append(index)

								                        for todel in todelete[::-1]:

								                            del newpunctuationsindex[todel]


								                    if wasNotInAnyList == 1:

								                        #print('was not in any list')

								                        #print(oldsentence)

								                        endsplisentences = []

								                        splisentence = []

								                        for word in oldsentence:


								                            if word[-1] == ',':

								                                splisentence.append(word[:-1])


								                            if word == ',':

								                                pass

								                            if word[-1] != ',':

								                                splisentence.append(word)


								                            if word[-1] == ',' or word == ',':


								                                endsplisentences.append(splisentence)


								                                splisentence = []


								                        endsplisentences.append(splisentence)


								                        newpunctuationsindex.insert(0,[counter-1,punctuations[counter-1]])


								                        #print('endsplisentences',endsplisentences)

								                        for splsentence in endsplisentences:


								                            endsentences.append(' '.join(splsentence))


								                        '''


								                        fsearch1 = self.fsearch1

								                        spacyclass1 = 'word.tag_'


								                        gs_sentence1 = gs.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass1)


								                        print('searchPatternMatch for tags')

								                        bestmatches1 = fsearch1.searchPatternMatch(' '.join(gs_sentence1), 1)

								                        print('done')


								                        #print('oioi', bestmatches1)


								                        #print(len(fsearch1.database))

								                        right_gs_tupel1 = []


								                        if len(bestmatches1) < 10:

								                            bestndocs1 = len(bestmatches1)

								                        else:

								                            bestndocs1 = 10


								                        for m in range(bestndocs1):

								                            right_gs_tupel1.append(fsearch1.database[bestmatches1[m][0]])


								                        statistically_correct_sentences1 = gs.Sentence2RightGrammarTupel(' '.join(splitsentence), gs_sentence1, right_gs_tupel1)


								                        fsearch2 = self.fsearch2


								                        spacyclass2 = 'word.dep_'


								                        gs_sentence2 = gs.Sentence2GrammarSchema(' '.join(splitsentence), spacyclass2)


								                        print('searchPatternMatch for deps')

								                        bestmatches2 = fsearch2.searchPatternMatch(' '.join(gs_sentence2), 1)

								                        print('done')


								                        right_gs_tupel2 = []


								                        if len(bestmatches2) < 10:

								                            bestndocs2 = len(bestmatches2)

								                        else:

								                            bestndocs2 = 10


								                        for m in range(bestndocs2):

								                            right_gs_tupel2.append(fsearch2.database[bestmatches2[m][0]])


								                        #print(' '.join(splitsentence))


								                        statistically_correct_sentences2 = gs.Sentence2RightGrammarTupel(' '.join(splitsentence), gs_sentence2, right_gs_tupel2)


								                        print(splitsentence)


								                        Rightsentence = gs.GetBestgsAccordingRules(' '.join(splitsentence) , gs_sentence1, right_gs_tupel1, right_gs_tupel2, statistically_correct_sentences1, statistically_correct_sentences2, rules, generalrules)


								                        '''

								                    for endsentence in endsentences:

								                        gramcorr_splitsentences.append(endsentence.split())


								        for index in newpunctuationsindex:

								            punctuations.insert(index[0], index[1])


								        return gramcorr_splitsentences, punctuations


								    def putAppendixesIntoOwnSentences(self, sentences, punctuations):


								        gs = self.gs

								        #triples = [['NN', 'ART', 'NN'], ['NE', 'ART', 'NN'], ['NN', 'ART', 'NN'], ['NE', 'ART', 'NE']]

								        quadruples = [['NN', 'APPR', 'NE', 'NN'], ['NN', 'APPR', 'NE', 'NN'], ['NN', 'APPR', 'ART', 'NN'], ['NE', 'APPR', 'ART', 'NN'], ['NN', 'APPR', 'ART', 'NE'], ['NE', 'APPR', 'ART', 'NE']]

								        quadruplestochange = []

								        triplestochange = []

								        newsentences = []

								        newpunctuations = []

								        Whatisofnouns = []

								        oldsentences = sentences

								        oldpunctuations = punctuations

								        for hauptindex in range(len(sentences)):


								            sentence = sentences[hauptindex]

								            try:

								                #for triple in triples:

								                #    AnnoOrNot, tripleInWords = gs.checkForAnnotationTriple(sentence, triple, 'word.tag_', 'None')

								                #    for tripleinwor in tripleInWords:

								                #        triplestochange.append([triple, tripleinwor])


								                for quadruple in quadruples:

								                    AnnoOrNot, quadrupleInWords = gs.checkForAnnotationQuadruple(sentence, quadruple, 'word.tag_', 'None')

								                    #print('quadinwords', quadrupleInWords)

								                    #print('ANNOORNOT', AnnoOrNot)

								                    for quadrupleInWo in quadrupleInWords:

								                        quadruplestochange.append([quadruple, quadrupleInWo])


								                #print('quadstochange',quadruplestochange)

								                for quad in quadruplestochange:

								                    for n in range(len(sentence) - 4):

								                        if sentence[n] == quad[1][0]:

								                            if sentence[n + 1] == quad[1][1]:

								                                if sentence[n + 2] == quad[1][2]:

								                                    artword = None

								                                    longerWhatisnoun = 0

								                                    for m in range(2):

								                                        for word in self.nlp(sentence[n - m]):

								                                            if word.tag_ == 'ART':

								                                                Nounthatis = sentence[n - m:n + 1]

								                                                import spacy

								                                                nlp = spacy.load('de_core_news_sm')

								                                                token3 = nlp(sentence[n+4])

								                                                counter = 0

								                                                Whatisnoun = sentence[n + 1:n + 4]

								                                                for wor in token3:

								                                                    counter += 1

								                                                    if wor.tag_ == 'NN' or wor.tag_ == 'NE':

								                                                        if counter == 1:

								                                                            Whatisnoun = sentence[n + 1:n + 5]

								                                                            longerWhatisnoun = 1

								                                                        if counter == 2:

								                                                            Whatisnoun = sentence[n + 1:n + 4]


								                                                artword = word.text

								                                    #print(sentence[n - 1],'oi')

								                                    if ((artword == 'die' or artword == 'Die') and sentence[n][-1] != 'n') or ((artword == 'der' or artword == 'einer' or artword == 'dieser') and (sentence[n - 2] in ['von', 'in', 'auf', 'ueber', 'unter', 'nach', 'mit'])):


								                                        if artword == 'der':

								                                            Nounthatis[0] = 'die'


								                                        donothing = 0

								                                        if sentence[n + 1] == 'mit':

								                                            if sentence[n + 2] == 'den':

								                                                verb = ' hat die '

								                                                Whatisnoun = Whatisnoun[2:]

								                                            if sentence[n + 2] == 'der':

								                                                verb = ' hat eine '

								                                                Whatisnoun = Whatisnoun[2:]

								                                            if sentence[n + 2] != 'der' and sentence[n + 2] != 'den':

								                                                donothing = 1

								                                        else:

								                                            verb = ' ist '

								                                        if donothing == 0:

								                                            newsentence = ' '.join(Nounthatis) + verb + ' '.join(Whatisnoun)


								                                            newsentences.append([hauptindex + 1, newsentence.split()])

								                                            newpunctuations.append([hauptindex + 1, punctuations[hauptindex]])

								                                            if longerWhatisnoun == 0:

								                                                Whatisofnouns.append([n + 1, n + 4, hauptindex])

								                                            else:

								                                                Whatisofnouns.append([n + 1, n + 5, hauptindex])

								            except:

								                print('Konnte nicht ' + str(sentence) + 'in Characterisierung pro Satz prozessieren..')

								        try:

								            for whatis in Whatisofnouns[::-1]:

								                thereisacomma = 0

								                #print(sentences[whatis[2]][whatis[1] - 1])

								                if sentences[whatis[2]][whatis[1] - 1][-1] == ',':


								                    thereisacomma = 1

								                if thereisacomma == 1:

								                    #print(sentences[whatis[2]][whatis[0] - 1])

								                    sentences[whatis[2]][whatis[0] - 1] = sentences[whatis[2]][whatis[0] - 1] + ','

								                del sentences[whatis[2]][whatis[0]:whatis[1]]

								            for newsent in newsentences[::-1]:

								                sentences.insert(newsent[0], newsent[1])

								            for newpunct in newpunctuations[::-1]:

								                punctuations.insert(newpunct[0], newpunct[1])

								            for sentence in sentences:

								                if sentence[-1][-1] == ',':

								                    sentence[-1] = sentence[-1][:-1]

								        except:

								            print('konnte nicht die gesammelten Characterisierungen prozessieren')

								            sentences = oldsentences

								            punctuations = oldpunctuations


								        return sentences, punctuations