further cleaning of repo
This commit is contained in:
parent
b4db610e8b
commit
924781e2f9
41 changed files with 0 additions and 77164 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -1,306 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Hier werden die Bibliothek ConjunctSolve und deren Funktionen importiert. Anschließend wird die Klasse initialisiert."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from ConjunctSolve import *\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"cs = ConjunctSolve(None,None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1000\n",
|
||||
"2000\n",
|
||||
"3000\n",
|
||||
"4000\n",
|
||||
"5000\n",
|
||||
"6000\n",
|
||||
"7000\n",
|
||||
"8000\n",
|
||||
"9000\n",
|
||||
"10000\n",
|
||||
"11000\n",
|
||||
"12000\n",
|
||||
"13000\n",
|
||||
"14000\n",
|
||||
"15000\n",
|
||||
"16000\n",
|
||||
"17000\n",
|
||||
"18000\n",
|
||||
"19000\n",
|
||||
"20000\n",
|
||||
"21000\n",
|
||||
"22000\n",
|
||||
"23000\n",
|
||||
"24000\n",
|
||||
"25000\n",
|
||||
"26000\n",
|
||||
"27000\n",
|
||||
"28000\n",
|
||||
"29000\n",
|
||||
"30000\n",
|
||||
"31000\n",
|
||||
"32000\n",
|
||||
"33000\n",
|
||||
"34000\n",
|
||||
"35000\n",
|
||||
"36000\n",
|
||||
"37000\n",
|
||||
"38000\n",
|
||||
"39000\n",
|
||||
"40000\n",
|
||||
"41000\n",
|
||||
"42000\n",
|
||||
"43000\n",
|
||||
"44000\n",
|
||||
"45000\n",
|
||||
"46000\n",
|
||||
"47000\n",
|
||||
"48000\n",
|
||||
"49000\n",
|
||||
"50000\n",
|
||||
"51000\n",
|
||||
"52000\n",
|
||||
"53000\n",
|
||||
"54000\n",
|
||||
"55000\n",
|
||||
"56000\n",
|
||||
"57000\n",
|
||||
"58000\n",
|
||||
"59000\n",
|
||||
"60000\n",
|
||||
"61000\n",
|
||||
"62000\n",
|
||||
"63000\n",
|
||||
"64000\n",
|
||||
"65000\n",
|
||||
"66000\n",
|
||||
"67000\n",
|
||||
"68000\n",
|
||||
"69000\n",
|
||||
"70000\n",
|
||||
"71000\n",
|
||||
"72000\n",
|
||||
"73000\n",
|
||||
"74000\n",
|
||||
"75000\n",
|
||||
"76000\n",
|
||||
"77000\n",
|
||||
"78000\n",
|
||||
"79000\n",
|
||||
"80000\n",
|
||||
"81000\n",
|
||||
"82000\n",
|
||||
"83000\n",
|
||||
"84000\n",
|
||||
"85000\n",
|
||||
"86000\n",
|
||||
"87000\n",
|
||||
"88000\n",
|
||||
"89000\n",
|
||||
"90000\n",
|
||||
"91000\n",
|
||||
"92000\n",
|
||||
"93000\n",
|
||||
"94000\n",
|
||||
"95000\n",
|
||||
"96000\n",
|
||||
"97000\n",
|
||||
"98000\n",
|
||||
"99000\n",
|
||||
"100000\n",
|
||||
"101000\n",
|
||||
"102000\n",
|
||||
"103000\n",
|
||||
"104000\n",
|
||||
"105000\n",
|
||||
"106000\n",
|
||||
"107000\n",
|
||||
"108000\n",
|
||||
"109000\n",
|
||||
"110000\n",
|
||||
"111000\n",
|
||||
"112000\n",
|
||||
"113000\n",
|
||||
"114000\n",
|
||||
"115000\n",
|
||||
"116000\n",
|
||||
"117000\n",
|
||||
"118000\n",
|
||||
"119000\n",
|
||||
"120000\n",
|
||||
"121000\n",
|
||||
"122000\n",
|
||||
"123000\n",
|
||||
"124000\n",
|
||||
"125000\n",
|
||||
"126000\n",
|
||||
"127000\n",
|
||||
"128000\n",
|
||||
"creating the hkl dump of Indi_ConjuDBAll\n",
|
||||
"done..\n",
|
||||
"Creating the hkl dump of Indi_ConjuDB 1\n",
|
||||
"Creating the hkl dump of Indi_ConjuDB 2\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'done'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cs.create_hklDB_from_csv('Indikativ_Conjunktiv.txt', 'None')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Nun werden die Datenbanken in den Arbeitsspeicher geladen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cs.load_DB_into_FASTsearch()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"oi ist eine Klasse mit nur zwei Funktionen: dem Einlesen und dem Schreiben von Textdateien. Die Funktion ReadDoc2Sent liest ein Textdokument ein. Der Output sind die Sätze, in Listen geschrieben \n",
|
||||
"( eine Liste in python hat die Form [ 'Das', 'ist', 'ein', 'Satz.' ] )."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from oi import *\n",
|
||||
"oi = oi()\n",
|
||||
"\n",
|
||||
"sentences, punctuations = oi.ReadDoc2Sent('atest1')\n",
|
||||
"print(sentences, punctuations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Die Funktion replaceConjunctives wird nun auf die Liste aus Satzlisten angewendet. Die Variable outsentences ist auch wieder eine Liste."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "replaceConjunctives() takes 2 positional arguments but 3 were given",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-7-154bf4b4e943>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0moutsentences\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplaceConjunctives\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentences\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpunctuations\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;31mTypeError\u001b[0m: replaceConjunctives() takes 2 positional arguments but 3 were given"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"outsentences = cs.replaceConjunctives(sentences, punctuations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Abschließend wird nun die Satzliste mit den ausgetauschten Konjunktiven in die Datei 'atest1out' geschrieben."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'atest1out')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -1,128 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from GenitivSolve import *\n",
|
||||
"\n",
|
||||
"from oi import *\n",
|
||||
"\n",
|
||||
"oi = oi()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading the german spacy model..\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"gs = GenitivSolve()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sentences, punctuations = oi.ReadDoc2Sent('test1')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"processing sentence 1\n",
|
||||
"processing sentence 2\n",
|
||||
"processing sentence 3\n",
|
||||
"processing sentence 4\n",
|
||||
"processing sentence 5\n",
|
||||
"processing sentence 6\n",
|
||||
"processing sentence 7\n",
|
||||
"processing sentence 8\n",
|
||||
"processing sentence 9\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"outsentences = gs.ReplaceGenitivWithDativ(sentences)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['Hallo', 'was', 'geht', 'denn', 'hier', 'so']\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "Can't convert 'list' object to str implicitly",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-6-431ebe52fd8b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0moi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPrintSplitSentencesToTextFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutsentences\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpunctuations\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'test1out'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;32m~/ProjektA/LeichteSprache/code200110/Prototyp/oi.py\u001b[0m in \u001b[0;36mPrintSplitSentencesToTextFile\u001b[0;34m(self, punctuations, sentences, document)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mpunctuation\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpunctuations\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 62\u001b[0;31m \u001b[0mdoc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtowrite\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mpunctuation\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mTypeError\u001b[0m: Can't convert 'list' object to str implicitly"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'test1out')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -1,325 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alle Funktionen der Klasse Passiv2Aktiv werden importiert"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from Passiv2Aktiv import *\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Nun wird die Klasse initialisiert"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"p2a = Passiv2Aktiv(None, None, None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Die nötigen Datenbanken werden in den Arbeitsspeicher der Session geladen. Hier zeigt sich auch die Stärke des Jupyter Notebooks, da der Code nur einmal ausgeführt werden muss und dann die folgenden Codezeilen neu ausgeführt werden können, ohne dass die Datenbanken neu eingeladen werden müssen. Das heißt, die Datenbanken können die ganze Zeit im Arbeitsspeicher ruhen, und trotzdem kann neuer Text in atest1 geschrieben werden. Dieser wird dann mit der oi-Klasse neu eingeladen, und anschließend werden die Passivformen der neuen Sätze ausgetauscht."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"p2a.create_hklDB_from_csv('Aktiv.txt', 'None')\n",
|
||||
"p2a.create_hklDB_from_csv('Vorgangspassiv.txt', 'None')\n",
|
||||
"p2a.create_hklDB_from_csv('Zustandspassiv.txt', 'None')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/alpcentaur/ProjektA/LeichteSprache/code200110/SentSeg/venv/lib/python3.5/site-packages/sklearn/base.py:251: UserWarning: Trying to unpickle estimator CountVectorizer from version 0.22.2 when using version 0.20.0. This might lead to breaking code or invalid results. Use at your own risk.\n",
|
||||
" UserWarning)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading spacy..\n",
|
||||
"done\n",
|
||||
"loading vectorizer..\n",
|
||||
"done\n",
|
||||
"loading the SGD model..\n",
|
||||
"done\n",
|
||||
"loading spacy..\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"p2a.load_DB_into_FASTsearch()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from oi import *\n",
|
||||
"oi = oi()\n",
|
||||
"\n",
|
||||
"sentences, punctuations = oi.ReadDoc2Sent('atest1')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"verbs of sentence string gehe 4\n",
|
||||
"1\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"verbs of sentence string habe verdammt 13\n",
|
||||
"2\n",
|
||||
"[39974, 1.0]\n",
|
||||
"[39913, 1.0]\n",
|
||||
"[['platztest rein 14'], ['2. Person Singular Präteritum reinplatzen']]\n",
|
||||
"[['reinplatztest 13'], ['2. Person Singular Präteritum reinplatzen']]\n",
|
||||
"[['waren verhext 13'], ['3. Person Plural Präteritum verhexen']]\n",
|
||||
"verbs of sentence string gibt 4\n",
|
||||
"1\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"verbs of sentence string wird 4\n",
|
||||
"1\n",
|
||||
"[40069, 1.0]\n",
|
||||
"[40069, 1.0]\n",
|
||||
"[['wird verheddert haben 21'], ['3. Person Singular Futur II verheddern']]\n",
|
||||
"[['wird verheddert worden sein 27'], ['3. Person Singular Futur II verheddern']]\n",
|
||||
"[['wird verheddert gewesen sein 28'], ['3. Person Singular Futur II verheddern']]\n",
|
||||
"verbs of sentence string gingen regnete 14\n",
|
||||
"2\n",
|
||||
"[39913, 1.0]\n",
|
||||
"[40045, 1.0]\n",
|
||||
"[['verhexten 9'], ['3. Person Plural Präteritum verhexen']]\n",
|
||||
"[['wurden verhext 14'], ['3. Person Plural Präteritum verhexen']]\n",
|
||||
"[['war verheddert 14'], ['3. Person Singular Präteritum verheddern']]\n",
|
||||
"verbs of sentence string war angemalt funktionierte 26\n",
|
||||
"3\n",
|
||||
"[37394, 2.0]\n",
|
||||
"[38744, 2.0]\n",
|
||||
"[['hatte dahingeschleppt 21'], ['3. Person Singular Plusquamperfekt dahinschleppen']]\n",
|
||||
"[['war dahingeschleppt worden 26'], ['3. Person Singular Plusquamperfekt dahinschleppen']]\n",
|
||||
"[['war inventarisiert gewesen 26'], ['3. Person Singular Plusquamperfekt inventarisieren']]\n",
|
||||
"verbs of sentence string wurde geliebt 13\n",
|
||||
"2\n",
|
||||
"[2, 3.0]\n",
|
||||
"[39975, 1.0]\n",
|
||||
"[['liebte 6'], ['3. Person Singular Präteritum lieben']]\n",
|
||||
"[['wurde geliebt 13'], ['3. Person Singular Präteritum lieben']]\n",
|
||||
"[['wurde reingeplatzt 18'], ['3. Person Singular Präteritum reinplatzen']]\n",
|
||||
"subjectofsentence ['Er']\n",
|
||||
"there is a subjecter 1\n",
|
||||
"get the tuples and triples to check..\n",
|
||||
"done\n",
|
||||
"['ihn', 'liebte', 'jemand']\n",
|
||||
"genrating the permutations\n",
|
||||
"done\n",
|
||||
"classifying the probability for right grammar in the filtered permutations..\n",
|
||||
"jemand liebte ihn\n",
|
||||
"done\n",
|
||||
"verbs of sentence string habe 4\n",
|
||||
"1\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"verbs of sentence string war 3\n",
|
||||
"1\n",
|
||||
"[40057, 1.0]\n",
|
||||
"[40057, 1.0]\n",
|
||||
"[['hatte verheddert 16'], ['3. Person Singular Plusquamperfekt verheddern']]\n",
|
||||
"[['war verheddert worden 21'], ['3. Person Singular Plusquamperfekt verheddern']]\n",
|
||||
"[['war verheddert gewesen 22'], ['3. Person Singular Plusquamperfekt verheddern']]\n",
|
||||
"verbs of sentence string 0\n",
|
||||
"0\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"verbs of sentence string ist 3\n",
|
||||
"1\n",
|
||||
"[40051, 1.0]\n",
|
||||
"[40051, 1.0]\n",
|
||||
"[['hat verheddert 14'], ['3. Person Singular Perfekt verheddern']]\n",
|
||||
"[['ist verheddert worden 21'], ['3. Person Singular Perfekt verheddern']]\n",
|
||||
"[['ist verheddert gewesen 22'], ['3. Person Singular Perfekt verheddern']]\n",
|
||||
"verbs of sentence string 0\n",
|
||||
"0\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"verbs of sentence string 0\n",
|
||||
"0\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"verbs of sentence string ginge 5\n",
|
||||
"1\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"verbs of sentence string lieben 6\n",
|
||||
"1\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[40072, 0.0]\n",
|
||||
"[['werden verheddert haben 23'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert worden sein 29'], ['3. Person Plural Futur II verheddern']]\n",
|
||||
"[['werden verheddert gewesen sein 30'], ['3. Person Plural Futur II verheddern']]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"outsentences = p2a.replacePassivForms(sentences)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[['Hallo', 'was', 'gehe', 'denn', 'hier', 'so'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', 'wird', 'alles', 'problematisch'], ['Sie', 'gingen', 'nach', 'Hause,', 'weil', 'es', 'in', 'Strömen', 'regnete'], ['Heute', 'war', 'die', 'Straße', 'blau', 'angemalt,', 'damit', 'der', 'Marathon', 'funktionierte'], ['ihn', 'liebte', 'jemand'], ['Er', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Sie', 'ist', 'nicht', 'schön', 'heute'], ['Oleoleole'], ['Mannoman'], ['Er', 'ginge', 'nicht', 'schnell'], ['Die', 'Hühner', 'lieben', 'sich', 'nicht']]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(outsentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'OK'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
" oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'atest1out')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -1,251 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" Zuerst wird die Klasse SayYes importiert und initialisiert"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading the german spacy model..\n",
|
||||
"done\n",
|
||||
"loading the stemmer..\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from SayYes import *\n",
|
||||
"\n",
|
||||
"sy = SayYes(None, None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Nun werden die Datenbanken in den Arbeitsspeicher geladen."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading the hklDB1..\n",
|
||||
"done\n",
|
||||
"Loading the hklDB2\n",
|
||||
"done\n",
|
||||
"loading hkldbOpposites 1..\n",
|
||||
"done\n",
|
||||
"loading hkldbOpposites 2..\n",
|
||||
"done\n",
|
||||
"loading the bow model 1\n",
|
||||
"done\n",
|
||||
"loading the bow model 2\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"#sy.create_hklDB_from_csv('Gegenwoerter.csv')\n",
|
||||
"\n",
|
||||
"#print(sy.hkldbOpposites1)\n",
|
||||
"\n",
|
||||
"sy.load_DB_into_FASTsearch()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Hier wird die Datei atest1 eingeladen, Output der Funktion ReadDoc2Sent sind die Sätze in Listen. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from oi import *\n",
|
||||
"oi = oi()\n",
|
||||
"sentences, punctuations = oi.ReadDoc2Sent('atest1')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Hier werden nun die Sätze nach den Wörtern 'nicht' oder 'kein/keine' durchsucht; das gefundene Wort wird dann entfernt und das entsprechende Wort durch dessen Gegenteil ersetzt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"wenigstens etwas\n",
|
||||
"oloa\n",
|
||||
"processing sentence 1\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 2\n",
|
||||
"ola\n",
|
||||
"thetheone\n",
|
||||
"oloa\n",
|
||||
"processing sentence 3\n",
|
||||
"ola\n",
|
||||
"thetheone\n",
|
||||
"oloa\n",
|
||||
"processing sentence 4\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 5\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 6\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 7\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 8\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 9\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 10\n",
|
||||
"ola\n",
|
||||
"thetheone\n",
|
||||
"theone\n",
|
||||
"1\n",
|
||||
"2\n",
|
||||
"2\n",
|
||||
"oloa\n",
|
||||
"processing sentence 11\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 12\n",
|
||||
"ola\n",
|
||||
"oloa\n",
|
||||
"processing sentence 13\n",
|
||||
"ola\n",
|
||||
"thetheone\n",
|
||||
"theone\n",
|
||||
"1\n",
|
||||
"2\n",
|
||||
"2\n",
|
||||
"oloa\n",
|
||||
"processing sentence 14\n",
|
||||
"ola\n",
|
||||
"thetheone\n",
|
||||
"theone\n",
|
||||
"1\n",
|
||||
"4\n",
|
||||
"2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"outsentences = sy.replaceOpposites(sentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Das Ergebnis der vorherigen Funktion wird in der folgenden Zeile in das Dokument atest1out geschrieben."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'OK'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'atest1out')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
|
@ -1,264 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import SentSeg\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[['Hallo', 'was', 'gehe', 'denn', 'hier', 'so.']], [['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt.']], [['I.', 'd.', 'R.', 'gibt', 'es', 'keine', 'Abschiebungen.']], [['Ende', 'd.', 'J.', 'wird', 'alles', 'problematisch.']], [['Sie', 'gingen', 'nach', 'Hause,', 'weil', 'es', 'in', 'Strömen', 'regnete.']], [['Heute', 'war', 'die', 'Straße', 'blau', 'angemalt,', 'damit', 'der', 'Marathon', 'funktionierte.']], [['Er', 'habe', 'es', 'sehr', 'schwer.']], [['Es', 'war', 'die', 'Hose', 'des', 'Gauners.']], [['Bliblablub.']], [['Sie', 'ist', 'nicht', 'schön', 'heute.']], [['Oleoleole.']], [['Mannoman.']], [['Er', 'ginge', 'nicht', 'schnell.']], [['Die', 'Hühner', 'lieben', 'sich', 'nicht.']]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sent_seg = SentSeg.SentSeg('de')\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"sentences = sent_seg.ReadDoc2Sent('atest1')\n",
|
||||
"print(sentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"initializing the gs utils..\n",
|
||||
"loading spacy..\n",
|
||||
"done\n",
|
||||
"done\n",
|
||||
"loading the Stochastic Gradient models..\n",
|
||||
"done\n",
|
||||
"initializing the SGM..\n",
|
||||
"loading vectorizer..\n",
|
||||
"done\n",
|
||||
"loading the SGD model..\n",
|
||||
"done\n",
|
||||
"loading spacy..\n",
|
||||
"done\n",
|
||||
"done\n",
|
||||
"importing spacy..\n",
|
||||
"done\n",
|
||||
"importing german model..\n",
|
||||
"done\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'done'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sent_seg.LoadSentGlueSGDandGSUtils()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"creating array of comma or not..\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sentences = sent_seg.CommaSentenceOrNot(sentences)\n",
|
||||
"print(sentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sentences = sent_seg.GetUtteranceNumber(sentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sentences = sent_seg.GetQuestionOrNot(sentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"importing spacy..\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sentences1 = sent_seg.SplitSentencesIntoHauptNebenTuple(sentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['es', 'regnete', 'in Strömen']\n",
|
||||
"['deswegen', 'Sie', 'gingen', 'nach Hause']\n",
|
||||
"['Heute', 'war', 'blau', 'angemalt', 'die Straße']\n",
|
||||
"100\n",
|
||||
"['dann', 'funktionierte', 'der Marathon']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"outsentences = sent_seg.SplitCommatas(sentences1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[['Hallo', 'was', 'gehe', 'denn', 'hier', 'so'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', 'wird', 'alles', 'problematisch'], ['in', 'Strömen', 'regnete', 'e'], ['deswegen', 'gingen', 'Sie', 'nach', 'Haus'], ['angemalt', 'war', 'die', 'Straße', 'blau', 'Heut'], ['dann', 'der', 'Marathon', 'funktioniert'], ['Er', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Oleoleole'], ['Mannoman'], ['Er', 'ginge', 'nicht', 'schnell'], ['Der', 'Satz', 'davor', 'funktioniert', 'nicht', 'im', 'Modul', 'Konjunktsolve'], ['Weil', 'er', 'zu', 'viele', 'verben', 'hat']]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(outsentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"punctuations = []\n",
|
||||
"for n in range(len(outsentences)):\n",
|
||||
" punctuations.append('.')\n",
|
||||
" if outsentences[n][-1][-1] == '.':\n",
|
||||
" outsentences[n][-1] = outsentences[n][-1][:-1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from oi import *\n",
|
||||
"oi = oi()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n",
|
||||
".\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'OK'"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'test1out')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -1,104 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from SolveShorts import *\n",
|
||||
"\n",
|
||||
"from oi import *\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"solSh = SolveShorts('hkldbShorts.hkl', 'hkldbShorts_All.hkl')\n",
|
||||
"\n",
|
||||
"#solSh.create_hklDB_from_csv('Abkuerzungen.txt')\n",
|
||||
"\n",
|
||||
"oi = oi()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sentences, punctuations = oi.ReadDoc2Sent('test1')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"loading hkldbShorts ..\n",
|
||||
"done\n",
|
||||
"loading the bow model\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"solSh.load_DB_into_FASTsearch()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[['Hallo', 'was', 'geht', 'denn', 'hier', 'so', '(sonntag)'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', '(in der regel)', '(rechts rot)', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', '(juristisch)', 'wird', 'alles', 'problematisch'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'ging', 'nicht', 'schnell'], ['Er', '(Europarekord Einfuhrerklärung Empfangsrelais Entschädigungsrente Ergänzungsrichtlinie Europarat endoplasmatisches Retikulum)', 'geht', 'davon', 'aus,', 'dass', 'es', 'schnell', 'zu', 'Ende', 'sein', 'wird']]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"outsentences = solSh.ExplainShortsInSentencesWithBrackets(sentences)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'test1out')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -1,154 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from FremdWB import *\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"fwb = FremdWB(None,None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#fwb.create_hklDB_from_csv('HurrakiWoerterbuch_nodoubles.txt', 'None')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"generating BoW Model 1..\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n",
|
||||
"generating BoW Model 2..\n",
|
||||
"Creating the bag of words...\n",
|
||||
"\n",
|
||||
"dumping the data to hkl format..\n",
|
||||
"done\n",
|
||||
"loading the bow model 1\n",
|
||||
"loading the bow model 2\n",
|
||||
"done\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fwb.load_DB_into_FASTsearch()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sentences = [['das', 'ist', 'Abfall'],['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']]\n",
|
||||
"punctuations = ['.', '!', '.']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"something\n",
|
||||
"sentence ['das', 'ist', 'Abfall']\n",
|
||||
"['ist', 'Abfall']\n",
|
||||
"['Zyprer']\n",
|
||||
"fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
|
||||
"['Abfall']\n",
|
||||
"fremdeintrag ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen.']\n",
|
||||
"fremdeintraege [['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen.']]\n",
|
||||
"sentence ['er', 'ging', 'über', 'die', 'Straße']\n",
|
||||
"['ging', 'über', 'die', 'Straße']\n",
|
||||
"['Zyprer']\n",
|
||||
"fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
|
||||
"['Zyprer']\n",
|
||||
"fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
|
||||
"['Enzyklopädie']\n",
|
||||
"fremdeintrag ['Enzyklopädie', 'ist', 'ein', 'anderes', 'Wort', 'für', 'Lexikon.', 'In', 'einer', 'Enzyklopädie', 'findet', 'man', 'Informationen.']\n",
|
||||
"['Zyprer']\n",
|
||||
"fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
|
||||
"fremdeintraege []\n",
|
||||
"sentence ['halt', 'ab', 'hier']\n",
|
||||
"['halt', 'ab', 'hier']\n",
|
||||
"['Zyprer']\n",
|
||||
"fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
|
||||
"['Zyprer']\n",
|
||||
"fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
|
||||
"['Zyprer']\n",
|
||||
"fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
|
||||
"fremdeintraege []\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"outsentences, punctuations = fwb.fremdEintragAppend(sentences, punctuations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[['das', 'ist', 'Abfall'], ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen'], ['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']] ['.', '.', '.', '!']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(outsentences, punctuations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +0,0 @@
|
|||
|
||||
|
||||
|
||||
python aye.py
|
Loading…
Reference in a new issue