311 lines
6.5 KiB
Text
311 lines
6.5 KiB
Text
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "raw",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Alle Funktionen der Klasse Passiv2Aktiv werden importiert"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from Passiv2Aktiv import *\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "raw",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Nun wird die Klasse initialisiert"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"p2a = Passiv2Aktiv(None, None, None)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "raw",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Die nötigen Datenbanken werden in den Arbeitsspeicher der Session geladen. Hier zeigt sich auch die Stärke des Jupyter Notebooks, da der code einmal ausgeführt werden muss, und dann die folgenden Codezeilen neuausgeführt werden können, ohne das die Datenbanken neu eingeladen werden müssen. Das heißt, die Datenbanken können die ganze Zeit im Arbeitsspeicher ruhen, und trotzdem kann neuer Text atest1 geschrieben werden. Dieser wird dann neu eingeladen mit der oi Klasse und anschließend werden die Passivformen der neuen Sätze ausgetauscht."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"1000\n",
|
||
|
"2000\n",
|
||
|
"3000\n",
|
||
|
"4000\n",
|
||
|
"5000\n",
|
||
|
"6000\n",
|
||
|
"7000\n",
|
||
|
"8000\n",
|
||
|
"9000\n",
|
||
|
"10000\n",
|
||
|
"11000\n",
|
||
|
"12000\n",
|
||
|
"13000\n",
|
||
|
"14000\n",
|
||
|
"15000\n",
|
||
|
"16000\n",
|
||
|
"17000\n",
|
||
|
"18000\n",
|
||
|
"19000\n",
|
||
|
"20000\n",
|
||
|
"21000\n",
|
||
|
"22000\n",
|
||
|
"23000\n",
|
||
|
"24000\n",
|
||
|
"25000\n",
|
||
|
"26000\n",
|
||
|
"27000\n",
|
||
|
"28000\n",
|
||
|
"29000\n",
|
||
|
"30000\n",
|
||
|
"31000\n",
|
||
|
"32000\n",
|
||
|
"33000\n",
|
||
|
"34000\n",
|
||
|
"35000\n",
|
||
|
"36000\n",
|
||
|
"37000\n",
|
||
|
"38000\n",
|
||
|
"39000\n",
|
||
|
"40000\n",
|
||
|
"41000\n",
|
||
|
"42000\n",
|
||
|
"43000\n",
|
||
|
"44000\n",
|
||
|
"45000\n",
|
||
|
"46000\n",
|
||
|
"47000\n",
|
||
|
"48000\n",
|
||
|
"49000\n",
|
||
|
"50000\n",
|
||
|
"51000\n",
|
||
|
"creating the hkl dump of DBAll\n",
|
||
|
"Creating the hkl dump of DB 1\n",
|
||
|
"Creating the hkl dump of DB 2\n",
|
||
|
"1000\n",
|
||
|
"2000\n",
|
||
|
"3000\n",
|
||
|
"4000\n",
|
||
|
"5000\n",
|
||
|
"6000\n",
|
||
|
"7000\n",
|
||
|
"8000\n",
|
||
|
"9000\n",
|
||
|
"10000\n",
|
||
|
"11000\n",
|
||
|
"12000\n",
|
||
|
"13000\n",
|
||
|
"14000\n",
|
||
|
"15000\n",
|
||
|
"16000\n",
|
||
|
"17000\n",
|
||
|
"18000\n",
|
||
|
"19000\n",
|
||
|
"20000\n",
|
||
|
"21000\n",
|
||
|
"22000\n",
|
||
|
"23000\n",
|
||
|
"24000\n",
|
||
|
"25000\n",
|
||
|
"creating the hkl dump of DBAll\n",
|
||
|
"Creating the hkl dump of DB 1\n",
|
||
|
"Creating the hkl dump of DB 2\n",
|
||
|
"1000\n",
|
||
|
"2000\n",
|
||
|
"3000\n",
|
||
|
"4000\n",
|
||
|
"5000\n",
|
||
|
"6000\n",
|
||
|
"7000\n",
|
||
|
"8000\n",
|
||
|
"9000\n",
|
||
|
"10000\n",
|
||
|
"11000\n",
|
||
|
"12000\n",
|
||
|
"13000\n",
|
||
|
"14000\n",
|
||
|
"15000\n",
|
||
|
"16000\n",
|
||
|
"17000\n",
|
||
|
"18000\n",
|
||
|
"19000\n",
|
||
|
"20000\n",
|
||
|
"21000\n",
|
||
|
"22000\n",
|
||
|
"creating the hkl dump of DBAll\n",
|
||
|
"Creating the hkl dump of DB 1\n",
|
||
|
"Creating the hkl dump of DB 2\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"'done'"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"p2a.create_hklDB_from_csv('Aktiv.txt', 'None')\n",
|
||
|
"p2a.create_hklDB_from_csv('Vorgangspassiv.txt', 'None')\n",
|
||
|
"p2a.create_hklDB_from_csv('Zustandspassiv.txt', 'None')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"loading the hkldb_All databases..\n",
|
||
|
"first done\n",
|
||
|
"second done\n",
|
||
|
"third done\n",
|
||
|
"loading hkldbIndi_Conju 1..\n",
|
||
|
"loading hkldbIndi_Conju 2..\n",
|
||
|
"generating BoW Model 1..\n",
|
||
|
"Creating the bag of words...\n",
|
||
|
"\n",
|
||
|
"dumping the data to hkl format..\n",
|
||
|
"done\n",
|
||
|
"generating BoW Model 2..\n",
|
||
|
"Creating the bag of words...\n",
|
||
|
"\n",
|
||
|
"dumping the data to hkl format..\n",
|
||
|
"done\n",
|
||
|
"loading the bow model 1\n",
|
||
|
"loading the bow model 2\n",
|
||
|
"loading hkldbIndi_Conju 1..\n",
|
||
|
"generating BoW Model 1..\n",
|
||
|
"Creating the bag of words...\n",
|
||
|
"\n",
|
||
|
"dumping the data to hkl format..\n",
|
||
|
"done\n",
|
||
|
"generating BoW Model 2..\n",
|
||
|
"Creating the bag of words...\n",
|
||
|
"\n",
|
||
|
"dumping the data to hkl format..\n",
|
||
|
"done\n",
|
||
|
"generating BoW Model 1..\n",
|
||
|
"Creating the bag of words...\n",
|
||
|
"\n",
|
||
|
"dumping the data to hkl format..\n",
|
||
|
"done\n",
|
||
|
"generating BoW Model 2..\n",
|
||
|
"Creating the bag of words...\n",
|
||
|
"\n",
|
||
|
"dumping the data to hkl format..\n",
|
||
|
"done\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"p2a.load_DB_into_FASTsearch()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from oi import *\n",
|
||
|
"oi = oi()\n",
|
||
|
"\n",
|
||
|
"sentences, punctuations = oi.ReadDoc2Sent('atest1')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"outsentences = p2a.replacePassivForms(sentences)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"print(outsentences)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
" oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'atest1out')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.5.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|