313 lines
6.6 KiB
Text
313 lines
6.6 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Hier werden die Bibliothek ConjunctSolve und deren Funktionen importiert. Anschließend wird die Klasse initialisiert."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from ConjunctSolve import *\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"cs = ConjunctSolve(None,None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"1000\n",
|
|
"2000\n",
|
|
"3000\n",
|
|
"4000\n",
|
|
"5000\n",
|
|
"6000\n",
|
|
"7000\n",
|
|
"8000\n",
|
|
"9000\n",
|
|
"10000\n",
|
|
"11000\n",
|
|
"12000\n",
|
|
"13000\n",
|
|
"14000\n",
|
|
"15000\n",
|
|
"16000\n",
|
|
"17000\n",
|
|
"18000\n",
|
|
"19000\n",
|
|
"20000\n",
|
|
"21000\n",
|
|
"22000\n",
|
|
"23000\n",
|
|
"24000\n",
|
|
"25000\n",
|
|
"26000\n",
|
|
"27000\n",
|
|
"28000\n",
|
|
"29000\n",
|
|
"30000\n",
|
|
"31000\n",
|
|
"32000\n",
|
|
"33000\n",
|
|
"34000\n",
|
|
"35000\n",
|
|
"36000\n",
|
|
"37000\n",
|
|
"38000\n",
|
|
"39000\n",
|
|
"40000\n",
|
|
"41000\n",
|
|
"42000\n",
|
|
"43000\n",
|
|
"44000\n",
|
|
"45000\n",
|
|
"46000\n",
|
|
"47000\n",
|
|
"48000\n",
|
|
"49000\n",
|
|
"50000\n",
|
|
"51000\n",
|
|
"52000\n",
|
|
"53000\n",
|
|
"54000\n",
|
|
"55000\n",
|
|
"56000\n",
|
|
"57000\n",
|
|
"58000\n",
|
|
"59000\n",
|
|
"60000\n",
|
|
"61000\n",
|
|
"62000\n",
|
|
"63000\n",
|
|
"64000\n",
|
|
"65000\n",
|
|
"66000\n",
|
|
"67000\n",
|
|
"68000\n",
|
|
"69000\n",
|
|
"70000\n",
|
|
"71000\n",
|
|
"72000\n",
|
|
"73000\n",
|
|
"74000\n",
|
|
"75000\n",
|
|
"76000\n",
|
|
"77000\n",
|
|
"78000\n",
|
|
"79000\n",
|
|
"80000\n",
|
|
"81000\n",
|
|
"82000\n",
|
|
"83000\n",
|
|
"84000\n",
|
|
"85000\n",
|
|
"86000\n",
|
|
"87000\n",
|
|
"88000\n",
|
|
"89000\n",
|
|
"90000\n",
|
|
"91000\n",
|
|
"92000\n",
|
|
"93000\n",
|
|
"94000\n",
|
|
"95000\n",
|
|
"96000\n",
|
|
"97000\n",
|
|
"98000\n",
|
|
"99000\n",
|
|
"100000\n",
|
|
"101000\n",
|
|
"102000\n",
|
|
"103000\n",
|
|
"104000\n",
|
|
"105000\n",
|
|
"106000\n",
|
|
"107000\n",
|
|
"108000\n",
|
|
"109000\n",
|
|
"110000\n",
|
|
"111000\n",
|
|
"112000\n",
|
|
"113000\n",
|
|
"114000\n",
|
|
"115000\n",
|
|
"116000\n",
|
|
"117000\n",
|
|
"118000\n",
|
|
"119000\n",
|
|
"120000\n",
|
|
"121000\n",
|
|
"122000\n",
|
|
"123000\n",
|
|
"124000\n",
|
|
"125000\n",
|
|
"126000\n",
|
|
"127000\n",
|
|
"128000\n",
|
|
"creating the hkl dump of Indi_ConjuDBAll\n",
|
|
"done..\n",
|
|
"Creating the hkl dump of Indi_ConjuDB 1\n",
|
|
"Creating the hkl dump of Indi_ConjuDB 2\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'done'"
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"cs.create_hklDB_from_csv('Indikativ_Conjunktiv.txt', 'None')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Nun werden die Datenbanken in den Arbeitsspeicher geladen."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Creating the bag of words...\n",
|
|
"\n",
|
|
"dumping the data to hkl format..\n",
|
|
"done\n",
|
|
"Creating the bag of words...\n",
|
|
"\n",
|
|
"dumping the data to hkl format..\n",
|
|
"done\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cs.load_DB_into_FASTsearch()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "raw",
|
|
"metadata": {},
|
|
"source": [
|
|
"oi ist eine Klasse mit nur zwei Funktionen: dem Einlesen und dem Schreiben von Textdateien. Die Funktion ReadDoc2Sent liest ein Textdokument ein. Der Output sind die Sätze, jeweils als Liste geschrieben \n",
|
|
"( eine Liste in Python hat die Form [ 'Das', 'ist', 'ein', 'Satz.' ] )."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[['Hallo', 'was', 'gehe', 'denn', 'hier', 'so'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', 'wird', 'alles', 'problematisch'], ['Ich', 'wuerde', 'nach', 'Hause', 'gehen'], ['Er', 'wuerde', 'es', 'schaffen'], ['Wir', 'wuerden', 'fechten'], ['Er', 'wurde', 'geliebt'], ['Er', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Sie', 'ist', 'nicht', 'schön', 'heute'], ['Oleoleole'], ['Mannoman'], ['Er', 'ginge', 'nicht', 'schnell'], ['Die', 'Hühner', 'lieben', 'sich', 'nicht']] ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from oi import *\n",
|
|
"oi = oi()\n",
|
|
"\n",
|
|
"sentences, punctuations = oi.ReadDoc2Sent('atest1')\n",
|
|
"print(sentences, punctuations)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "raw",
|
|
"metadata": {},
|
|
"source": [
|
|
"Die Funktion replaceConjunctives wird nun auf die Liste aus Satzlisten angewendet. Die Variable outsentences ist auch wieder eine Liste."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"outsentences = cs.replaceConjunctives(sentences)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "raw",
|
|
"metadata": {},
|
|
"source": [
|
|
"Abschließend wird nun die Satzliste mit den ausgetauschten Konjunktiven in die Datei 'atest1out' geschrieben."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'OK'"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'atest1out')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.5.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|