alpcentaur
/
basabuuka_prototyp


								{

								 "cells": [

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "Hier werden die Bibliothek ConjunctSolve und deren Funktionen importiert. Anschließend wird die Klasse initialisiert."

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 1,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "from ConjunctSolve import *\n",

								    "\n",

								    "\n",

								    "\n",

								    "cs = ConjunctSolve(None,None)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 2,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "1000\n",

								      "2000\n",

								      "3000\n",

								      "4000\n",

								      "5000\n",

								      "6000\n",

								      "7000\n",

								      "8000\n",

								      "9000\n",

								      "10000\n",

								      "11000\n",

								      "12000\n",

								      "13000\n",

								      "14000\n",

								      "15000\n",

								      "16000\n",

								      "17000\n",

								      "18000\n",

								      "19000\n",

								      "20000\n",

								      "21000\n",

								      "22000\n",

								      "23000\n",

								      "24000\n",

								      "25000\n",

								      "26000\n",

								      "27000\n",

								      "28000\n",

								      "29000\n",

								      "30000\n",

								      "31000\n",

								      "32000\n",

								      "33000\n",

								      "34000\n",

								      "35000\n",

								      "36000\n",

								      "37000\n",

								      "38000\n",

								      "39000\n",

								      "40000\n",

								      "41000\n",

								      "42000\n",

								      "43000\n",

								      "44000\n",

								      "45000\n",

								      "46000\n",

								      "47000\n",

								      "48000\n",

								      "49000\n",

								      "50000\n",

								      "51000\n",

								      "52000\n",

								      "53000\n",

								      "54000\n",

								      "55000\n",

								      "56000\n",

								      "57000\n",

								      "58000\n",

								      "59000\n",

								      "60000\n",

								      "61000\n",

								      "62000\n",

								      "63000\n",

								      "64000\n",

								      "65000\n",

								      "66000\n",

								      "67000\n",

								      "68000\n",

								      "69000\n",

								      "70000\n",

								      "71000\n",

								      "72000\n",

								      "73000\n",

								      "74000\n",

								      "75000\n",

								      "76000\n",

								      "77000\n",

								      "78000\n",

								      "79000\n",

								      "80000\n",

								      "81000\n",

								      "82000\n",

								      "83000\n",

								      "84000\n",

								      "85000\n",

								      "86000\n",

								      "87000\n",

								      "88000\n",

								      "89000\n",

								      "90000\n",

								      "91000\n",

								      "92000\n",

								      "93000\n",

								      "94000\n",

								      "95000\n",

								      "96000\n",

								      "97000\n",

								      "98000\n",

								      "99000\n",

								      "100000\n",

								      "101000\n",

								      "102000\n",

								      "103000\n",

								      "104000\n",

								      "105000\n",

								      "106000\n",

								      "107000\n",

								      "108000\n",

								      "109000\n",

								      "110000\n",

								      "111000\n",

								      "112000\n",

								      "113000\n",

								      "114000\n",

								      "115000\n",

								      "116000\n",

								      "117000\n",

								      "118000\n",

								      "119000\n",

								      "120000\n",

								      "121000\n",

								      "122000\n",

								      "123000\n",

								      "124000\n",

								      "125000\n",

								      "126000\n",

								      "127000\n",

								      "128000\n",

								      "creating the hkl dump of Indi_ConjuDBAll\n",

								      "done..\n",

								      "Creating the hkl dump of Indi_ConjuDB 1\n",

								      "Creating the hkl dump of Indi_ConjuDB 2\n"

								     ]

								    },

								    {

								     "data": {

								      "text/plain": [

								       "'done'"

								      ]

								     },

								     "execution_count": 2,

								     "metadata": {},

								     "output_type": "execute_result"

								    }

								   ],

								   "source": [

								    "cs.create_hklDB_from_csv('Indikativ_Conjunktiv.txt', 'None')"

								   ]

								  },

								  {

								   "cell_type": "markdown",

								   "metadata": {},

								   "source": [

								    "Nun werden die Datenbanken in den Arbeitsspeicher geladen."

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 3,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "Creating the bag of words...\n",

								      "\n",

								      "dumping the data to hkl format..\n",

								      "done\n",

								      "Creating the bag of words...\n",

								      "\n",

								      "dumping the data to hkl format..\n",

								      "done\n"

								     ]

								    }

								   ],

								   "source": [

								    "cs.load_DB_into_FASTsearch()"

								   ]

								  },

								  {

								   "cell_type": "raw",

								   "metadata": {},

								   "source": [

								    "oi ist eine Klasse mit nur zwei Funktionen: dem Einlesen und dem Schreiben von Textdateien. Die Funktion ReadDoc2Sent liest ein Textdokument ein. Der Output sind die Sätze, jeweils als Liste von Wörtern, sowie die zugehörigen Satzzeichen in einer separaten Liste\n",

								    "(eine Liste in Python hat die Form [ 'Das', 'ist', 'ein', 'Satz' ])."

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 15,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "[['Hallo', 'was', 'gehe', 'denn', 'hier', 'so'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', 'wird', 'alles', 'problematisch'], ['Ich', 'wuerde', 'nach', 'Hause', 'gehen'], ['Er', 'wuerde', 'es', 'schaffen'], ['Wir', 'wuerden', 'fechten'], ['Er', 'wurde', 'geliebt'], ['Er', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Sie', 'ist', 'nicht', 'schön', 'heute'], ['Oleoleole'], ['Mannoman'], ['Er', 'ginge', 'nicht', 'schnell'], ['Die', 'Hühner', 'lieben', 'sich', 'nicht']] ['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']\n"

								     ]

								    }

								   ],

								   "source": [

								    "from oi import *\n",

								    "oi = oi()\n",

								    "\n",

								    "sentences, punctuations = oi.ReadDoc2Sent('atest1')\n",

								    "print(sentences, punctuations)"

								   ]

								  },

								  {

								   "cell_type": "raw",

								   "metadata": {},

								   "source": [

								    "Die Funktion replaceConjunctives wird nun auf die Liste aus Satzlisten angewendet. Die Variable outsentences ist auch wieder eine Liste."

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 16,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "outsentences = cs.replaceConjunctives(sentences)"

								   ]

								  },

								  {

								   "cell_type": "raw",

								   "metadata": {},

								   "source": [

								    "Abschließend wird nun die Satzliste mit den ausgetauschten Konjunktiven in die Datei 'atest1out' geschrieben."

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 17,

								   "metadata": {},

								   "outputs": [

								    {

								     "data": {

								      "text/plain": [

								       "'OK'"

								      ]

								     },

								     "execution_count": 17,

								     "metadata": {},

								     "output_type": "execute_result"

								    }

								   ],

								   "source": [

								    "oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'atest1out')"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {},

								   "outputs": [],

								   "source": []

								  }

								 ],

								 "metadata": {

								  "kernelspec": {

								   "display_name": "Python 3",

								   "language": "python",

								   "name": "python3"

								  },

								  "language_info": {

								   "codemirror_mode": {

								    "name": "ipython",

								    "version": 3

								   },

								   "file_extension": ".py",

								   "mimetype": "text/x-python",

								   "name": "python",

								   "nbconvert_exporter": "python",

								   "pygments_lexer": "ipython3",

								   "version": "3.5.3"

								  }

								 },

								 "nbformat": 4,

								 "nbformat_minor": 2

								}