alpcentaur
/
basabuuka_prototyp


								{

								 "cells": [

								  {

								   "cell_type": "code",

								   "execution_count": 1,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "import SentSeg\n"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 3,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "[[['Hallo', 'was', 'gehe', 'denn', 'hier', 'so.']], [['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt.']], [['I.', 'd.', 'R.', 'gibt', 'es', 'keine', 'Abschiebungen.']], [['Ende', 'd.', 'J.', 'wird', 'alles', 'problematisch.']], [['Sie', 'gingen', 'nach', 'Hause,', 'weil', 'es', 'in', 'Strömen', 'regnete.']], [['Heute', 'war', 'die', 'Straße', 'blau', 'angemalt,', 'damit', 'der', 'Marathon', 'funktionierte.']], [['Er', 'habe', 'es', 'sehr', 'schwer.']], [['Es', 'war', 'die', 'Hose', 'des', 'Gauners.']], [['Bliblablub.']], [['Sie', 'ist', 'nicht', 'schön', 'heute.']], [['Oleoleole.']], [['Mannoman.']], [['Er', 'ginge', 'nicht', 'schnell.']], [['Die', 'Hühner', 'lieben', 'sich', 'nicht.']]]\n"

								     ]

								    }

								   ],

								   "source": [

								    "sent_seg = SentSeg.SentSeg('de')\n",

								    "\n",

								    "    \n",

								    "sentences = sent_seg.ReadDoc2Sent('atest1')\n",

								    "print(sentences)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 4,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "initializing the gs utils..\n",

								      "loading spacy..\n",

								      "done\n",

								      "done\n",

								      "loading the Stochastic Gradient models..\n",

								      "done\n",

								      "initializing the SGM..\n",

								      "loading vectorizer..\n",

								      "done\n",

								      "loading the SGD model..\n",

								      "done\n",

								      "loading spacy..\n",

								      "done\n",

								      "done\n",

								      "importing spacy..\n",

								      "done\n",

								      "importing german model..\n",

								      "done\n"

								     ]

								    },

								    {

								     "data": {

								      "text/plain": [

								       "'done'"

								      ]

								     },

								     "execution_count": 4,

								     "metadata": {},

								     "output_type": "execute_result"

								    }

								   ],

								   "source": [

								    "sent_seg.LoadSentGlueSGDandGSUtils()"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 5,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "creating array of comma or not..\n",

								      "done\n"

								     ]

								    }

								   ],

								   "source": [

								    "sentences = sent_seg.CommaSentenceOrNot(sentences)\n",

								    "print(sentences)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 6,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "sentences = sent_seg.GetUtteranceNumber(sentences)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 7,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "sentences = sent_seg.GetQuestionOrNot(sentences)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 8,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "importing spacy..\n",

								      "done\n"

								     ]

								    }

								   ],

								   "source": [

								    "sentences1 = sent_seg.SplitSentencesIntoHauptNebenTuple(sentences)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 9,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "['es', 'regnete', 'in Strömen']\n",

								      "['deswegen', 'Sie', 'gingen', 'nach Hause']\n",

								      "['Heute', 'war', 'blau', 'angemalt', 'die Straße']\n",

								      "100\n",

								      "['dann', 'funktionierte', 'der Marathon']\n"

								     ]

								    }

								   ],

								   "source": [

								    "outsentences = sent_seg.SplitCommatas(sentences1)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 18,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "[['Hallo', 'was', 'gehe', 'denn', 'hier', 'so'], ['Ich', 'habe', 'echt', 'keine', 'Ahnung', 'verdammt'], ['I.', 'd.', 'R.', 'gibt', 'es', 'keine', 'Abschiebungen'], ['Ende', 'd.', 'J.', 'wird', 'alles', 'problematisch'], ['in', 'Strömen', 'regnete', 'e'], ['deswegen', 'gingen', 'Sie', 'nach', 'Haus'], ['angemalt', 'war', 'die', 'Straße', 'blau', 'Heut'], ['dann', 'der', 'Marathon', 'funktioniert'], ['Er', 'habe', 'es', 'sehr', 'schwer'], ['Es', 'war', 'die', 'Hose', 'des', 'Gauners'], ['Bliblablub'], ['Oleoleole'], ['Mannoman'], ['Er', 'ginge', 'nicht', 'schnell'], ['Der', 'Satz', 'davor', 'funktioniert', 'nicht', 'im', 'Modul', 'Konjunktsolve'], ['Weil', 'er', 'zu', 'viele', 'verben', 'hat']]\n"

								     ]

								    }

								   ],

								   "source": [

								    "print(outsentences)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 19,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "punctuations = []\n",

								    "for n in range(len(outsentences)):\n",

								    "    punctuations.append('.')\n",

								    "    if outsentences[n][-1][-1] == '.':\n",

								    "        outsentences[n][-1] = outsentences[n][-1][:-1]"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 20,

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "from oi import *\n",

								    "oi = oi()"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 21,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n",

								      ".\n"

								     ]

								    },

								    {

								     "data": {

								      "text/plain": [

								       "'OK'"

								      ]

								     },

								     "execution_count": 21,

								     "metadata": {},

								     "output_type": "execute_result"

								    }

								   ],

								   "source": [

								    "oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'test1out')\n"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {},

								   "outputs": [],

								   "source": []

								  }

								 ],

								 "metadata": {

								  "kernelspec": {

								   "display_name": "Python 3",

								   "language": "python",

								   "name": "python3"

								  },

								  "language_info": {

								   "codemirror_mode": {

								    "name": "ipython",

								    "version": 3

								   },

								   "file_extension": ".py",

								   "mimetype": "text/x-python",

								   "name": "python",

								   "nbconvert_exporter": "python",

								   "pygments_lexer": "ipython3",

								   "version": "3.5.3"

								  }

								 },

								 "nbformat": 4,

								 "nbformat_minor": 2

								}