|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "from FremdWB import *\n",
- "\n",
- "\n",
- "\n",
- "fwb = FremdWB(None,None)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "#fwb.create_hklDB_from_csv('HurrakiWoerterbuch_nodoubles.txt', 'None')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "generating BoW Model 1..\n",
- "Creating the bag of words...\n",
- "\n",
- "dumping the data to hkl format..\n",
- "done\n",
- "generating BoW Model 2..\n",
- "Creating the bag of words...\n",
- "\n",
- "dumping the data to hkl format..\n",
- "done\n",
- "loading the bow model 1\n",
- "loading the bow model 2\n",
- "done\n"
- ]
- }
- ],
- "source": [
- "fwb.load_DB_into_FASTsearch()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "sentences = [['das', 'ist', 'Abfall'],['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']]\n",
- "punctuations = ['.', '!', '.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "something\n",
- "sentence ['das', 'ist', 'Abfall']\n",
- "['ist', 'Abfall']\n",
- "['Zyprer']\n",
- "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
- "['Abfall']\n",
- "fremdeintrag ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen.']\n",
- "fremdeintraege [['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen.']]\n",
- "sentence ['er', 'ging', 'über', 'die', 'Straße']\n",
- "['ging', 'über', 'die', 'Straße']\n",
- "['Zyprer']\n",
- "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
- "['Zyprer']\n",
- "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
- "['Enzyklopädie']\n",
- "fremdeintrag ['Enzyklopädie', 'ist', 'ein', 'anderes', 'Wort', 'für', 'Lexikon.', 'In', 'einer', 'Enzyklopädie', 'findet', 'man', 'Informationen.']\n",
- "['Zyprer']\n",
- "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
- "fremdeintraege []\n",
- "sentence ['halt', 'ab', 'hier']\n",
- "['halt', 'ab', 'hier']\n",
- "['Zyprer']\n",
- "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
- "['Zyprer']\n",
- "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
- "['Zyprer']\n",
- "fremdeintrag ['Ein', 'Zyprer', 'oder', 'eine', 'Zyprerin', 'ist', 'ein', 'Mensch.', 'Dieser', 'Mensch', 'lebt', 'auf', 'der', 'Insel', 'Zypern.']\n",
- "fremdeintraege []\n"
- ]
- }
- ],
- "source": [
- "outsentences, punctuations = fwb.fremdEintragAppend(sentences, punctuations)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[['das', 'ist', 'Abfall'], ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen'], ['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']] ['.', '.', '.', '!']\n"
- ]
- }
- ],
- "source": [
- "print(outsentences, punctuations)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|