|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "from Medio import *\n",
- "\n",
- "\n",
- "\n",
- "medi = Medio(None,None)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "#medi.create_hklDB_from_csv('mediowords.txt', 'None')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Creating the bag of words...\n",
- "\n",
- "dumping the data to hkl format..\n",
- "done\n",
- "Creating the bag of words...\n",
- "\n",
- "dumping the data to hkl format..\n",
- "done\n"
- ]
- }
- ],
- "source": [
- "medi.load_DB_into_FASTsearch()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "sentences = [['das', 'ist', 'Kindersprache'],['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']]\n",
- "punctuations = ['.', '!', '.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "mediosofsentence ['das', 'ist', 'Kindersprache']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Kindersprache']\n",
- "medioeintrag ['Kinder·sprache']\n",
- "medioeintraege [['Kindersprache', ['Kinder·sprache']]]\n",
- "mediosofsentence ['er', 'ging', 'über', 'die', 'Straße']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "medioeintraege []\n",
- "mediosofsentence ['halt', 'ab', 'hier']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "['Oberbuergermeister']\n",
- "medioeintrag ['Ober·buerger·meister']\n",
- "medioeintraege []\n"
- ]
- }
- ],
- "source": [
- "outsentences, punctuations = medi.Medioreplace(sentences, punctuations)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[['das', 'ist', ['Kinder·sprache']], ['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']] ['.', '!', '.']\n"
- ]
- }
- ],
- "source": [
- "print(outsentences, punctuations)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|