|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "from FremdWB import *\n",
- "\n",
- "\n",
- "\n",
- "fwb = FremdWB(None,None)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1000\n",
- "2000\n",
- "creating the hkl dump of Fremd_WBDBAll\n",
- "done..\n",
- "Creating the hkl dump of Fremd_WBDB 1\n",
- "Creating the hkl dump of Fremd_WBDB 2\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "'done'"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "fwb.create_hklDB_from_csv('HurrakiWoerterbuch_nodoubles.txt', 'None')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Creating the bag of words...\n",
- "\n",
- "dumping the data to hkl format..\n",
- "done\n",
- "Creating the bag of words...\n",
- "\n",
- "dumping the data to hkl format..\n",
- "done\n"
- ]
- }
- ],
- "source": [
- "fwb.load_DB_into_FASTsearch()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "sentences = [['das', 'ist', 'Abfall'],['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']]\n",
- "punctuations = ['.', '!', '.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "outsentences, punctuations = fwb.fremdEintragAppend(sentences, punctuations)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[['das', 'ist', 'Abfall'], ['Abfall', 'sind', 'Sachen', 'die', 'wir', 'nicht', 'mehr', 'brauchen'], ['er', 'ging', 'über', 'die', 'Straße'], ['halt', 'ab', 'hier']] ['.', '.', '!', '.']\n"
- ]
- }
- ],
- "source": [
- "print(outsentences, punctuations)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|