basabuuka_prototyp/Prototyp/UseSayYes.ipynb
2020-08-16 19:36:44 +02:00

251 lines
4.7 KiB
Text

{
"cells": [
{
"cell_type": "raw",
"metadata": {},
"source": [
"Zuerst wird die Klasse SayYes importiert und initialisiert."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loading the german spacy model..\n",
"done\n",
"loading the stemmer..\n",
"done\n"
]
}
],
"source": [
"# Import only the class this notebook uses instead of a star import,\n",
"# so it stays obvious where every name in the namespace comes from.\n",
"from SayYes import SayYes\n",
"\n",
"# Both constructor arguments are passed as None in this demo.\n",
"sy = SayYes(None, None)"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Nun werden die Datenbanken in den Arbeitsspeicher geladen."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading the hklDB1..\n",
"done\n",
"Loading the hklDB2\n",
"done\n",
"loading hkldbOpposites 1..\n",
"done\n",
"loading hkldbOpposites 2..\n",
"done\n",
"loading the bow model 1\n",
"done\n",
"loading the bow model 2\n",
"done\n"
]
}
],
"source": [
"\n",
"\n",
"#sy.create_hklDB_from_csv('Gegenwoerter.csv')\n",
"\n",
"#print(sy.hkldbOpposites1)\n",
"\n",
"sy.load_DB_into_FASTsearch()\n"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Hier wird die Datei atest1 eingelesen; der Output der Funktion ReadDoc2Sent sind die Sätze in Listen."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Import the module under an alias instead of using a star import, so the\n",
"# variable `oi` below only ever refers to the instance and never rebinds\n",
"# the imported name it was created from.\n",
"import oi as oi_module\n",
"\n",
"oi = oi_module.oi()\n",
"\n",
"# ReadDoc2Sent is called with the document name and returns two values.\n",
"sentences, punctuations = oi.ReadDoc2Sent('atest1')"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Hier werden nun die Sätze nach den Wörtern 'nicht' oder 'kein/keine' durchsucht; diese werden dann entfernt, und das entsprechende Wort wird durch sein Gegenteil ersetzt."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"wenigstens etwas\n",
"oloa\n",
"processing sentence 1\n",
"ola\n",
"oloa\n",
"processing sentence 2\n",
"ola\n",
"thetheone\n",
"oloa\n",
"processing sentence 3\n",
"ola\n",
"thetheone\n",
"oloa\n",
"processing sentence 4\n",
"ola\n",
"oloa\n",
"processing sentence 5\n",
"ola\n",
"oloa\n",
"processing sentence 6\n",
"ola\n",
"oloa\n",
"processing sentence 7\n",
"ola\n",
"oloa\n",
"processing sentence 8\n",
"ola\n",
"oloa\n",
"processing sentence 9\n",
"ola\n",
"oloa\n",
"processing sentence 10\n",
"ola\n",
"thetheone\n",
"theone\n",
"1\n",
"2\n",
"2\n",
"oloa\n",
"processing sentence 11\n",
"ola\n",
"oloa\n",
"processing sentence 12\n",
"ola\n",
"oloa\n",
"processing sentence 13\n",
"ola\n",
"thetheone\n",
"theone\n",
"1\n",
"2\n",
"2\n",
"oloa\n",
"processing sentence 14\n",
"ola\n",
"thetheone\n",
"theone\n",
"1\n",
"4\n",
"2\n"
]
}
],
"source": [
"outsentences = sy.replaceOpposites(sentences)"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Das Ergebnis der vorherigen Funktion wird in der folgenden Zeile in das Dokument atest1out geschrieben."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n",
".\n"
]
},
{
"data": {
"text/plain": [
"'OK'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"oi.PrintSplitSentencesToTextFile(punctuations, outsentences, 'atest1out')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}