The Interface of the fdb-spider, based on rocketchat
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

772 lines
39 KiB

import os
from rocketchat.api import RocketChatAPI
import configparser
import json
import requests
import yaml
import dateutil.parser
import subprocess
# Bot settings are read once from config.ini (path relative to the working
# directory): the [Chat] section holds the Rocket.Chat login and room ids,
# the [Spider] section the location of the fdb-spider checkout.
config = configparser.ConfigParser()
config.read('config.ini')
(botname, botpassword, server_url, room_id, bot_user_id) = (
    config['Chat'][option]
    for option in ('username', 'password', 'URL', 'room_id', 'bot_user_id')
)
spider_directory = config['Spider']['spider_directory']
# Chat API endpoint. The local endpoint is served without ssl (http://).
# A reverse-proxied remote will likely be served with ssl instead, e.g.
# 'https://your-uri-here.trycloudflare.com/api/v1/chat'.
HOST = 'localhost:5000'
URI = 'http://' + HOST + '/api/v1/chat'
if __name__ == '__main__':
api = RocketChatAPI(settings={'username': botname, 'password': botpassword, 'domain': server_url})
# api.send_message('Ciao, I am the fdb-spider', room_id)
#myinfo = api.get_my_info()
#room_history = api.get_private_room_history(room_id)
#print(room_history['messages'][0]['msg'])
# print(myinfo)
rooms = api.get_private_rooms()
print('blubidab oioioi', rooms)
# api.send_message('Ole', room_id)
n = 0
import time
import schedule
#change to False here, if you want to have the update run at server start
already_updated = True
start = True
import datetime
from datetime import timedelta
while True:
time.sleep(2)
#already_updated = True
now = datetime.datetime.now()
current_hour = now.strftime("%H")
#print(current_hour)
# run variable update and creation at start
if start == True:
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
print(room_list)
print(room)
print(room_file_list)
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2]])
with open(spider_directory + '/spiders/config.yaml' , "r") as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
fdb_list = []
#print(config)
for key in config:
fdb_list.append(key)
start = False
if int(current_hour) > 11:
aftersix = True
if int(current_hour) <= 11:
aftersix = False
already_updated = False
if aftersix == True and already_updated == False and aftersix == False:
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2], room_file_list[3]])
with open(spider_directory + '/spiders/config.yaml' , "r") as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
fdb_list = []
#print(config)
for key in config:
fdb_list.append(key)
data = dict({})
for room in room_list:
print(room[0])
room_fdbs = room[2]
room_tags = room[3]
room_deadline_days = room[4]
#subprocess.run(["python", spider_directory + 'main.py', fdbs])
room_history_list = []
try:
room_history_file = open('roomhistories/' + room[0] + '.txt', 'x')
room_history_file.close()
except:
print('reading from roomhistory')
with open('roomhistories/' + room[0] + '.txt') as room_history_file:
lines = room_history_file.readlines()
for line in lines:
room_history_list.append(line)
#date = datetime.datetime.now() - timedelta(days=3)
#room_history = api.get_room_history(room[0], oldest=date, latest=datetime.datetime.now())
for room_fdb in eval(room_fdbs):
#print('room_fdb',room_fdb, 'fdb_list',fdb_list)
try:
iteration_var_list = config.get(room_fdb).get("entry-list").get("iteration-var-list")
except Exception as e:
print('there was an error with the entry-list parameter in the config regarding the fdb ', room_fdb)
if room_fdb in fdb_list and room_fdb not in [key for key in data]:
iterdict = {}
for i in eval(iteration_var_list):
f = open(spider_directory + "/spiders/output/" + room_fdb + str(i) + "entryList.txt")
text = f.read()
dictionary_entry_list = eval(text)
iterdict[i] = dictionary_entry_list
data[room_fdb] = iterdict
for i in eval(iteration_var_list):
try:
print(room_fdb, i)
for key in data[room_fdb][i]:
contains_tag = False
period_data_formatted = dateutil.parser.parse(data[room_fdb][i][key]["period"])
name_data_lower = [word.lower() for word in data[room_fdb][i][key]["name"].split(' ')]
info_data_lower = [word.lower() for word in data[room_fdb][i][key]["info"].split(' ')]
text_data_lower = [word.lower() for word in data[room_fdb][i][key]["text"].split(' ')]
tag_list = []
for tag in eval(room_tags):
if '_' in tag:
ntags = tag.split('_')
ntags_length = len(ntags)
ntag_count = 0
for nword in name_data_lower:
for ntag in ntags:
if ntag.lower() in nword:
ntag_count += 1
if ntag_count == ntags_length:
contains_tag = True
tag_list.append(tag + ' in name')
ntag_count = 0
for iword in info_data_lower:
for ntag in ntags:
if ntag.lower() in iword:
ntag_count += 1
if ntag_count == ntags_length:
contains_tag = True
tag_list.append(tag + ' in info')
ntag_count = 0
for tword in text_data_lower:
ntag_count = 0
for ntag in ntags:
if ntag.lower() in tword:
ntag_count += 1
if ntag_count == ntags_length:
contains_tag = True
tag_list.append(tag + ' in text')
else:
for nword in name_data_lower:
if tag.lower() in nword:
contains_tag = True
tag_list.append(tag + ' in name')
for iword in info_data_lower:
if tag.lower() in iword:
contains_tag = True
tag_list.append(tag + ' in info')
for tword in text_data_lower:
if tag.lower() in tword:
contains_tag = True
tag_list.append(tag + ' in text')
if contains_tag == True:
try:
url = data[room_fdb][i][key]["domain"]
except:
url = data[room_fdb][i][key]["link"]
entry_message = '<' + url + '|' + data[room_fdb][i][key]["name"]+ '>' + '\n' + data[room_fdb][i][key]["info"] + '\n' + 'The period of the entry is:' + str(period_data_formatted) + '\n' + str(tag_list)
entry_message_identifier = '<' + url + '|' + data[room_fdb][i][key]["name"]+ '>'
message_was_already_there = False
for message in room_history_list:
print('message that gets checked with identifier ', message[:-1])
print('the entry message identifier is:', entry_message_identifier)
if url in message[:-1] or data[room_fdb][i][key]["name"] in message:
message_was_already_there = True
now = datetime.datetime.now()
now_formatted = dateutil.parser.parse(str(now))
delta = period_data_formatted - now_formatted
if message_was_already_there == False and delta.days < int(room_deadline_days):
print('went into already there false')
api.send_message(entry_message, room[0])
print('before writing')
try:
room_file = open('roomhistories/' + room[0] + '.txt', 'x')
room_file.close()
except:
print('appending to roomhistory')
room_file = open('roomhistories/' + room[0] + '.txt', 'a')
room_file.write(entry_message_identifier + '\n')
room_file.close()
print('after writing')
except Exception as e:
print("probably i was not there in last page, original error is:", e)
#print('data',data['giz'][2],'data')
already_updated = True
# if datestime.split over etc and updated= true etc
n += 1
if n%100 == 0:
print(n)
try:
#print('getting the room history')
date = datetime.datetime.now() - timedelta(days=3)
room_history = api.get_private_room_history(room_id, oldest=date)
except Exception as e:
time.sleep(10)
api = RocketChatAPI(settings={'username': botname, 'password': botpassword, 'domain': server_url})
time.sleep(5)
room_history = api.get_private_room_history(room_id, oldest=date)
print('got a connection error, original message is:',e)
messages_list = []
for message in room_history['messages']:
messages_list.append(message)
if len(messages_list) >= 1:
#print('blub', messages_list)
latest_message_user_id = messages_list[0]['u']['_id']
latest_message_user_username = messages_list[0]['u']['username']
latest_message = messages_list[0]['msg']
latest_message_id = messages_list[0]['_id']
new_message_file = open('new_message_file.txt', 'r')
new_message = new_message_file.read()
new_message_file.close()
new_message_list = new_message.split('§%§%')
#print(latest_message, new_message_list[0])
if new_message_list[0] != latest_message and new_message_list[1] != latest_message_id and latest_message_user_id != bot_user_id:
answer = 'Ich habe kein Kommando erhalten.'
new_message_file = open('new_message_file.txt', 'w')
new_message_file.write(latest_message + '§%§%' + latest_message_id)
new_message_file.close()
user_input = latest_message
user_input_list = user_input.split(' ')
if user_input_list[0] == 'addtags':
try:
room_id_add_tags = user_input_list[1]
except:
room_id_add_tags = 'NONE'
try:
new_tags = user_input_list[2]
except:
new_tags = 'NONE'
try:
thirdarg = user_input_list[3]
except:
thirdarg = 'NONE'
if len(room_id_add_tags) >= 1 and len(new_tags) >= 1 and thirdarg == 'NONE':
try:
room_file_add_tags = open('rooms/' + user_input_list[1] + '.txt', 'r')
room_info_raw = room_file_add_tags.read()
room_file_add_tags.close()
room_info = room_info_raw.split('§%§%')
tag_list = eval(room_info[-1])
for tag in eval(user_input_list[2]):
if tag not in tag_list:
tag_list.append(tag)
room_file_add_tags = open('rooms/' + user_input_list[1] + '.txt', 'w')
room_file_add_tags.write( str(room_info[0]) + '§%§%' + str(room_info[1]) + '§%§%' + str(tag_list))
room_file_add_tags.close()
answer = 'the updated tag list is' + str(tag_list)
except Exception as e:
print('error opening, original error is:', e)
answer = "The room_id to update the tags was not found"
if user_input_list[0] == 'printtags':
try:
room_id_to_print_tags = user_input_list[1]
except:
answer = "after the command printtags, the second argument has to be the room id.. use printrooms and look up the id of the room you want to print the tags"
room_id_to_print_tags = 'NONE'
if room_id_to_print_tags != 'NONE':
try:
room_file = open('rooms/' + room_id_to_print_tags + '.txt', 'r')
room_info_raw = room_file.read()
room_file.close()
room_info = room_info_raw.split('§%§%')
tag_list = eval(room_info[-1])
answer = tag_list
except Exception as e:
print('error opening, original error is:', e)
answer = "The room_id to get the tags was not found"
if user_input_list[0] == 'printcommands':
answer = """
To print all available rooms and their configuration, use
command : `printrooms`
-----------------------------------------------------------------------------------------
To print all available fdbs, use
command : `printfdbs`
-----------------------------------------------------------------------------------------
To update all rooms use
command : `updaterooms`
-----------------------------------------------------------------------------------------
To update one room use the room_id from the output of printrooms:
command : `updaterooms <room-id>`
example : `updaterooms 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To create a room use following command, but be sure to **not have spaces** in your lists,
as a space indicates a new command argument:
command : `createroom <room-name> <list-fdbs> <list-tags> <days-to-deadline>`
example : `createroom room-test-1 ['giz','fdb2'] ['tag1','tag2','tag3'] 7`
-----------------------------------------------------------------------------------------
To delete a room use
command : `deleteroom <room-id>`
example : `deleteroom 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To print the tags of a room use
command : `printtags <room-id>`
example : `printtags 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To add tags to the existing tags use
command : `addtags <room-id> <tag-list-without-spaces>`
example : `addtags 6572012bebb39dd248d08320 ['tag1','tag2','tag3']`
"""
if user_input_list[0] == 'printrooms':
room_was_found = False
# get all the rooms
import os
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2], room_file_list[3]])
room_list_string = ''
for room in room_list:
room_list_string += str(room) + '\n' + '------------------------------------------------------------------------------' + '\n'
#print(room_list)
answer = room_list_string
if user_input_list[0] == 'updaterooms':
try:
if len(user_input_list[1]) >= 1:
room_to_update = user_input_list[1]
except Exception as e:
room_to_update = 'NONE'
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2], room_file_list[3]])
with open(spider_directory + '/spiders/config.yaml' , "r") as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
fdb_list = []
#print(config)
for key in config:
fdb_list.append(key)
answer = "Ich update die Rooms auf Basis der Daten von heute morgen um 6 Uhr.."
data = dict({})
for room in room_list:
if room[0] == room_to_update or room_to_update == 'NONE':
room_was_found = True
print(room[0])
room_fdbs = room[2]
room_tags = room[3]
room_deadline_days = room[4]
# not running get_rooms because disfunctional
#subprocess.run(["python", spider_directory + 'main.py', fdbs])
#myinfo = api.get_room_info(room[0])
#print(myinfo)
#roomid = "'657cbeccebb39dd248d38ec3'"
#roomoioioi = api.get_user_info(bot_user_id)
#print(roomoioioi)
#room_history_updateroom = api.get_private_room_history(room[0])
# reading from txt state history instead
try:
room_history_file = open('roomhistories/' + room[0] + '.txt', 'x')
room_history_file.close()
except:
print('reading from roomhistory which is already there')
room_history_list = []
with open('roomhistories/' + room[0] + '.txt') as room_history_file:
lines = room_history_file.readlines()
for line in lines:
room_history_list.append(line)
#
#for message in room_history_raw:
# print(message)
for room_fdb in eval(room_fdbs):
#print('room_fdb',room_fdb, 'fdb_list',fdb_list)
try:
iteration_var_list = config.get(room_fdb).get("entry-list").get("iteration-var-list")
except Exception as e:
print('there was an error with the entry-list parameter in the config regarding the fdb ', room_fdb)
if room_fdb in fdb_list and room_fdb not in [key for key in data]:
iterdict = {}
for i in eval(iteration_var_list):
f = open(spider_directory + "/spiders/output/" + room_fdb + str(i) + "entryList.txt")
text = f.read()
dictionary_entry_list = eval(text)
iterdict[i] = dictionary_entry_list
data[room_fdb] = iterdict
for i in eval(iteration_var_list):
try:
print(room_fdb, i)
print('oioioioioiOIOIOI')
for key in data[room_fdb][i]:
contains_tag = False
period_data_formatted = dateutil.parser.parse(data[room_fdb][i][key]["period"])
name_data_lower = [word.lower() for word in data[room_fdb][i][key]["name"].split(' ')]
info_data_lower = [word.lower() for word in data[room_fdb][i][key]["info"].split(' ')]
text_data_lower = [word.lower() for word in data[room_fdb][i][key]["text"].split(' ')]
print('got until ONE')
tag_list = []
for tag in eval(room_tags):
print('got until TWO')
if '_' in tag:
ntags = tag.split('_')
ntags_length = len(ntags)
print(ntags)
ntag_count = 0
for ntag in ntags:
print('searching for ntag: ', ntag)
for nword in name_data_lower:
if ntag.lower() in nword:
if nword != '':
ntag_count += 1
print(ntag, ' ntag was found')
break
print('ntag count is ', ntag_count, 'ntag_length is ', ntags_length)
if ntag_count == ntags_length:
contains_tag = True
tag_list.append('_'.join(ntags) + ' in name')
ntag_count = 0
for ntag in ntags:
for iword in info_data_lower:
if ntag.lower() in iword:
if iword != '':
ntag_count += 1
break
if ntag_count == ntags_length:
contains_tag = True
tag_list.append('_'.join(ntags) + ' in info')
ntag_count = 0
for ntag in ntags:
for tword in text_data_lower:
if ntag.lower() in tword:
if tword != '':
ntag_count += 1
break
if ntag_count == ntags_length:
contains_tag = True
tag_list.append('_'.join(ntags) + ' in text')
else:
#print('------------------')
print(name_data_lower, info_data_lower, text_data_lower, room[0], room_tags)
nword_contains_tag = False
for nword in name_data_lower:
print(tag.lower(), nword)
if tag.lower() in nword:
print('goooot heeeere')
print(nword)
if nword != '':
print('goot behind nword check')
nword_contains_tag = True
if nword_contains_tag == True:
contains_tag = True
tag_list.append(tag + ' in name')
iword_contains_tag = False
for iword in info_data_lower:
if tag.lower() in iword:
if iword != '':
iword_contains_tag = True
print('oioiOIOIOIoioioiOIOIword', iword)
if iword_contains_tag == True:
contains_tag = True
tag_list.append(tag + ' in info')
tword_contains_tag = False
for tword in text_data_lower:
if tag.lower() in tword:
if tword != '':
tword_contains_tag = True
if tword_contains_tag == True:
contains_tag = True
tag_list.append(tag + ' in text')
print('got until THREE')
if contains_tag == True:
#print('------------------')
#print(name_data_lower, info_data_lower, text_data_lower)
try:
url = data[room_fdb][i][key]["domain"]
except:
url = data[room_fdb][i][key]["link"]
print('101110001101010010010101000111')
entry_message = '<' + url + '|' + data[room_fdb][i][key]["name"]+ '>' + '\n' + data[room_fdb][i][key]["info"] + '\n' + 'The period of the entry is:' + str(period_data_formatted) + '\n' + str(tag_list)
entry_message_identifier = '<' + url + '|' + data[room_fdb][i][key]["name"]+ '>'
message_was_already_there = False
for message in room_history_list:
print('message', message[:-1], 'identifier', entry_message_identifier)
if url in message[:-1] or data[room_fdb][i][key]["name"] in message:
message_was_already_there = True
now = datetime.datetime.now()
now_formatted = dateutil.parser.parse(str(now))
try:
delta = period_data_formatted - now_formatted
days_to_check = delta.days
except Exception as e:
days_to_check = int(room_deadline_days) - 1
print('calc of delta did now work, original error is:', e)
#print('delta', delta, int(delta.days))
if message_was_already_there == False and int(days_to_check) < int(room_deadline_days):
api.send_message(entry_message, room[0])
try:
room_file = open('roomhistories/' + room[0] + '.txt', 'x')
room_file.close()
except:
print('appending to roomhistory')
room_file = open('roomhistories/' + room[0] + '.txt', 'a')
room_file.write(entry_message_identifier + '\n')
room_file.close()
except Exception as e:
print("probably i was not there in last page, original error is:", e)
if room_to_update == 'NONE':
answer = 'No room was specified, all rooms will get an update based on the data available'
if room_was_found == False and room_to_update != 'NONE':
answer = 'The room you specified does not exist, or there are no rooms configured, try the command without argument -> updaterooms'
if room_was_found == True and room_to_update != 'NONE':
answer = "I am updating the specified room based on the data in fdb-spider/spiders/output.."
if user_input_list[0] == 'printfdbs':
answer = str(fdb_list)
if len(user_input_list) > 1:
answer = 'Die Syntax zum Ausgeben der konfigurierten Förderdatenbanken hat einen Fehler. Versuche es erneut, mit -> printfdbs'
if user_input_list[0] == 'deleteroom':
try:
room_id_del = user_input_list[1]
except Exception as e:
room_id_del = 'NONE'
print(e)
os.remove("rooms/" + room_id_del + ".txt")
anwer = 'deleting the requested room..'
if len(user_input_list) > 2:
answer = 'Die Syntax zum Löschen eines Raumes hat einen Fehler. Versuche es erneut, nach dem Muster -> deleteroom <room_id>'
if user_input_list[0] == 'createroom':
try:
room_name = user_input_list[1]
except Exception as e:
room_name = 'NONE'
print(e)
try:
databases = user_input_list[2]
except Exception as e:
databases = 'NONE'
print(e)
try:
filters = user_input_list[3]
except Exception as e:
filters = 'NONE'
print(e)
try:
days_to_deadline = user_input_list[4]
except Exception as e:
days_to_deadline = 'NONE'
print(e)
if len(user_input_list) > 4:
answer = 'Die Syntax hat einen Fehler. Wahrscheinlich sind Leerzeichen in den Listen. Leerzeichen definieren die Syntax. Versuche es erneut, nach dem Muster -> createroom room1 ["fdb1","fdb2"] ["tag1","tag2"]'
if room_name == 'NONE' or databases == 'NONE' or filters == 'NONE' or days_to_deadline == 'NONE':
answer = 'Um einen Raum zu erstellen, in dem neueste Einträge geteilt werden, lautet die Syntax: createroom <Raumname> <Liste-Datenbanken> <Liste-Filtertags> <Tage-bis-zur-Frist>'
else:
try:
new_room = api.create_public_room(room_name,
members=[latest_message_user_username],
read_only=False)
new_room_id = new_room['channel']['_id']
room_file = open('rooms/' + new_room_id + '.txt', 'w')
room_file.write( room_name + '§%§%' + databases + '§%§%' + filters + '§%§%' + days_to_deadline)
room_file.close()
answer = 'Der Command wurde übermittelt'
except Exception as e:
print('it was not able to create the room, the original error message is:', e)
answer = 'There was an error creating the room, look up the logs.. the original error was: ' + str(e)
#print('oi', user_input)
api.send_message('Die Anfrage wird bearbeitet..', room_id)
# here comes the code interacting with the spiders output json
#answer = 'the up to date entries are: oi Oi Oi!'
api.send_message(answer, room_id)
time.sleep(1)
api.send_message('Ich bin wieder bereit für Konfigurationsinput : )', room_id)
time.sleep(1)