"""The interface of the fdb-spider, based on Rocket.Chat."""

import os
from rocketchat.api import RocketChatAPI
import configparser
import json
import requests
import yaml
import subprocess
# Read the bot credentials, the control room and the spider location from
# config.ini in the working directory.
config = configparser.ConfigParser()
config.read('config.ini')
botname, botpassword, server_url, room_id, bot_user_id = (
    config['Chat'][option]
    for option in ('username', 'password', 'URL', 'room_id', 'bot_user_id')
)
spider_directory = config['Spider']['spider_directory']

# Chat endpoint of a locally hosted model API; for local streaming it is
# served without ssl (http://).
# Alternatives noted by the original author:
#   http://{HOST}/api
#   http://192.168.9.197:5000/api/v1/chat
#   https://your-uri-here.trycloudflare.com/api/v1/chat  (reverse proxy, ssl)
HOST = 'localhost:5000'
URI = 'http://' + HOST + '/api/v1/chat'
import ast
import datetime
import time
from datetime import timedelta

# Separator between the fields of a room file and of the last-message file.
_FIELD_SEPARATOR = '§%§%'
_ROOMS_DIR = 'rooms'
_ROOM_HISTORY_DIR = 'roomhistories'
_LAST_MESSAGE_FILE = 'new_message_file.txt'
# The daily room update runs once the local hour has reached this value.
_UPDATE_FROM_HOUR = 12
_NO_COMMAND_ANSWER = 'Ich habe kein Kommando erhalten.'
_CREATEROOM_SYNTAX_ERROR = 'Die Syntax hat einen Fehler. Wahrscheinlich sind Leerzeichen in den Listen. Leerzeichen definieren die Syntax. Versuche es erneut, nach dem Muster -> createroom room1 ["fdb1","fdb2"] ["tag1","tag2"]'
_HELP_TEXT = """
To print all available rooms and their configuration, use
command : `printrooms`
-----------------------------------------------------------------------------------------
To print all available fdbs, use
command : `printfdbs`
-----------------------------------------------------------------------------------------
To update all rooms use
command : `updaterooms`
-----------------------------------------------------------------------------------------
To update one room use the room_id from the output of printrooms:
command : `updaterooms <room-id>`
example : `updaterooms 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To create a room use following command, but be sure to not have spaces in your lists,
as a space indicates a new command argument:
command : `createroom <room-name> <list-fdbs> <list-tags>`
example : `createroom room-test-1 ['giz','fdb2'] ['tag1','tag2','tag3']`
-----------------------------------------------------------------------------------------
To delete a room use
command : `deleteroom <room-id>`
example : `deleteroom 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To print the tags of a room use
command : `printtags <room-id>`
example : `printtags 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To add tags to the existing tags use
command : `addtags <room-id> <tag-list-without-spaces>`
example : `addtags 6572012bebb39dd248d08320 ['tag1','tag2','tag3']`
"""


def _connect():
    """Return a new Rocket.Chat API session built from the config.ini values."""
    return RocketChatAPI(settings={'username': botname, 'password': botpassword, 'domain': server_url})


def _parse_list_literal(text):
    """Parse a list literal such as "['giz','fdb2']" into a list of strings.

    The text originates from chat messages (directly or via the room files),
    so it is parsed with ast.literal_eval instead of eval: it can only yield
    literals and never executes code.

    Raises ValueError or SyntaxError when the text is not a list of strings.
    """
    value = ast.literal_eval(text)
    if not isinstance(value, (list, tuple)) or not all(isinstance(item, str) for item in value):
        raise ValueError('expected a list of strings, got ' + repr(text))
    return list(value)


def _room_file_path(room_id):
    """Return the path of the room file for room_id.

    Raises ValueError for an empty id or one containing a directory part, so
    that a chat command cannot address files outside the rooms directory.
    """
    if not room_id or os.path.basename(room_id) != room_id:
        raise ValueError('invalid room id: ' + repr(room_id))
    return os.path.join(_ROOMS_DIR, room_id + '.txt')


def _load_rooms():
    """Return [room_id, name, fdb_list_literal, tag_list_literal] per room file.

    The room id is the file name without its 4-character extension. Files
    with fewer than three separator-delimited fields are skipped.
    """
    rooms = []
    for file_name in os.listdir(_ROOMS_DIR):
        with open(os.path.join(_ROOMS_DIR, file_name), 'r') as room_file:
            fields = room_file.read().split(_FIELD_SEPARATOR)
        if len(fields) < 3:
            print('skipping malformed room file', file_name)
            continue
        rooms.append([file_name[:-4], fields[0], fields[1], fields[2]])
    return rooms


def _load_spider_config(spider_dir):
    """Return (config, fdb_list) read from <spider_dir>/spiders/config.yaml.

    fdb_list holds the top-level keys of the yaml file. A yaml syntax error
    is printed and yields an empty config.
    """
    spider_config = {}
    with open(spider_dir + '/spiders/config.yaml', "r") as stream:
        try:
            spider_config = yaml.safe_load(stream) or {}
        except yaml.YAMLError as exc:
            print(exc)
    return spider_config, list(spider_config)


def _read_room_history(room_id):
    """Return the lines already recorded for room_id ([] if none exist yet)."""
    try:
        with open(os.path.join(_ROOM_HISTORY_DIR, room_id + '.txt')) as history_file:
            return history_file.readlines()
    except FileNotFoundError:
        return []


def _append_room_history(room_id, identifier):
    """Record identifier as posted in the history file of room_id."""
    # Mode 'a' creates the file when it does not exist yet.
    with open(os.path.join(_ROOM_HISTORY_DIR, room_id + '.txt'), 'a') as history_file:
        history_file.write(identifier + '\n')


def _load_fdb_entries(spider_dir, fdb_name, iteration_vars):
    """Return {iteration_var: entries} read from the spider output files.

    Output files that cannot be opened are reported and left out.
    """
    entries_by_var = {}
    for i in iteration_vars:
        path = spider_dir + "/spiders/output/" + fdb_name + str(i) + "entryList.txt"
        try:
            with open(path) as entry_file:
                text = entry_file.read()
        except OSError as e:
            print('could not read spider output, original error is:', e)
            continue
        # NOTE(security): eval executes whatever the output file contains.
        # Kept because the exact format the spider writes is not visible
        # here; switch to ast.literal_eval once confirmed to be plain literals.
        entries_by_var[i] = eval(text)
    return entries_by_var


def _field_has(words, needle):
    """Return True when needle is a substring of at least one non-empty word."""
    return any(word and needle in word for word in words)


def _find_tag_hits(entry, tags):
    """Return the hits of tags in entry, e.g. ['tag1 in name', 'tag2 in text'].

    The "name", "info" and "text" values of entry are lowercased and split on
    spaces. A tag hits a field when it is a substring of one of the field's
    words; a composite tag such as 'a_b' hits when every '_'-separated part
    does. Each tag is reported at most once per field.
    """
    fields = [
        (field, [word.lower() for word in entry[field].split(' ')])
        for field in ('name', 'info', 'text')
    ]
    hits = []
    for tag in tags:
        parts = tag.lower().split('_')
        for field, words in fields:
            if all(_field_has(words, part) for part in parts):
                hits.append(tag + ' in ' + field)
    return hits


def _post_new_entries(api, room, spider_config, data, spider_dir):
    """Send every tag-matching entry not yet in the room history to the room.

    room is one item of _load_rooms(). data caches the loaded spider output
    per fdb and is filled in place, so it can be shared between rooms.
    """
    target_room_id = room[0]
    try:
        room_fdbs = _parse_list_literal(room[2])
        room_tags = _parse_list_literal(room[3])
    except (ValueError, SyntaxError) as e:
        print('skipping room', target_room_id, 'because its fdb or tag list is malformed:', e)
        return
    history = _read_room_history(target_room_id)
    for room_fdb in room_fdbs:
        try:
            # NOTE(security): eval on a value from the operator-maintained
            # config.yaml; kept because its exact format is not visible here.
            iteration_vars = eval(spider_config.get(room_fdb).get("entry-list").get("iteration-var-list"))
        except Exception as e:
            print('there was an error with the entry-list parameter in the config regarding the fdb ', room_fdb)
            print(e)
            # Skip the fdb instead of reusing the previous fdb's list.
            continue
        if room_fdb not in data:
            data[room_fdb] = _load_fdb_entries(spider_dir, room_fdb, iteration_vars)
        for i in iteration_vars:
            try:
                print(room_fdb, i)
                entries = data[room_fdb][i]
                for key in entries:
                    entry = entries[key]
                    hits = _find_tag_hits(entry, room_tags)
                    if not hits:
                        continue
                    url = entry["domain"] if "domain" in entry else entry["link"]
                    identifier = '<' + url + '|' + entry["name"] + '>'
                    if any(url in line[:-1] or entry["name"] in line for line in history):
                        continue
                    api.send_message(identifier + '\n' + entry["info"] + '\n' + str(hits), target_room_id)
                    _append_room_history(target_room_id, identifier)
                    # Also remember it in memory so the same entry is not
                    # posted twice within one update run.
                    history.append(identifier + '\n')
            except Exception as e:
                # Best effort: a failure only ends this iteration variable.
                print("probably i was not there in last page, original error is:", e)


def _update_rooms(api, spider_dir, room_to_update=None):
    """Post new matching entries to one room, or to all when room_to_update is None.

    Returns (room_was_found, fdb_list), fdb_list being the freshly read list
    of fdbs configured in the spider config.
    """
    rooms = _load_rooms()
    spider_config, fdb_list = _load_spider_config(spider_dir)
    data = {}
    room_was_found = False
    for room in rooms:
        if room_to_update is None or room[0] == room_to_update:
            room_was_found = True
            print(room[0])
            _post_new_entries(api, room, spider_config, data, spider_dir)
    return room_was_found, fdb_list


def _cmd_addtags(args):
    """Handle `addtags <room-id> <tag-list>` and return the answer text."""
    if len(args) != 3:
        return 'The syntax is: addtags <room-id> <tag-list-without-spaces>'
    try:
        path = _room_file_path(args[1])
        with open(path, 'r') as room_file:
            room_info = room_file.read().split(_FIELD_SEPARATOR)
    except (OSError, ValueError) as e:
        print('error opening, original error is:', e)
        return "The room_id to update the tags was not found"
    try:
        tag_list = _parse_list_literal(room_info[-1])
        for tag in _parse_list_literal(args[2]):
            if tag not in tag_list:
                tag_list.append(tag)
        name, fdbs = room_info[0], room_info[1]
    except (ValueError, SyntaxError, IndexError) as e:
        print('error reading the tag list, original error is:', e)
        return 'The tag list could not be read, the syntax is: addtags <room-id> <tag-list-without-spaces>'
    with open(path, 'w') as room_file:
        room_file.write(str(name) + _FIELD_SEPARATOR + str(fdbs) + _FIELD_SEPARATOR + str(tag_list))
    return 'the updated tag list is' + str(tag_list)


def _cmd_printtags(args):
    """Handle `printtags <room-id>` and return the answer text."""
    if len(args) < 2:
        return "after the command printtags, the second argument has to be the room id.. use printrooms and look up the id of the room you want to print the tags"
    try:
        with open(_room_file_path(args[1]), 'r') as room_file:
            room_info = room_file.read().split(_FIELD_SEPARATOR)
        return str(_parse_list_literal(room_info[-1]))
    except Exception as e:
        print('error opening, original error is:', e)
        return "The room_id to get the tags was not found"


def _cmd_updaterooms(api, args, spider_dir):
    """Handle `updaterooms [<room-id>]`; return (answer text, fresh fdb_list)."""
    room_to_update = args[1] if len(args) > 1 and args[1] else None
    room_was_found, fdb_list = _update_rooms(api, spider_dir, room_to_update)
    if room_to_update is None:
        answer = 'No room was specified, all rooms will get an update based on the data available'
    elif room_was_found:
        answer = "I am updating the specified room based on the data in fdb-spider/spiders/output.."
    else:
        answer = 'The room you specified does not exist, or there are no rooms configured, try the command without argument -> updaterooms'
    return answer, fdb_list


def _cmd_deleteroom(args):
    """Handle `deleteroom <room-id>` (removes the room file); return the answer."""
    if len(args) != 2:
        return 'Die Syntax zum Löschen eines Raumes hat einen Fehler. Versuche es erneut, nach dem Muster -> deleteroom <room_id>'
    try:
        os.remove(_room_file_path(args[1]))
    except (OSError, ValueError) as e:
        print('error deleting, original error is:', e)
        return 'The room_id to delete was not found'
    return 'deleting the requested room..'


def _cmd_createroom(api, args, username):
    """Handle `createroom <name> <fdb-list> <tag-list>`; return the answer text.

    Creates a public room with username as member and stores name, fdb list
    and tag list in the room file named after the new room's id.
    """
    if len(args) < 4:
        return 'Um einen Raum zu erstellen, in dem neueste Einträge geteilt werden, lautet die Syntax: createroom <Raumname> <Liste-Datenbanken> <Liste-Filtertags>'
    if len(args) > 4:
        return _CREATEROOM_SYNTAX_ERROR
    room_name, databases, filters = args[1], args[2], args[3]
    try:
        # Refuse lists that a later room update could not parse.
        _parse_list_literal(databases)
        _parse_list_literal(filters)
    except (ValueError, SyntaxError) as e:
        print(e)
        return _CREATEROOM_SYNTAX_ERROR
    try:
        new_room = api.create_public_room(room_name,
                                          members=[username],
                                          read_only=False)
        new_room_id = new_room['channel']['_id']
        with open(_room_file_path(new_room_id), 'w') as room_file:
            room_file.write(room_name + _FIELD_SEPARATOR + databases + _FIELD_SEPARATOR + filters)
        return 'Der Command wurde übermittelt'
    except Exception as e:
        print('it was not able to create the room, the original error message is:', e)
        return 'There was an error creating the room, look up the logs.. the original error was: ' + str(e)


def _handle_command(api, user_input, username, fdb_list, spider_dir):
    """Execute the chat command in user_input.

    Returns (answer, fdb_list): the text to send back and the list of
    configured fdbs, which updaterooms refreshes.
    """
    args = user_input.split(' ')
    command = args[0]
    answer = _NO_COMMAND_ANSWER
    if command == 'addtags':
        answer = _cmd_addtags(args)
    elif command == 'printtags':
        answer = _cmd_printtags(args)
    elif command == 'printcommands':
        answer = _HELP_TEXT
    elif command == 'printrooms':
        answer = str(_load_rooms())
    elif command == 'updaterooms':
        answer, fdb_list = _cmd_updaterooms(api, args, spider_dir)
    elif command == 'printfdbs':
        if len(args) > 1:
            answer = 'Die Syntax zum Ausgeben der konfigurierten Förderdatenbanken hat einen Fehler. Versuche es erneut, mit -> printfdbs'
        else:
            answer = str(fdb_list)
    elif command == 'deleteroom':
        answer = _cmd_deleteroom(args)
    elif command == 'createroom':
        answer = _cmd_createroom(api, args, username)
    return answer, fdb_list


def _read_last_message_id():
    """Return the id of the last handled message ('' when nothing is stored)."""
    try:
        with open(_LAST_MESSAGE_FILE, 'r') as last_message_file:
            content = last_message_file.read()
    except FileNotFoundError:
        return ''
    # The id is the last field; the message text itself may hold the separator.
    return content.rpartition(_FIELD_SEPARATOR)[2]


def _write_last_message(message, message_id):
    """Store text and id of the message that is about to be handled."""
    with open(_LAST_MESSAGE_FILE, 'w') as last_message_file:
        last_message_file.write(message + _FIELD_SEPARATOR + message_id)


def main():
    """Run the bot: update the rooms once a day and answer chat commands.

    Every two seconds the control room (room_id) is polled. The newest
    message is executed as a command when it was not written by the bot
    itself and has not been handled before.
    """
    api = _connect()
    print('blubidab oioioi', api.get_private_rooms())
    # Load rooms and spider config once at start, so a broken setup shows up
    # immediately.
    print(_load_rooms())
    _, fdb_list = _load_spider_config(spider_directory)
    # change to False here, if you want to have the update run at server start
    already_updated = True
    n = 0
    while True:
        time.sleep(2)
        if datetime.datetime.now().hour < _UPDATE_FROM_HOUR:
            # Before the update hour: arm the update for later today.
            already_updated = False
        elif not already_updated:
            _, fdb_list = _update_rooms(api, spider_directory)
            already_updated = True
        n += 1
        if n % 100 == 0:
            print(n)
        try:
            oldest = datetime.datetime.now() - timedelta(days=3)
            room_history = api.get_private_room_history(room_id, oldest=oldest)
        except Exception as e:
            print('got a connection error, original message is:', e)
            time.sleep(10)
            api = _connect()
            time.sleep(5)
            # Retry on the next pass instead of failing on an immediate retry.
            continue
        messages = room_history.get('messages') or []
        if not messages:
            continue
        latest = messages[0]
        if latest['u']['_id'] == bot_user_id:
            continue
        # Only the id is compared, so the same command text can be sent twice
        # in a row.
        if latest['_id'] == _read_last_message_id():
            continue
        _write_last_message(latest['msg'], latest['_id'])
        api.send_message('Die Anfrage wird bearbeitet..', room_id)
        answer, fdb_list = _handle_command(
            api, latest['msg'], latest['u']['username'], fdb_list, spider_directory)
        api.send_message(answer, room_id)
        time.sleep(1)
        api.send_message('Ich bin wieder bereit für Konfigurationsinput : )', room_id)
        time.sleep(1)


if __name__ == '__main__':
    main()