fdb-spider-interface/fdb_spider_interface.py

871 lines
44 KiB
Python
Raw Permalink Normal View History

import os
from rocketchat.api import RocketChatAPI
import configparser
import json
import requests
import yaml
import dateutil.parser
import subprocess
# Load the bot settings from config.ini in the current working directory.
# NOTE: ConfigParser.read() silently skips a missing file, so a missing
# config.ini surfaces as a KeyError on the first section lookup below.
config = configparser.ConfigParser()
config.read('config.ini')

# [Chat] section: the bot's Rocket.Chat login, the server URL, the id of the
# room whose history is polled for commands, and the bot's own user id (used
# to ignore messages the bot wrote itself).
botname, botpassword, server_url, room_id, bot_user_id = (
    config['Chat'][option]
    for option in ('username', 'password', 'URL', 'room_id', 'bot_user_id')
)

# [Spider] section: directory that contains the spider's `spiders/` folder
# (its config.yaml and the output/ entry lists are read from there).
spider_directory = config['Spider']['spider_directory']

# Here come the functions to talk to GPT.
# For local streaming, the websockets are hosted without ssl - http://
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/chat'
# URI = f'http://{HOST}/api'
# http://192.168.9.197:5000/api/v1/chat
# For reverse-proxied streaming, the remote will likely host with ssl - https://
# URI = 'https://your-uri-here.trycloudflare.com/api/v1/chat'
if __name__ == '__main__':
def name2id(name):
    """Return the id of the room whose stored name equals ``name``.

    Every file in the ``rooms`` directory describes one room. The file name
    without its last four characters (the ``.txt`` suffix that callers append
    again when they open ``'rooms/' + id + '.txt'``) is the room id, and the
    file content is a list of fields joined by ``§%§%`` whose first field is
    the room name.

    Only the name field is inspected, so room files that carry fewer fields
    than the current format (e.g. files without the deadline field) no longer
    make the whole lookup fail with an ``IndexError``.

    Args:
        name: Room name to look up.

    Returns:
        The matching room id as a string, or ``'NONE'`` when no room file
        carries that name. If several files share the name, the one listed
        last by ``os.listdir`` wins.

    Raises:
        OSError: If the ``rooms`` directory or one of its entries cannot be
            read.
    """
    outputid = 'NONE'
    for room in os.listdir('rooms'):
        # The context manager closes the handle even if read() raises.
        with open('rooms/' + room, 'r') as room_file:
            # split() always yields at least one element, so [0] is safe.
            room_name = room_file.read().split('§%§%')[0]
        if room_name == name:
            # No break: keep scanning so the last match wins.
            outputid = room[:-4]
    return str(outputid)
# Start-up of the main section: log in to Rocket.Chat and initialise the
# state variables that the polling loop below reads and updates.
# Create the API client with the bot credentials and server URL from config.ini.
api = RocketChatAPI(settings={'username': botname, 'password': botpassword, 'domain': server_url})
2023-12-05 17:18:48 +01:00
2023-11-29 19:54:43 +01:00
# api.send_message('Ciao, I am the fdb-spider', room_id)
#myinfo = api.get_my_info()
2023-11-29 19:54:43 +01:00
#room_history = api.get_private_room_history(room_id)
#print(room_history['messages'][0]['msg'])
2023-12-05 17:18:48 +01:00
# print(myinfo)
# Query the private rooms once and print the result (debug output only;
# the loop below reassigns `rooms` from os.listdir('rooms')).
rooms = api.get_private_rooms()
print('blubidab oioioi', rooms)
# api.send_message('Ole', room_id)
# Loop pass counter: incremented once per pass of the main loop, which
# prints it every 100 passes.
n = 0
2023-12-05 17:18:48 +01:00
# `time` provides the sleep calls in the loop; `schedule` is not used in the
# part of the file visible to this review.
import time
import schedule
2023-12-05 17:18:48 +01:00
#change to False here, if you want to have the update run at server start
# The loop resets this flag to False while the hour is <= 11 and sets it back
# to True after a room update pass.
# NOTE(review): the loop's update condition additionally requires
# `aftersix == True and ... and aftersix == False`, which can never hold -
# confirm whether the timed update is disabled on purpose.
already_updated = True
# True only for the first loop pass, which loads the room files and the
# spider config once and then sets this flag to False.
start = True
# The loop uses datetime.datetime.now() and timedelta(days=3) to bound the
# room-history query.
import datetime
from datetime import timedelta
while True:
2024-07-10 22:22:00 +02:00
time.sleep(3)
#already_updated = True
now = datetime.datetime.now()
current_hour = now.strftime("%H")
#print(current_hour)
2023-12-05 17:18:48 +01:00
# run variable update and creation at start
if start == True:
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
#print(room_list)
#print(room)
#print(room_file_list)
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2]])
with open(spider_directory + '/spiders/config.yaml' , "r") as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
fdb_list = []
#print(config)
for key in config:
fdb_list.append(key)
start = False
if int(current_hour) > 11:
aftersix = True
2023-12-05 17:18:48 +01:00
if int(current_hour) <= 11:
aftersix = False
already_updated = False
if aftersix == True and already_updated == False and aftersix == False:
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
2024-01-22 16:05:21 +01:00
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2], room_file_list[3]])
with open(spider_directory + '/spiders/config.yaml' , "r") as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
fdb_list = []
#print(config)
2023-12-05 17:18:48 +01:00
for key in config:
fdb_list.append(key)
2023-12-05 17:18:48 +01:00
data = dict({})
for room in room_list:
#print(room[0])
room_fdbs = room[2]
room_tags = room[3]
2024-01-22 16:05:21 +01:00
room_deadline_days = room[4]
#subprocess.run(["python", spider_directory + 'main.py', fdbs])
room_history_list = []
try:
room_history_file = open('roomhistories/' + room[0] + '.txt', 'x')
room_history_file.close()
except:
print('reading from roomhistory')
with open('roomhistories/' + room[0] + '.txt') as room_history_file:
lines = room_history_file.readlines()
for line in lines:
room_history_list.append(line)
#date = datetime.datetime.now() - timedelta(days=3)
#room_history = api.get_room_history(room[0], oldest=date, latest=datetime.datetime.now())
for room_fdb in eval(room_fdbs):
#print('room_fdb',room_fdb, 'fdb_list',fdb_list)
try:
iteration_var_list = config.get(room_fdb).get("entry-list").get("iteration-var-list")
except Exception as e:
print('there was an error with the entry-list parameter in the config regarding the fdb ', room_fdb)
if room_fdb in fdb_list and room_fdb not in [key for key in data]:
iterdict = {}
for i in eval(iteration_var_list):
f = open(spider_directory + "/spiders/output/" + room_fdb + str(i) + "entryList.txt")
text = f.read()
dictionary_entry_list = eval(text)
iterdict[i] = dictionary_entry_list
data[room_fdb] = iterdict
for i in eval(iteration_var_list):
try:
#print(room_fdb, i)
for key in data[room_fdb][i]:
contains_tag = False
period_data_formatted = dateutil.parser.parse(data[room_fdb][i][key]["period"])
name_data_lower = [word.lower() for word in data[room_fdb][i][key]["name"].split(' ')]
info_data_lower = [word.lower() for word in data[room_fdb][i][key]["info"].split(' ')]
try:
text_data_lower = [word.lower() for word in data[room_fdb][i][key]["text"].split(' ')]
except Exception as e:
print(e)
text_data_lower = ['none']
tag_list = []
for tag in eval(room_tags):
if '_' in tag:
ntags = tag.split('_')
ntags_length = len(ntags)
ntag_count = 0
for nword in name_data_lower:
for ntag in ntags:
if ntag.lower() in nword:
ntag_count += 1
if ntag_count == ntags_length:
contains_tag = True
tag_list.append(tag + ' in name')
ntag_count = 0
for iword in info_data_lower:
for ntag in ntags:
if ntag.lower() in iword:
ntag_count += 1
if ntag_count == ntags_length:
contains_tag = True
tag_list.append(tag + ' in info')
ntag_count = 0
for tword in text_data_lower:
ntag_count = 0
for ntag in ntags:
if ntag.lower() in tword:
ntag_count += 1
if ntag_count == ntags_length:
contains_tag = True
tag_list.append(tag + ' in text')
else:
for nword in name_data_lower:
if tag.lower() in nword:
contains_tag = True
tag_list.append(tag + ' in name')
for iword in info_data_lower:
if tag.lower() in iword:
contains_tag = True
tag_list.append(tag + ' in info')
for tword in text_data_lower:
if tag.lower() in tword:
contains_tag = True
tag_list.append(tag + ' in text')
if contains_tag == True:
try:
url = data[room_fdb][i][key]["domain"]
except:
url = data[room_fdb][i][key]["link"]
entry_message = '<' + url + '|' + data[room_fdb][i][key]["name"]+ '>' + '\n' + data[room_fdb][i][key]["info"] + '\n' + 'The period of the entry is:' + str(period_data_formatted) + '\n' + str(tag_list)
entry_message_identifier = '<' + url + '|' + data[room_fdb][i][key]["name"]+ '>'
message_was_already_there = False
for message in room_history_list:
#print('message that gets checked with identifier ', message[:-1])
#print('the entry message identifier is:', entry_message_identifier)
if url in message[:-1] or data[room_fdb][i][key]["name"] in message:
message_was_already_there = True
2024-01-22 16:05:21 +01:00
now = datetime.datetime.now()
now_formatted = dateutil.parser.parse(str(now))
delta = period_data_formatted - now_formatted
2024-01-22 16:05:21 +01:00
if message_was_already_there == False and delta.days < int(room_deadline_days):
#print('went into already there false')
api.send_message(entry_message, room[0])
#print('before writing')
try:
room_file = open('roomhistories/' + room[0] + '.txt', 'x')
room_file.close()
except:
print('appending to roomhistory')
room_file = open('roomhistories/' + room[0] + '.txt', 'a')
room_file.write(entry_message_identifier + '\n')
room_file.close()
print('after writing')
except Exception as e:
print("probably i was not there in last page, original error is:", e)
#print('data',data['giz'][2],'data')
already_updated = True
# if datestime.split over etc and updated= true etc
n += 1
if n%100 == 0:
print(n)
try:
2023-12-05 17:18:48 +01:00
#print('getting the room history')
date = datetime.datetime.now() - timedelta(days=3)
room_history = api.get_private_room_history(room_id, oldest=date)
except Exception as e:
time.sleep(10)
api = RocketChatAPI(settings={'username': botname, 'password': botpassword, 'domain': server_url})
time.sleep(5)
room_history = api.get_private_room_history(room_id, oldest=date)
print('got a connection error, original message is:',e)
messages_list = []
for message in room_history['messages']:
messages_list.append(message)
2023-12-05 17:18:48 +01:00
if len(messages_list) >= 1:
2023-12-05 17:18:48 +01:00
#print('blub', messages_list)
latest_message_user_id = messages_list[0]['u']['_id']
2023-12-05 17:18:48 +01:00
latest_message_user_username = messages_list[0]['u']['username']
latest_message = messages_list[0]['msg']
latest_message_id = messages_list[0]['_id']
new_message_file = open('new_message_file.txt', 'r')
new_message = new_message_file.read()
new_message_file.close()
new_message_list = new_message.split('§%§%')
2023-12-05 17:18:48 +01:00
#print(latest_message, new_message_list[0])
2023-11-29 19:54:43 +01:00
if new_message_list[0] != latest_message and new_message_list[1] != latest_message_id and latest_message_user_id != bot_user_id:
2023-12-05 17:18:48 +01:00
answer = 'Ich habe kein Kommando erhalten.'
new_message_file = open('new_message_file.txt', 'w')
new_message_file.write(latest_message + '§%§%' + latest_message_id)
new_message_file.close()
user_input = latest_message
2023-12-05 17:18:48 +01:00
user_input_list = user_input.split(' ')
if user_input_list[0] == 'addtags':
try:
room_name_add_tags = user_input_list[1]
room_id_add_tags = name2id(room_name_add_tags)
#print(room_id_add_tags)
except:
room_id_add_tags = 'NONE'
try:
new_tags = user_input_list[2]
except:
new_tags = 'NONE'
try:
thirdarg = user_input_list[3]
except:
thirdarg = 'NONE'
if room_id_add_tags != 'NONE' and len(new_tags) >= 1 and thirdarg == 'NONE':
try:
room_file_add_tags = open('rooms/' + room_id_add_tags + '.txt', 'r')
room_info_raw = room_file_add_tags.read()
room_file_add_tags.close()
room_info = room_info_raw.split('§%§%')
2024-02-06 15:54:02 +01:00
tag_list = eval(room_info[-2])
for tag in eval(user_input_list[2]):
if tag not in tag_list:
tag_list.append(tag)
room_file_add_tags = open('rooms/' + room_id_add_tags + '.txt', 'w')
2024-02-06 15:54:02 +01:00
room_file_add_tags.write( str(room_info[0]) + '§%§%' + str(room_info[1]) + '§%§%' + str(tag_list) + '§%§%' + str(room_info[-1]))
room_file_add_tags.close()
answer = 'the updated tag list is' + str(tag_list)
except Exception as e:
print('error opening, original error is:', e)
answer = "The room_id to update the tags was not found"
2024-07-10 22:22:00 +02:00
if user_input_list[0] == 'addfdbs':
print('oi')
print(user_input_list)
try:
room_name_add_fdbs = user_input_list[1]
print(room_name_add_fdbs)
room_id_add_fdbs = name2id(room_name_add_fdbs)
print(room_name_add_fdbs)
print(room_id_add_fdbs)
except Exception as e:
room_id_add_fdbs = 'NONE'
print(e, 'did not work')
try:
new_fdbs = user_input_list[2]
except:
new_fdbs = 'NONE'
try:
thirdarg = user_input_list[3]
except:
thirdarg = 'NONE'
if room_id_add_fdbs != 'NONE' and len(new_fdbs) >= 1 and thirdarg == 'NONE':
try:
room_file_add_fdbs = open('rooms/' + room_id_add_fdbs + '.txt', 'r')
room_info_raw = room_file_add_fdbs.read()
room_file_add_fdbs.close()
room_info = room_info_raw.split('§%§%')
fdb_list = eval(room_info[-3])
for fdb in eval(user_input_list[2]):
if fdb not in fdb_list:
fdb_list.append(fdb)
room_file_add_fdbs = open('rooms/' + room_id_add_fdbs + '.txt', 'w')
room_file_add_fdbs.write( str(room_info[0]) + '§%§%' + str(fdb_list) + '§%§%' + str(room_info[2]) + '§%§%' + str(room_info[-1]))
room_file_add_fdbs.close()
answer = 'the updated fdb list is' + str(fdb_list)
except Exception as e:
print('error opening, original error is:', e)
answer = "The room_id to update the fdbs was not found"
if user_input_list[0] == 'printtags':
try:
room_name_add_tags = user_input_list[1]
room_id_to_print_tags = name2id(room_name_add_tags)
except:
answer = "after the command printtags, the second argument has to be the room name.. use printrooms and look up the id of the room you want to print the tags"
room_id_to_print_tags = 'NONE'
if room_id_to_print_tags != 'NONE':
try:
room_file = open('rooms/' + room_id_to_print_tags + '.txt', 'r')
room_info_raw = room_file.read()
room_file.close()
room_info = room_info_raw.split('§%§%')
tag_list = eval(room_info[-2])
answer = tag_list
except Exception as e:
print('error opening, original error is:', e)
answer = "The room_id to get the tags was not found"
if user_input_list[0] == 'printcommands':
answer = """
To print all available rooms and their configuration, use
command : `printrooms`
-----------------------------------------------------------------------------------------
To print all available fdbs, use
command : `printfdbs`
-----------------------------------------------------------------------------------------
To update all rooms use
command : `updaterooms all`
-----------------------------------------------------------------------------------------
To update one room use the name of the room from the output of printrooms:
command : `updaterooms <room-name>`
example : `updaterooms test42`
-----------------------------------------------------------------------------------------
To create a room use following command, but be sure to **not have spaces** in your lists,
as a space indicates a new command argument:
2024-01-22 16:05:21 +01:00
command : `createroom <room-name> <list-fdbs> <list-tags> <days-to-deadline>`
example : `createroom room-test-1 ['giz','fdb2'] ['tag1','tag2','tag3'] 7`
-----------------------------------------------------------------------------------------
To delete a room use
command : `deleteroom <room-name>`
example : `deleteroom test42`
-----------------------------------------------------------------------------------------
To print the tags of a room use
command : `printtags <room-name>`
example : `printtags test42`
-----------------------------------------------------------------------------------------
To add tags to the existing tags use
command : `addtags <room-name> <tag-list-without-spaces>`
example : `addtags test42 ['tag1','tag2','tag3']`
"""
2023-12-05 17:18:48 +01:00
if user_input_list[0] == 'printrooms':
room_was_found = False
# get all the rooms
import os
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2], room_file_list[3]])
room_list_string = ''
for room in room_list:
room_list_string += str(room) + '\n' + '------------------------------------------------------------------------------' + '\n'
#print(room_list)
answer = room_list_string
2023-12-05 17:18:48 +01:00
2023-12-12 12:41:50 +01:00
if user_input_list[0] == 'updaterooms':
try:
if len(user_input_list[1]) >= 1:
room_name_to_update = user_input_list[1]
room_to_update = name2id(room_name_to_update)
#room_to_update = user_input_list[1]
except Exception as e:
room_to_update = 'NONE'
room_list = []
rooms = os.listdir('rooms')
for room in rooms:
room_file = open('rooms/' + room, 'r')
room_file_raw = room_file.read()
room_file.close()
room_file_list = room_file_raw.split('§%§%')
2024-01-22 16:05:21 +01:00
room_list.append([room[:-4], room_file_list[0], room_file_list[1], room_file_list[2], room_file_list[3]])
with open(spider_directory + '/spiders/config.yaml' , "r") as stream:
try:
config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
fdb_list = []
#print(config)
for key in config:
fdb_list.append(key)
2023-12-12 12:41:50 +01:00
answer = "Ich update die Rooms auf Basis der Daten von heute morgen um 6 Uhr.."
2023-12-12 12:41:50 +01:00
data = dict({})
for room in room_list:
if room[0] == room_to_update or room_to_update == 'all':
room_was_found = True
2024-07-10 22:22:00 +02:00
print('oioioiOI', room[0])
room_fdbs = room[2]
room_tags = room[3]
2024-01-22 16:05:21 +01:00
room_deadline_days = room[4]
# not running get_rooms because disfunctional
#subprocess.run(["python", spider_directory + 'main.py', fdbs])
2023-12-12 12:41:50 +01:00
#myinfo = api.get_room_info(room[0])
#print(myinfo)
#roomid = "'657cbeccebb39dd248d38ec3'"
#roomoioioi = api.get_user_info(bot_user_id)
#print(roomoioioi)
#room_history_updateroom = api.get_private_room_history(room[0])
# reading from txt state history instead
try:
room_history_file = open('roomhistories/' + room[0] + '.txt', 'x')
room_history_file.close()
except:
print('reading from roomhistory which is already there')
room_history_list = []
with open('roomhistories/' + room[0] + '.txt') as room_history_file:
lines = room_history_file.readlines()
for line in lines:
room_history_list.append(line)
#
#for message in room_history_raw:
# print(message)
for room_fdb in eval(room_fdbs):
2024-07-10 22:22:00 +02:00
print('room_fdb',room_fdb, 'fdb_list',fdb_list)
try:
iteration_var_list = config.get(room_fdb).get("entry-list").get("iteration-var-list")
except Exception as e:
print('there was an error with the entry-list parameter in the config regarding the fdb ', room_fdb)
if room_fdb in fdb_list and room_fdb not in [key for key in data]:
iterdict = {}
for i in eval(iteration_var_list):
f = open(spider_directory + "/spiders/output/" + room_fdb + str(i) + "entryList.txt")
text = f.read()
dictionary_entry_list = eval(text)
2023-12-12 12:41:50 +01:00
iterdict[i] = dictionary_entry_list
2023-12-12 12:41:50 +01:00
data[room_fdb] = iterdict
2023-12-12 12:41:50 +01:00
for i in eval(iteration_var_list):
try:
2024-07-10 22:22:00 +02:00
print('roomfdb and i', room_fdb, i)
#print('oioioioioiOIOIOI')
for key in data[room_fdb][i]:
2024-07-10 22:22:00 +02:00
#print('the fdb', roomfdb, ' is getting searched')
contains_tag = False
try:
period_data_formatted = dateutil.parser.parse(data[room_fdb][i][key]["period"])
except Exception as e:
period_data_formatted = 'NONE'
#print('getting the period did not work for', room_fdb, i, key, ' ori err is:', e)
name_data_lower = [word.lower() for word in data[room_fdb][i][key]["name"].split(' ')]
info_data_lower = [word.lower() for word in data[room_fdb][i][key]["info"].split(' ')]
try:
text_data_lower = [word.lower() for word in data[room_fdb][i][key]["text"].split(' ')]
except Exception as e:
2024-07-10 22:22:00 +02:00
#print(e, 'there was an exception converting to lowercase')
#try:
# print(data[room_fdb][i][key]["text"].split(' '))
#except Exception as e:
# print(e, 'even printing the data was not possible')
text_data_lower = ['NONE']
#print('got until ONE')
tag_list = []
2024-07-10 22:22:00 +02:00
#print(str(name_data_lower) + ' is getting searched..')
for tag in eval(room_tags):
2024-07-10 22:22:00 +02:00
#print('the tag ' + tag + ' is getting searched')
if '_' in tag:
ntags = tag.split('_')
ntags_length = len(ntags)
#print(ntags)
ntag_count = 0
for ntag in ntags:
#print('searching for ntag: ', ntag)
for nword in name_data_lower:
if ntag.lower() in nword:
if nword != '':
ntag_count += 1
2024-07-10 22:22:00 +02:00
#print(ntag, ' ntag was found in name')
break
#print('ntag count is ', ntag_count, 'ntag_length is ', ntags_length)
if ntag_count == ntags_length:
contains_tag = True
tag_list.append('_'.join(ntags) + ' in name')
ntag_count = 0
for ntag in ntags:
for iword in info_data_lower:
if ntag.lower() in iword:
if iword != '':
ntag_count += 1
2024-07-10 22:22:00 +02:00
#print(ntag, ' ntag was found in info')
break
if ntag_count == ntags_length:
contains_tag = True
tag_list.append('_'.join(ntags) + ' in info')
ntag_count = 0
for ntag in ntags:
for tword in text_data_lower:
if ntag.lower() in tword:
if tword != '':
ntag_count += 1
break
if ntag_count == ntags_length:
contains_tag = True
tag_list.append('_'.join(ntags) + ' in text')