"""Rocket.Chat configuration bot for the fdb-spider.

The bot polls one configuration room for text commands (``printcommands``
lists them).  Rooms are described by files in ``rooms/`` of the form
``name§%§%fdb-list§%§%tag-list``; entries already posted to a room are
remembered in ``roomhistories/<room id>.txt`` so they are not posted twice.
"""
import ast
import configparser
import datetime
import json
import os
import subprocess
import time
from datetime import timedelta

import dateutil.parser
import requests
import schedule
import yaml
from rocketchat.api import RocketChatAPI

config = configparser.ConfigParser()
config.read('config.ini')
botname = config['Chat']['username']
botpassword = config['Chat']['password']
server_url = config['Chat']['URL']
room_id = config['Chat']['room_id']
bot_user_id = config['Chat']['bot_user_id']
spider_directory = config['Spider']['spider_directory']

# Endpoint of a local text-generation API; not used by the code below.
# For local streaming, the websockets are hosted without ssl - http://
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/chat'
# For reverse-proxied streaming, the remote will likely host with ssl - https://
# URI = 'https://your-uri-here.trycloudflare.com/api/v1/chat'

FIELD_SEPARATOR = '§%§%'
ROOMS_DIR = 'rooms'
HISTORY_DIR = 'roomhistories'
LAST_MESSAGE_FILE = 'new_message_file.txt'
DEFAULT_ANSWER = 'Ich habe kein Kommando erhalten.'

# The daily update is meant to run once per day after this hour.
UPDATE_AFTER_HOUR = 11
# NOTE(review): the original scheduling condition required the "after hour"
# flag to be True and False at the same time, so the daily update never ran.
# That observable behaviour is kept by default; set this to True to actually
# enable the daily automatic update.
SCHEDULED_UPDATE_ENABLED = False

ROOM_LIST_RULE = '------------------------------------------------------------------------------'

# Argument placeholders are spelled out so every usage line names what to pass.
HELP_TEXT = """
To print all available rooms and their configuration, use
command : `printrooms`
-----------------------------------------------------------------------------------------
To print all available fdbs, use
command : `printfdbs`
-----------------------------------------------------------------------------------------
To update all rooms use
command : `updaterooms`
-----------------------------------------------------------------------------------------
To update one room use the room_id from the output of printrooms:
command : `updaterooms <room_id>`
example : `updaterooms 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To create a room use following command, but be sure to **not have spaces** in your lists, as a space indicates a new command argument:
command : `createroom <room_name> <list_of_fdbs> <list_of_tags>`
example : `createroom room-test-1 ['giz','fdb2'] ['tag1','tag2','tag3']`
-----------------------------------------------------------------------------------------
To delete a room use
command : `deleteroom <room_id>`
example : `deleteroom 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To print the tags of a room use
command : `printtags <room_id>`
example : `printtags 6572012bebb39dd248d08320`
-----------------------------------------------------------------------------------------
To add tags to the existing tags use
command : `addtags <room_id> <list_of_tags>`
example : `addtags 6572012bebb39dd248d08320 ['tag1','tag2','tag3']`
"""


def _connect():
    """Return a RocketChatAPI client built from the credentials in config.ini."""
    return RocketChatAPI(settings={'username': botname, 'password': botpassword,
                                   'domain': server_url})


def _is_safe_room_key(room_key):
    """Return True when *room_key* can be used as a file name inside a data dir.

    Room ids arrive as chat input and are turned into file paths, so anything
    that could leave the directory is rejected.
    """
    if room_key in ('', '.', '..'):
        return False
    return '/' not in room_key and '\\' not in room_key


def _room_file(room_key):
    """Return the path of the configuration file of room *room_key*."""
    return os.path.join(ROOMS_DIR, room_key + '.txt')


def _history_file(room_key):
    """Return the path of the posted-entries history of room *room_key*."""
    return os.path.join(HISTORY_DIR, room_key + '.txt')


def _parse_list(raw):
    """Parse a Python literal such as ``['a','b']`` without executing code.

    Raises ValueError or SyntaxError when *raw* is not a valid literal.
    """
    return ast.literal_eval(raw.strip())


def _load_rooms():
    """Return ``[room_id, name, fdb_list_text, tag_list_text]`` per room file.

    Files that do not end in ``.txt`` or do not hold three fields are skipped.
    """
    try:
        file_names = sorted(os.listdir(ROOMS_DIR))
    except FileNotFoundError:
        return []
    rooms = []
    for file_name in file_names:
        if not file_name.endswith('.txt'):
            continue
        with open(os.path.join(ROOMS_DIR, file_name), 'r') as handle:
            fields = handle.read().split(FIELD_SEPARATOR)
        if len(fields) < 3:
            print('skipping malformed room file', file_name)
            continue
        rooms.append([file_name[:-4], fields[0], fields[1], fields[2]])
    return rooms


def _load_spider_config():
    """Load ``<spider_directory>/spiders/config.yaml``.

    Returns the parsed mapping (``{}`` for an empty file) or ``None`` when the
    file cannot be read or parsed, so callers can keep their previous config.
    """
    path = spider_directory + '/spiders/config.yaml'
    try:
        with open(path, "r") as stream:
            loaded = yaml.safe_load(stream)
    except (OSError, yaml.YAMLError) as exc:
        print(exc)
        return None
    if loaded is None:
        return {}
    if not isinstance(loaded, dict):
        print('the spider config is not a mapping, ignoring it')
        return None
    return loaded


def _refresh_spider_config(state):
    """Reload the spider config into *state*, keeping the old one on failure."""
    loaded = _load_spider_config()
    if loaded is not None:
        state['spider_config'] = loaded


def _read_history(room_key):
    """Return the history lines of a room (empty when nothing was posted yet)."""
    try:
        with open(_history_file(room_key)) as handle:
            return handle.readlines()
    except FileNotFoundError:
        return []


def _append_history(room_key, identifier):
    """Append *identifier* as one line to the history file of the room."""
    with open(_history_file(room_key), 'a') as handle:
        handle.write(identifier + '\n')


def _iteration_values(spider_config, room_fdb):
    """Return the iteration values configured for *room_fdb* as a list.

    Raises when the fdb or its ``entry-list``/``iteration-var-list`` keys are
    missing or the value cannot be evaluated.
    """
    raw = spider_config.get(room_fdb).get("entry-list").get("iteration-var-list")
    # NOTE(security): eval() executes the configured expression. Kept because
    # the accepted syntax of this operator-controlled value is not visible
    # here; switch to ast.literal_eval once it is confirmed to be a literal.
    return list(eval(raw))


def _load_fdb_entries(room_fdb, iteration_values):
    """Read the spider output of one fdb, keyed by iteration value.

    Iteration values whose output file is missing or unreadable are left out.
    """
    entries_by_iteration = {}
    for value in iteration_values:
        path = (spider_directory + "/spiders/output/" + room_fdb + str(value)
                + "entryList.txt")
        try:
            with open(path) as handle:
                text = handle.read()
            # NOTE(security): eval() executes whatever the output file
            # contains. Kept because the file format is not visible here;
            # prefer ast.literal_eval/json if the files hold plain literals.
            entries_by_iteration[value] = eval(text)
        except Exception as exc:  # one broken file must not stop the bot
            print('could not load', path, 'original error is:', exc)
    return entries_by_iteration


def _lower_words(text):
    """Split *text* on single spaces and lower-case every word."""
    return [word.lower() for word in text.split(' ')]


def _contains(words, needle):
    """Return True when a non-empty word contains *needle* (case-insensitive)."""
    needle = needle.lower()
    return any(word != '' and needle in word for word in words)


def _matching_tags(tags, fields):
    """Return ``'<tag> in <label>'`` for every tag found in a field.

    *fields* is a list of ``(label, words)`` pairs.  A tag containing ``_`` is
    a compound: all of its parts must occur somewhere in the same field.
    """
    hits = []
    for tag in tags:
        parts = tag.split('_') if '_' in tag else [tag]
        for label, words in fields:
            if all(_contains(words, part) for part in parts):
                hits.append(tag + ' in ' + label)
    return hits


def _announce_entry(api, room_key, entry, tags, history):
    """Post *entry* to the room when it matches a tag and was not posted before.

    *history* is the in-memory list of history lines of the room; it is
    extended on success so the same entry is not posted twice within one run.
    Returns True when a message was sent.
    """
    fields = [
        ('name', _lower_words(entry["name"])),
        ('info', _lower_words(entry["info"])),
        ('text', _lower_words(entry["text"])),
    ]
    hits = _matching_tags(tags, fields)
    if not hits:
        return False
    url = entry["domain"] if "domain" in entry else entry["link"]
    name = entry["name"]
    if any(url in line[:-1] or name in line for line in history):
        return False
    period = dateutil.parser.parse(entry["period"])
    identifier = '<' + url + '|' + name + '>'
    message = (identifier + '\n' + entry["info"] + '\n'
               + 'The period of the entry is:' + str(period) + '\n' + str(hits))
    api.send_message(message, room_key)
    _append_history(room_key, identifier)
    history.append(identifier + '\n')
    return True


def _update_room(api, room, spider_config, entries_cache):
    """Post all new matching entries of the room's fdbs to the room.

    *entries_cache* maps fdb name to its loaded spider output and is shared
    between rooms so every output file is read once per update run.
    """
    room_key, raw_fdbs, raw_tags = room[0], room[2], room[3]
    print(room_key)
    try:
        fdbs = _parse_list(raw_fdbs)
        tags = _parse_list(raw_tags)
    except (ValueError, SyntaxError) as exc:
        print('the fdb or tag list of room', room_key,
              'could not be parsed, original error is:', exc)
        return
    history = _read_history(room_key)
    for room_fdb in fdbs:
        try:
            iteration_values = _iteration_values(spider_config, room_fdb)
        except Exception:
            print('there was an error with the entry-list parameter in the config regarding the fdb ', room_fdb)
            # Without iteration values there is nothing to look up for this fdb.
            continue
        if room_fdb not in entries_cache:
            entries_cache[room_fdb] = _load_fdb_entries(room_fdb, iteration_values)
        for value in iteration_values:
            print(room_fdb, value)
            entries = entries_cache[room_fdb].get(value)
            if entries is None:
                print("probably i was not there in last page, original error is:", repr(value))
                continue
            for key in entries:
                try:
                    _announce_entry(api, room_key, entries[key], tags, history)
                except Exception as exc:  # a broken entry must not block the others
                    print('could not process entry', key, 'original error is:', exc)


def _update_rooms(api, spider_config, room_to_update=None):
    """Update one room (by id) or, with ``None``, every configured room.

    Returns True when at least one room was selected for the update.
    """
    entries_cache = {}
    room_was_found = False
    for room in _load_rooms():
        if room_to_update is not None and room[0] != room_to_update:
            continue
        room_was_found = True
        _update_room(api, room, spider_config, entries_cache)
    return room_was_found


def _cmd_addtags(args):
    """Handle ``addtags <room_id> <list_of_tags>``."""
    if len(args) > 3:
        return DEFAULT_ANSWER
    try:
        room_key, raw_new_tags = args[1], args[2]
        if not _is_safe_room_key(room_key):
            raise ValueError('invalid room id')
        with open(_room_file(room_key), 'r') as handle:
            room_info = handle.read().split(FIELD_SEPARATOR)
        tag_list = list(_parse_list(room_info[-1]))
        for tag in _parse_list(raw_new_tags):
            if tag not in tag_list:
                tag_list.append(tag)
        # Build the content first so a failure cannot truncate the room file.
        content = (str(room_info[0]) + FIELD_SEPARATOR + str(room_info[1])
                   + FIELD_SEPARATOR + str(tag_list))
        with open(_room_file(room_key), 'w') as handle:
            handle.write(content)
    except (OSError, IndexError, ValueError, SyntaxError, TypeError) as e:
        print('error opening, original error is:', e)
        return "The room_id to update the tags was not found"
    return 'the updated tag list is' + str(tag_list)


def _cmd_printtags(args):
    """Handle ``printtags <room_id>``."""
    if len(args) < 2:
        return "after the command printtags, the second argument has to be the room id.. use printrooms and look up the id of the room you want to print the tags"
    try:
        if not _is_safe_room_key(args[1]):
            raise ValueError('invalid room id')
        with open(_room_file(args[1]), 'r') as handle:
            room_info = handle.read().split(FIELD_SEPARATOR)
        return str(_parse_list(room_info[-1]))
    except (OSError, ValueError, SyntaxError) as e:
        print('error opening, original error is:', e)
        return "The room_id to get the tags was not found"


def _cmd_printrooms():
    """Handle ``printrooms``: list every room with its configuration."""
    listing = ''.join(str(room) + '\n' + ROOM_LIST_RULE + '\n'
                      for room in _load_rooms())
    return listing or 'There are no rooms configured'


def _cmd_updaterooms(api, args, state):
    """Handle ``updaterooms [<room_id>]``."""
    room_to_update = args[1] if len(args) > 1 and args[1] else None
    _refresh_spider_config(state)
    room_was_found = _update_rooms(api, state['spider_config'], room_to_update)
    if room_to_update is None:
        return 'No room was specified, all rooms will get an update based on the data available'
    if not room_was_found:
        return 'The room you specified does not exist, or there are no rooms configured, try the command without argument -> updaterooms'
    return "I am updating the specified room based on the data in fdb-spider/spiders/output.."


def _cmd_printfdbs(args, state):
    """Handle ``printfdbs``: list the fdbs named in the spider config."""
    if len(args) > 1:
        return 'Die Syntax zum Ausgeben der konfigurierten Förderdatenbanken hat einen Fehler. Versuche es erneut, mit -> printfdbs'
    return str(list(state['spider_config']))


def _cmd_deleteroom(args):
    """Handle ``deleteroom <room_id>``: remove the room's configuration file."""
    # Validate before touching the file system.
    if len(args) != 2 or not _is_safe_room_key(args[1]):
        return 'Die Syntax zum Löschen eines Raumes hat einen Fehler. Versuche es erneut, nach dem Muster -> deleteroom <room_id>'
    try:
        os.remove(_room_file(args[1]))
    except FileNotFoundError as e:
        print(e)
        return 'The room you specified does not exist, use printrooms to look up the room id'
    return 'deleting the requested room..'


def _cmd_createroom(api, args, sender_username):
    """Handle ``createroom <room_name> <list_of_fdbs> <list_of_tags>``."""
    syntax_error = 'Die Syntax hat einen Fehler. Wahrscheinlich sind Leerzeichen in den Listen. Leerzeichen definieren die Syntax. Versuche es erneut, nach dem Muster -> createroom room1 ["fdb1","fdb2"] ["tag1","tag2"]'
    if len(args) > 4:
        return syntax_error
    if len(args) < 4:
        return 'Um einen Raum zu erstellen, in dem neueste Einträge geteilt werden, lautet die Syntax: createroom <room_name> <list_of_fdbs> <list_of_tags>'
    room_name, databases, filters = args[1], args[2], args[3]
    try:
        # Refuse lists that could not be read back during an update.
        _parse_list(databases)
        _parse_list(filters)
    except (ValueError, SyntaxError) as e:
        print(e)
        return syntax_error
    try:
        new_room = api.create_public_room(room_name, members=[sender_username],
                                          read_only=False)
        new_room_id = new_room['channel']['_id']
        with open(_room_file(new_room_id), 'w') as handle:
            handle.write(room_name + FIELD_SEPARATOR + databases
                         + FIELD_SEPARATOR + filters)
    except Exception as e:
        print('it was not able to create the room, the original error message is:', e)
        return 'There was an error creating the room, look up the logs.. the original error was: ' + str(e)
    return 'Der Command wurde übermittelt'


def _handle_command(api, text, sender_username, state):
    """Run the command in *text* and return the answer to send back."""
    args = text.split(' ')
    command = args[0]
    if command == 'addtags':
        return _cmd_addtags(args)
    if command == 'printtags':
        return _cmd_printtags(args)
    if command == 'printcommands':
        return HELP_TEXT
    if command == 'printrooms':
        return _cmd_printrooms()
    if command == 'updaterooms':
        return _cmd_updaterooms(api, args, state)
    if command == 'printfdbs':
        return _cmd_printfdbs(args, state)
    if command == 'deleteroom':
        return _cmd_deleteroom(args)
    if command == 'createroom':
        return _cmd_createroom(api, args, sender_username)
    return DEFAULT_ANSWER


def _read_last_message_id():
    """Return the id of the last handled message, or None when unknown."""
    try:
        with open(LAST_MESSAGE_FILE, 'r') as handle:
            content = handle.read()
    except FileNotFoundError:
        return None
    # The id is the last field; the message text itself may hold the separator.
    _text, separator, message_id = content.rpartition(FIELD_SEPARATOR)
    return message_id if separator else None


def _remember_message(text, message_id):
    """Persist the message that is about to be handled."""
    with open(LAST_MESSAGE_FILE, 'w') as handle:
        handle.write(text + FIELD_SEPARATOR + message_id)


def main():
    """Poll the configuration room forever and answer the commands sent there."""
    api = _connect()
    rooms = api.get_private_rooms()
    print('blubidab oioioi', rooms)

    os.makedirs(ROOMS_DIR, exist_ok=True)
    os.makedirs(HISTORY_DIR, exist_ok=True)
    state = {'spider_config': _load_spider_config() or {}}

    # change to False here, if you want to have the update run at server start
    already_updated = True
    n = 0
    while True:
        time.sleep(2)

        current_hour = datetime.datetime.now().hour
        if current_hour <= UPDATE_AFTER_HOUR:
            already_updated = False
        elif SCHEDULED_UPDATE_ENABLED and not already_updated:
            _refresh_spider_config(state)
            _update_rooms(api, state['spider_config'])
            already_updated = True

        n += 1
        if n % 100 == 0:
            print(n)

        oldest = datetime.datetime.now() - timedelta(days=3)
        try:
            room_history = api.get_private_room_history(room_id, oldest=oldest)
        except Exception as e:
            print('got a connection error, original message is:', e)
            time.sleep(10)
            try:
                api = _connect()
                time.sleep(5)
                room_history = api.get_private_room_history(room_id, oldest=oldest)
            except Exception as retry_error:
                # Keep the bot alive; the next loop iteration tries again.
                print('reconnecting failed, original message is:', retry_error)
                continue

        messages = room_history.get('messages') or []
        if not messages:
            continue
        latest = messages[0]
        latest_message = latest['msg']
        latest_message_id = latest['_id']
        if latest['u']['_id'] == bot_user_id:
            continue

        last_handled_id = _read_last_message_id()
        if last_handled_id is None:
            # No usable state yet: remember the newest message instead of
            # executing a command that may have been sent long ago.
            _remember_message(latest_message, latest_message_id)
            continue
        # Message ids are compared, not texts, so repeating a command works.
        if last_handled_id == latest_message_id:
            continue

        _remember_message(latest_message, latest_message_id)
        api.send_message('Die Anfrage wird bearbeitet..', room_id)
        answer = _handle_command(api, latest_message, latest['u']['username'], state)
        api.send_message(answer, room_id)
        time.sleep(1)
        api.send_message('Ich bin wieder bereit für Konfigurationsinput : )', room_id)
        time.sleep(1)


if __name__ == '__main__':
    main()