diff --git a/spiders/__pycache__/fdb_spider.cpython-39.pyc b/spiders/__pycache__/fdb_spider.cpython-39.pyc
index 31c7d9b..d093fac 100644
Binary files a/spiders/__pycache__/fdb_spider.cpython-39.pyc and b/spiders/__pycache__/fdb_spider.cpython-39.pyc differ
diff --git a/spiders/fdb_spider.py b/spiders/fdb_spider.py
index bb0b6bd..5da1a6d 100644
--- a/spiders/fdb_spider.py
+++ b/spiders/fdb_spider.py
@@ -10,6 +10,7 @@ import lxml.html
 import lxml.html.soupparser
 from lxml import html
 
+from trafilatura import extract
 
 
 class fdb_spider(object):
@@ -215,8 +216,8 @@ class fdb_spider(object):
                     fdb_conf_entry_list_child_period = fdb_conf_entry_list.get("child-period")
 
 
-                    print('blabliblub')
-                    print('len', len(tree.xpath(fdb_conf_entry_list_parent)))
+                    #print('blabliblub')
+                    #print('len', len(tree.xpath(fdb_conf_entry_list_parent)))
                     for n in range(len(tree.xpath(fdb_conf_entry_list_parent))):
 
                         try:
@@ -253,7 +254,7 @@ class fdb_spider(object):
                                 + "]"
                                 + fdb_conf_entry_list_child_period
                             )[0]
-                            print('period', period)
+                            #print('period', period)
                         except Exception as e:
                             print("period could not be parsed", e, period)
                             period = 'NONE'
@@ -266,7 +267,7 @@ class fdb_spider(object):
                                 + "]"
                                 + fdb_conf_entry_list_child_link
                             )[0]
-                            print('link', link)
+                            #print('link', link)
                         
                         except Exception as e:
                             print("link could not be parsed", e, link)
@@ -386,9 +387,9 @@ class fdb_spider(object):
                 fdb_conf = self.config.get(fdb)
                 fdb_domain = fdb_conf.get("domain")
                 fdb_conf_entry = fdb_conf.get("entry")
-                print('balubaluba', fdb_conf_entry)
+                #print('balubaluba', fdb_conf_entry)
                 fdb_conf_entry_general = fdb_conf_entry.get("general")
-                print(fdb_conf_entry_general)
+                #print(fdb_conf_entry_general)
                 
                 
                 for entry_id in dictionary_entry_list:
@@ -424,7 +425,7 @@ class fdb_spider(object):
                                 fdb_conf_entry_unitrue_entry_child
                             )
 
-                            print("oi", child)
+                            #print("oi", child)
 
                             if len(child) > 0:
                                 dictionary_entry_list[entry_id][key] = child[
@@ -444,18 +445,73 @@ class fdb_spider(object):
                             
                             
 
-                        text = tree.xpath(
+                        p_text = tree.xpath(
                             "//p//text()"
                         )
-
-                        print("oi", text)
+                        
+                        div_text = tree.xpath(
+                            "//div//text()"
+                        )
+                        
+                        
+                        #print("oi", text)
                         generaltext = ''
-                        for n in range(len(text)):
+                        for n in range(len(p_text)):
+                            
+                            if len(p_text[n]) > 0:
+                                generaltext += p_text[n] + ' '
+                        
+                        for n in range(len(div_text)):
+                            
+                            if len(div_text[n]) > 0 and div_text[n] not in p_text:
+                                generaltext += div_text[n] + ' '
+                        
+                        
+                        generaltextlist = generaltext.split(' ')
+                        if len(generaltextlist) > 5000:
+                            print('text over 1000 words for entry id', entry_id, ' number of words:', len(generaltextlist))
                             
-                            if len(text[n]) > 0:
-                                generaltext += text[n] + ' '
+                            file_name = "spiders/pages/" + fdb + str(i) + "/" + str(entry_id) + ".html"
+                            
+                            try:
+                                with open(file_name , 'r', encoding='utf-8') as file:
+                                    html_content = file.read()
+                            except Exception as e:
+                                
+                                with open(file_name , 'r', encoding='latin-1') as file:
+                                    html_content = file.read()
+                                print('encoding utf8 in opening with trafilatura did not work, trying latin1, original error message is:', e)
+                            
+                            generaltext = extract(html_content)
+                            print('generaltext word count was: ', len(generaltextlist), 'but now trafilatura did the job and new wordcount is:', len(generaltext.split(' ')))
+                            
+                        if len(generaltextlist) < 2:
+                            print('no text parsed, the wc is', len(generaltextlist))
+                            
+                            print('text under 2 words for entry id', entry_id, ' number of words:', len(generaltextlist))
+                            
+                            file_name = "spiders/pages/" + fdb + str(i) + "/" + str(entry_id) + ".html"
+                            
+                            try:
+                                with open(file_name , 'r', encoding='utf-8') as file:
+                                    html_content = file.read()
+                            except Exception as e:
                                 
+                                with open(file_name , 'r', encoding='latin-1') as file:
+                                    html_content = file.read()
+                                print('encoding utf8 in opening with trafilatura did not work, trying latin1, original error message is:', e)
+                            
+                            generaltext = extract(html_content)
+                            try:
+                                if len(generaltext) > 2:
+                                    print('generaltext word count was: ', len(generaltextlist), 'but now trafilatura did the job and new wordcount is:', len(generaltext.split(' ')))
+                            except:
+                                
+                                print('trafilatura got this out:', generaltext , 'setting generaltext to NONE')
+                                generaltext = 'NONE'
+                        
                         dictionary_entry_list[entry_id]["text"] = generaltext
+                        dictionary_entry_list[entry_id]["text-word-count"] = len(generaltextlist)