|
@ -167,6 +167,14 @@ class fdb_spider(object): |
|
|
# driver = webdriver.Chrome() |
|
|
# driver = webdriver.Chrome() |
|
|
driver.implicitly_wait(5) |
|
|
driver.implicitly_wait(5) |
|
|
driver.get(entry_jsdomain) |
|
|
driver.get(entry_jsdomain) |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
accept_button = driver.find_element("xpath","//button[contains(text(), 'akzeptieren')]") |
|
|
|
|
|
accept_button.click() |
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
print(e, 'no cookies to accept..') |
|
|
|
|
|
pass |
|
|
|
|
|
|
|
|
for i in range(len(entry_jsiteration_var_list)): |
|
|
for i in range(len(entry_jsiteration_var_list)): |
|
|
time.sleep(1) |
|
|
time.sleep(1) |
|
|
print('trying to get element') |
|
|
print('trying to get element') |
|
@ -248,11 +256,11 @@ class fdb_spider(object): |
|
|
|
|
|
|
|
|
print('this is the n looped elements of the parent specified in config.yaml:') |
|
|
print('this is the n looped elements of the parent specified in config.yaml:') |
|
|
|
|
|
|
|
|
#print('entrylistparent', fdb_conf_entry_list_parent) |
|
|
|
|
|
|
|
|
print('entrylistparent', fdb_conf_entry_list_parent) |
|
|
|
|
|
|
|
|
#print(tree.xpath("//html//body//div//main//div//div[@class='row']//section[@class='l-search-result-list']")) |
|
|
|
|
|
|
|
|
print(tree.xpath("//html//body//div")) |
|
|
|
|
|
|
|
|
#print(etree.tostring(tree.xpath(fdb_conf_entry_list_parent)).decode()) |
|
|
|
|
|
|
|
|
print(etree.tostring(tree.xpath(fdb_conf_entry_list_parent)[0]).decode()) |
|
|
|
|
|
|
|
|
for n in range(len(tree.xpath(fdb_conf_entry_list_parent))): |
|
|
for n in range(len(tree.xpath(fdb_conf_entry_list_parent))): |
|
|
print('-----------------------------------------------------------------------------------------------------------------------------------------') |
|
|
print('-----------------------------------------------------------------------------------------------------------------------------------------') |
|
@ -482,7 +490,7 @@ class fdb_spider(object): |
|
|
#service_args = ['--verbose'] |
|
|
#service_args = ['--verbose'] |
|
|
#driver = webdriver.Chrome('/usr/bin/chromium') |
|
|
#driver = webdriver.Chrome('/usr/bin/chromium') |
|
|
options = webdriver.ChromeOptions() |
|
|
options = webdriver.ChromeOptions() |
|
|
options.add_argument('headless') |
|
|
|
|
|
|
|
|
#options.add_argument('headless') |
|
|
options.add_argument("--remote-debugging-port=9222") |
|
|
options.add_argument("--remote-debugging-port=9222") |
|
|
options.add_argument('--no-sandbox') |
|
|
options.add_argument('--no-sandbox') |
|
|
options.add_argument('--disable-dev-shm-usage') |
|
|
options.add_argument('--disable-dev-shm-usage') |
|
@ -540,6 +548,17 @@ class fdb_spider(object): |
|
|
print(entry_link) |
|
|
print(entry_link) |
|
|
|
|
|
|
|
|
if 'javascript' in entry_link or fdb_conf_entry_list_javascript_link != 'NONE': |
|
|
if 'javascript' in entry_link or fdb_conf_entry_list_javascript_link != 'NONE': |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
accept_button = driver.find_element("xpath","//button[contains(text(), 'akzeptieren')]") |
|
|
|
|
|
accept_button.click() |
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
print(e, 'no cookies to accept..') |
|
|
|
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
driver.execute_script("scroll(0, 600)") |
|
|
|
|
|
|
|
|
print('oioioi',fdb_conf_entry_list_parent, entry_id, fdb_conf_entry_list_javascript_link) |
|
|
print('oioioi',fdb_conf_entry_list_parent, entry_id, fdb_conf_entry_list_javascript_link) |
|
|
element = driver.find_element( |
|
|
element = driver.find_element( |
|
|
"xpath", |
|
|
"xpath", |
|
@ -560,6 +579,9 @@ class fdb_spider(object): |
|
|
#element = driver.find_element("xpath", "//html") |
|
|
#element = driver.find_element("xpath", "//html") |
|
|
#web_content = element.text |
|
|
#web_content = element.text |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#entry_domain = driver.getCurrentUrl() |
|
|
#entry_domain = driver.getCurrentUrl() |
|
|
entry_domain = driver.current_url |
|
|
entry_domain = driver.current_url |
|
|
|
|
|
|
|
|