# Settings for the PEP crawler per country to crawl
# Follow the syntax below and don't use tbody in xpaths, as it gets added by the browser (when researching an xpath through the inspector)

# xpath syntax: https://www.w3schools.com/xml/xpath_syntax.asp
# lxml xpath syntax: https://www.geeksforgeeks.org/web-scraping-using-lxml-and-xpath-in-python/

# Crawl settings for the greenjobs.de job board.
greenjobs:
  domain: 'https://www.greenjobs.de'

  # Selectors and URL fragments for the listing (overview) page.
  entry-list:
    # The listing URL is stored as two fragments; presumably the crawler joins
    # them around each value of iteration-var-list -- TODO confirm in crawler.
    link1: 'https://www.greenjobs.de/angebote/index.html?s=&loc=&countrycode=de&dist='
    link2: '0&lng=&lat='
    # NOTE(review): 'NONE' presumably means no JavaScript-driven paging is
    # used for this site; the js* keys below are commented out accordingly.
    jsdomain: 'NONE'
    #jslink1:  '/html/body/div[8]/main/div[4]/div/div/div[2]/table/thead/tr[1]/td/div[2]/div/span['
    #jslink2:  ']'
    #jsiteration-var-list: "[1,2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,6,7,8,9,10]"
    # Quoted on purpose: this loads as a string holding a list literal,
    # not as a YAML sequence.
    iteration-var-list: "['1']"
    # XPath selecting the table rows of the result list.
    # NOTE(review): this path contains `tbody` although the file header
    # advises against it -- confirm the element exists in the raw HTML.
    parent: '/html/body/div/div/div/div/main/div[2]/table/tbody//tr'
    # Per-row selectors: link text and href from column 1, plus the text of
    # columns 5, 6 and 3.
    child-name: '//td[1]/a/text()'
    child-link: '//td[1]/a/@href'
    #javascript-link: ""
    child-info: '//td[5]/text()'
    child-period: '//td[6]/text()'
    child-sponsor: '//td[3]/text()'

  # Settings for the individual entry (detail) pages.
  entry:
    general:
      # Quoted, so this loads as the string 'TRUE' rather than a boolean.
      # Presumably selects between the unitrue and unifalse sections below --
      # verify against the crawler.
      uniform: 'TRUE'
    unitrue:
      #parent:  '//html//body//form//table'
      # NOTE(review): this xpath also contains `tbody` and ends in `@href`
      # even though the key is named `text` -- confirm both are intended.
      text: '/html/body/div[2]/div[4]/div/div[5]/div/table/tbody/tr/td[5]/a/@href'
      #child-sum:  '//html//body//form//table//tr[2]//td[1]//span//img'
      #child-deadline:  '//html/body/form/table/tr[2]/td[3]/span + label.1'
    unifalse:
      # Quoted on purpose: loads as a string holding a list literal of
      # (German) keywords, not as a YAML sequence.
      wordlist: "['Mobilität', 'Energie', 'Off-grid', 'regenerative Energien', 'Solar', 'Energienetze', 'Elektromobilität']"