From ba683b0a29e79b65de1d31c12f85f0f83a3d4720 Mon Sep 17 00:00:00 2001 From: corsaronero Date: Mon, 27 Feb 2023 11:00:13 +0000 Subject: [PATCH] copied external notes into git repo and pip freezed venv packages into requirements.txt --- notes.txt | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ q5.txt | 4 ++ requirements.txt | 17 +++++++++ 3 files changed, 116 insertions(+) create mode 100644 notes.txt create mode 100644 requirements.txt diff --git a/notes.txt b/notes.txt new file mode 100644 index 0000000..d9e251d --- /dev/null +++ b/notes.txt @@ -0,0 +1,95 @@ + +mexico + + mexico memberlist of camera diputados is not available + + https://web.diputados.gob.mx/inicio/tusDiputados/listadoDiputadosBuscador;nombre=;estado=;cabeceraMunicipal=;grupoParlamentario=;mail= + + link is javascript event, needs selenium + + mexico list of senators needs selenium in python to click js + + + + +el salvador + + el salvador should work with same machine + +costa rica + + will work with same machine + +guatemala + + zurzeit nicht aufrufbar? --> doch per internet archive + + this one (jan 22) also works for the deputado links + + https://web.archive.org/web/20221007084458/https://www.congreso.gob.gt/buscador_diputados#gsc.tab=0 + +honduras + + nicht zu finden + + doch, aber nicht alle auf einer seite, sondern doppelt nach + + https://resultadosgenerales2021.cne.hn/Integracion/?id=4 + + it's 120 people + +general about parser yaml + + selenium general yaml conf for + xpaths to click and get data for lists + selenium or not + merge of doubles will be necessary too + merge or not + + + +write/read wikidata + + + There is a hard query deadline configured which is set to 60 seconds. 
+ + + example of query + + https://query.wikidata.org/sparql?query=SELECT%20?dob%20WHERE%20{wd:Q42%20wdt:P569%20?dob.}&explain=details + +instance of (P31) + + Q5 is human + + +given name (P735) + + Christian has Q18001597 + +family name (P734) + + Lindner has also a Q + +occupation (P106) + + Politician Q82955 + + +position held (P39) + + Member of German Bundestag Q1939555 + + start time (P580) 27 October 2009 + end time (P582) 10 July 2012 + + +member of political party (P102) + + party has Q + + +official website (P856) + + http etc without Q + diff --git a/q5.txt b/q5.txt index dacbb16..d57623d 100644 --- a/q5.txt +++ b/q5.txt @@ -1,3 +1,7 @@ +# the output of checking labels for names with property human +# python main.py > q5.txt + + started to parse data of members of nicaragua .. Q107326760 --------- diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..af9ab1c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,17 @@ +backoff==2.2.1 +beautifulsoup4==4.11.2 +certifi==2022.12.7 +charset-normalizer==3.0.1 +idna==3.4 +lxml==4.9.2 +mwoauth==0.3.8 +oauthlib==3.2.2 +PyJWT==2.6.0 +PyYAML==6.0 +requests==2.28.2 +requests-oauthlib==1.3.1 +six==1.16.0 +soupsieve==2.4 +ujson==5.7.0 +urllib3==1.26.14 +wikibaseintegrator==0.12.3