From 0500f5853dfff1f7eb6e5a8245df14675ca1db7f Mon Sep 17 00:00:00 2001 From: alpcentaur Date: Mon, 15 Jan 2024 21:08:23 +0000 Subject: [PATCH] full working example from localhost --- main.py | 10 +++++----- .../__pycache__/fdb_spider.cpython-311.pyc | Bin 41511 -> 41231 bytes spiders/config.yaml | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index dd0dee2..17ff0f8 100644 --- a/main.py +++ b/main.py @@ -5,21 +5,21 @@ import sys config = "spiders/config.yaml" #list_of_fdbs = eval(sys.argv[1]) -#list_of_fdbs = ["giz","evergabe-online"] -list_of_fdbs = ["giz"] +list_of_fdbs = ["giz","evergabe-online","foerderinfo.bund.de-bekanntmachungen"] +#list_of_fdbs = ["giz"] # doing the crawling of government websites spider = fdb_spider(config) -#spider.download_entry_list_pages_of_funding_databases(list_of_fdbs) +spider.download_entry_list_pages_of_funding_databases(list_of_fdbs) #spider.find_config_parameter(list_of_fdbs) -#spider.parse_entry_list_data2dictionary(list_of_fdbs) +spider.parse_entry_list_data2dictionary(list_of_fdbs) -#spider.download_entry_data_htmls(list_of_fdbs) +spider.download_entry_data_htmls(list_of_fdbs) spider.parse_entry_data2dictionary(list_of_fdbs) diff --git a/spiders/__pycache__/fdb_spider.cpython-311.pyc b/spiders/__pycache__/fdb_spider.cpython-311.pyc index 48677c6df6a0cbd091f9a487b68cbf2667514bef..29b9f66ea7ed57753b1a68756249d0f141287225 100644 GIT binary patch delta 914 zcma)(Ur19?9LMi(cV~L<-PpK#nHyV5Wx3pR)260wwK6D)FeM?WaIk4&flbA&?lv+f z308w2(Mwb>X^@z;mtKMpp&oqDW&aky{W#}$KEK27`~4j_lXI$@ zvnt)BPRBDe?hii*u0PR@^@&dY^V%nbYjRVp)h6jT>DgiB^TJoFF}_-)7t5xfyGqBVRFj^W*KOeGhgGi<`4 zNEmkEMr4)UJynM9FBO9gPmh*i$>mzp9@!-8Cs>AgME6zz!!dz!?7RFJ>{ue-%9~EA z$46uxz92?$;F4^E3j8U1U=JFiW^9j2pmbluA5mLQVaB?c11hmAW|N!~K)vbSis<%0 zdDMc=xCx)fy1<1y;HSVxh^{SQ5B!oNzs|H~o&WtqUwqYIAqcdTJ20WfnT)*2am`oV(&alUrbwd6vywcy|;4jZ7IE7X}f{DlHs6*^@sAew6e}QTwXC>+9d*Uk;Ir7-}HCO_R!21&%M82 zKIfiuzQ1#R_x)R%>vuGU>jncy5qo9v)5yb{hN~$7*HhF7l#EVNEBu?LSq)3UT>5#q zpV~(A#akD$2ua1>Oa^J`v<}v!4)hR8(m%*oEM-q4vy z&uAU89XdwL@Xm~)b*{F-+1z2&4!mVW@M9;yz>_x}mOWB3gx|@}nK{#{92!G8U zGIv(r!E6&zKQgy~x}Y%k5!p}Wo5`NYw>Nf=(`pYZDDAD9tw%P7dtq&%0oE3$n7*)k zwI53JG5CDGnRuRxmqp%3oqEv=B)1U_fO1(?{-d=4Y8Tt>$HyxJ67{Q9>b{C{YO#?y z@xRW&AYVRgJ-**_dZ0@4zdERcrX@4;qTCQZ3GO8qdI`psT zYa;yO3cqX)Qvs^=<>ISv|B24$tZ_*kO-f>XQc8+Lrz?-T<@BLQG?|WwiDWV*=J;S# ziliSo4k;E9Q{kwXh)<4-`;m0yt!z3IzZem-lTth(W)i_n%qC|cg@Vw!%& zT}t=R2KH}Zzk>b5y)n6-*ua4;98hrJM~@eVt_9rc_lfm4Ht_Hk9#-&hsekb6X2t)S z;vXjOH?14}fg3dyt&kEXz-n;60+qY$jsm7cK# zHEgL{Pq?h1gE~W5r&XCnDsvUC8!b`HZXeOBjOl2nVY^eSat~6ux56GS_B6Yr6JmCD$7UJr<9FKi qD)-T&UgM5eOL&(;gl)UXtU7jv6zq2RgfUw88*LdoROCOTwf_R#U1^N~ diff --git a/spiders/config.yaml b/spiders/config.yaml index 201d189..a8b25d3 100644 --- a/spiders/config.yaml +++ b/spiders/config.yaml @@ -57,8 +57,8 @@ giz: link1: 'https://ausschreibungen.giz.de/Satellite/company/welcome.do?method=showTable&fromSearch=1&tableSortPROJECT_RESULT=2&tableSortAttributePROJECT_RESULT=publicationDate&selectedTablePagePROJECT_RESULT=' link2: '' jsdomain: 'None' - #iteration-var-list: "[1,2,3,4,5,6,7]" - iteration-var-list: "[1,2]" + iteration-var-list: "[1,2,3,4,5,6,7]" + #iteration-var-list: "[1,2]" #parent: "//html//body//div//main//div//div[@class='row']//div[@class='large-12']//a[@class='c-teaser']" parent: "//html//body//div//div//table[contains(@class, 'csx-new-table')]//tbody//tr" child-name: "//td[3]//text()"