From a846ce04cc48aaf97722d4e34e5ab197f2f644bc Mon Sep 17 00:00:00 2001 From: alpcentaur Date: Tue, 7 Nov 2023 14:55:05 +0000 Subject: [PATCH] specifying the links, new exception clause if soupparser does not work --- requirements.txt | 14 ++++++++++++ .../__pycache__/fdb_spider.cpython-311.pyc | Bin 0 -> 12079 bytes spiders/__pycache__/fdb_spider.cpython-39.pyc | Bin 5619 -> 6103 bytes spiders/config.yaml | 6 ++--- spiders/fdb_spider.py | 21 +++++++++++++----- .../output/foerderinfo.bund.de1entryList.txt | 1 + 6 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 requirements.txt create mode 100644 spiders/__pycache__/fdb_spider.cpython-311.pyc diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6927859 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +backoff==2.2.1 +beautifulsoup4==4.11.2 +certifi==2022.12.7 +charset-normalizer==3.0.1 +idna==3.4 +lxml==4.9.2 +PyJWT==2.6.0 +PyYAML==6.0 +requests==2.28.2 +requests-oauthlib==1.3.1 +six==1.16.0 +soupsieve==2.4 +ujson==5.7.0 +urllib3==1.26.14 diff --git a/spiders/__pycache__/fdb_spider.cpython-311.pyc b/spiders/__pycache__/fdb_spider.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e1e81975fd502bf422eae9ba2b04b659b58b4f3 GIT binary patch literal 12079 zcmd^FeM}o?nxFBv4F&@?F`phFd+tYQ`U1^3L32UTCobK|+>7^#0%=^LOvs`R zjhP6`l_rQ0IWMC55bph0zCS{EWWB(qIwCAB+f!N zMz!x~1E%ElGI^Pp)Se|S<50k0MKpOlzJM?6@l2H{GE#{JP+Vvc#8u*fwJ5e}I>cM+1Z!P_(3vY{FC{cu zYsG`=+L^%IX})@^P`x##yO9^md-9-U!;M`Dg0xno#RHooHgw%`!xB@RPvA&sK@YD+ zFe+J=#448YEsAtn&Jf=IOw~{~wBkD1SFioYvNp{<4E(CF&!PXUu+OGn%+o`B(WZxl zMTZu{^}4=h<6GoGBN;RVXpeNScUGumx@fwB!RS9Tajcm2rO2S#;0)Ul}P1!`q_nOa3m z1H`%aqvo)*og^zaf@`XD%qm7Xl?F4(5H3>0%<4K$K~}_wQk@>PL@cw49!OjG5P~1e z(OE@JJ-8 zT>Ix3dF|ie|7U4swjNuvz;el6Y4iI+;i>Uc7a5kJd?Ct9!SZ?D&x}!*ec_8#_##6k z*Wd}(8}ixdJXvfr^V?jy^Q8Y8`3otYqOBVPthPKFtwtQDZYsJ1WlWb!fv@zK)N1=L8Cx7Q6 z^sk?KrES|}iX$&5ZKgrkk3iZ9q^*fj9cP{w(`olu~;c3gX<;jDJ zjj$;@%G2V3vnrt>s*Y>ol~t>HZCUxZ1c<~lMG_)_8QkM7SoEf@Pj-E@YoU?fv{%@) zmvc73FMDC}${&8$lJfswOM1}Rvtaps!M_x6``_fZy(MgWi|;%qbe`j!o8gzMaiib4 zaYS`@g2;2gn1h%mADf7cEg$S!uIpW@>*cl|`qN47)z|)T`j4mi?I*d@!~FJPzV3oh zcVXHTZ)m?ec4zG8!4HDV4Lg<^b}XFX8}(fFms*6%WY>C$7ksdgYw_f?){z-|1>VH;>Me$}FfaCU$<2b=6Vz|)>PD!C;J z*amP;j?F3@52)?>k?F2v*UE%*IkYldAN8ge)k>g41_kR7P^W}5v^JtsZ>sBVkM13v z1OefihJX<*Kv>N=4q&7L5CCjZK~ItZS4{y1(y(Mih9$GO1V44qx`;-C110IKOzRR5 z(4$nB8Z{{EL>tt(000cx0g4*L0H`XBs(L7*8fYq!3gwBA2V7E(cH(Zf! z;|T6O(gCBP>dV=}RF3khuCw|M05{z}B5Hj3nHV)iOgZM6NnJ+NoKrK?4MFgj^*MMN z80OTpWR#wtQ!@khMXlkQbiak`(&1d=mI)lyC97jFrc2R++?MG4r{;2j+JZEfttcas zt~xhb_&wQ{O31KnQ#~yOk;12+D3Vb1SfSbiT{tQ^m+WyLxDAnl9oULOy0A>{WK2!R zXq%*Aj@Xnsmk72+g!4=Cc zjkQC))<%mWMYD>vE@c%QB1LK>V=YmS1FURCBlgstB&^TZmSWm#$0f(6^cd@aEY=BP z?I(YkTa)q!(B}a!<-2)0Y&lj3mp-vP=*xBu2D?!k<<|0oDh{yuN^F827S(j;Aa`7kz%ZE#MtvQegz?Yvh}2HLHaB(Z6gDPlnk_sDHDAbzx`X z?!;E1s78_WiTsm5F6yu$q9qiZ7{}%@NGM8ITnI%SdM2Va?7blBC&#_vi!4xAglGx} z!C``~U~!s@de$4bz=&Eu6Oh~{wgC%i1kqe9xlC*mCN!gN+Gmt~9Tpj5b~EPckX9BQ zv~LJ~H!pid;mL?5bXU;RP4dO-11fQkn5tAQPpK*rJ;{+5tFz@vv%w<`_A~Nv$wcN6Isb%l|9em3Pq2RtrC%UMs=hvKFTW5U=2LIR-Uxy91oAqGE8miXi`4V%_Po7NgUZ zPn0$#hyl_H)kGO1k<0a7H{CGBObN}7qSD6(qLP~F6)N1z6tk;Id>5} z7@?*ob|~Jm<&)NrTIVBt%Ye`_z}Xw)9Rtf92bMYxeC^~rUK2WAe0S>36u0FF-*{ALJj&M{6Y7rfRmX*@;|ZeN@dlZc z;3m21Zcp8sidS~Wt7}%ZWL53M>YCY__l9SNSM{2zdT`oZwW~&LRc*pVxOT3ZbyYi8 zwFU~DY7o=9L;*o<{EI9+wh?93)Aa1A_eObVi{NbGoGlM4s;9&6_0IIpzI|)w^v-xs zFIU+yU&B{!jqmQi-}l*NDJoQMoj=W2?pSd(&z+jDxp#(hZG~UFfB)CyS3MlnHSZOu zZ40G5wFgw@?epGGH*l_P@Qe2!mKT*m0NUy(+g}sxuf@o?t>~ulUE}*&XeRI6A~?73wobv;$=Ny~>xMCAylz}QsKLi1;lL+5 z`Fw{u;GzHfK5L&tv*@t&_39U0w!W?U#jSb>e^GSMaM-E)#%Vdc!}v|D?QoCrn;t!c zRgNh-Qn+wGqCv`U9zOmXbWF)}?ckWo&KmH=DR)w4egUFpII9vY%j!-7_?F#O=wJCz zJ6}dj>aR6~~v+=LZ$m?CBc7&fHCQA^k+8z)9&$2z;a8p(8q@*;UJQqNgMzobjc zbW@}Gv}qmCKCjQr*mGf}RtD2A^{fRYxMAt$5@{YjjIeb5NV~wRLSUkscj=!Fe1Mx0f+kE;Z4D2`2R`*)ddPr&Eoq*b~lPW zC?w#3jg^4z8tGosODy&&c(HC`CM9xtGzs5|bHcYI(5gc#8&Oaw(2-?pQAoH<6AX!_ zG4Cit0~i%`h~Gpbgc0T_Fj_$dMq>}71sRk{7zdCU1DNtC1LiL_<>>6>uo0)-A9bS^^ei9S7BzD_02+ zf7K{xgB2hesb5;|SvXt$3e}Y$ z3X1@%fk5EVJwGYb?73em)EtN%0xFein0aT})wSg6njhg^eS)hGv}#-Kw%uv_x%&h6 za_zRI+HDIP_}V_9wl8)VAOJA0ojle3dC9`7zb*X?w$|PE>v(G4pR`{e_=EM2R-SsD zd&9$1p4h>M_R<@Nf1&xnc(?FQ;k=orwhPqu`=>bjA>MvSupffJ183#***P8WY!jSq zu|9x~H}}80fBG^C8uV_ zUKGV$GCK{Kxu4=YBK6m%J_Aj_n^Gwb-$g)ca! zHngGIpEC$Nsf;3&QCoHdG9r`s+0jG9rrtLtIx8z39M~c-uHq2%P zNe;OxYMd@vCm@R=#dJv;U}W^F9SOv1zgJ>|mwPVgOwtaY44JS8<3u#lvBstbG)%+Zz(iaa1C#WPRQVw^sXHdL>0=qTIqa`hRI+C!) z_QJ5H4=wCP(T`#P#eNirP^5XLk{7@*><{rCgfQ$e6vsh`1!^`(w5#Lrh1+njL#)V_ znj(}cvt}k4r4ANfLb6^IM^Frd2qAH+hD8c&RG}n+GRdL_EGtR;wLe4ecUa2jutcrn zqb{V0K>StXqfJN>fryb0Z4Rz<`@%`SbkF@RzVrZZJ1E!=ayCF8&Rl->^I=nkYvu>H zci-9#^iqWz_@~@HO2+O?Rvy-q^L=*t^u&yU@fp z_6v>uoNZ&geeZJnD@*OK+@IvzPYCTNINPSUyMNief62W+Zm$BOYBLaJAUInqf-87m z?kIv!6+pxe0ND_C_Q%WJ^CqFZccDQj?*oFSY%jS|?gDP6j9S&|kbwXJ32o-x`11r_Fw?}S`#GRe-@@_1yXH~0pA4-)c3lkma1SZSJDr>{&4L&b?0nZ|26bFG+r!Az}Cv z4w$G3oUQB-OygtCCj8PA7a5O7w0JyYL3#rG3y+xZ@w`3Z^(S*!e5Tl~C|*Z#9>pk# zUlNZ%5|ZD+;@8uXK%X?kfVe8335{M4-;2tkc(We9Baj769w+rsFNHj>snr8$mBj(F z8#SI5?0BkN_=ac@!|1__x{**YAQmLQN@`#KC`t0<#QgO{=`%1nCOz@2v?c%7^+T~!og3C7q&yO%e;bgmmSg=$RB&^p=JCZ%Qi=BihY@0;r9tilPmL%hZ`KolT z5P4UnGoA{p5FOm}ek(*Nr~btW8<+aUi3%?DOBgO_N$9ieGFG}O<$RkXUuw<&0Z8rA APyhe` literal 0 HcmV?d00001 diff --git a/spiders/__pycache__/fdb_spider.cpython-39.pyc b/spiders/__pycache__/fdb_spider.cpython-39.pyc index cf6c13f31a49d4669de1db75a96e94e0b29ab2c1..25a1496f21a9571d3caa2ebb44b7c516ae798d89 100644 GIT binary patch delta 2129 zcma)7&2Jk;6yI5|*Xy;tb{u~+Zb-LP+r({y`$bD3Elt%z(?TOaC?&-WyWUB%$;O+^ zuJd87m4gxDfKb|1FTLO#A`VDMs29{bLP(rJSo{NUfCHSO((>NAO;RXQGt$p*-}}8c zGru=8`Sb8kB}GdlWDfog4qu@+2W~07#JY8O^o+q7AuBQ$8;=q04bBjZ@JE~xrtxNI zJYkS`xuWVFqLof9z>USg(8+u@33JG7W!vY!z@jwK+(0#N)7@dWAde-^)p-{m>ZBNw?f zzWka0X=H4eFCTA&UBL@C3E}Gd-0+Mz6KP3?;PH9RjiNsf3(s@wq#=%R)yo3shE{$g zTtfiepSth+rX4Y{`bS4ilAg4Z#)MSS#)c>!g)f=PsgE|g#w$vo%9gPkNHIpx8((UlJt1mEc=3x7sdBFIVss~fX|X_H>ENsP>_ zutu0}a;sj%Q-(M%X=Hjhw0k%s3*S*Mg`P!_oXH*gaWuChO0dWiE-bT?4~#Xkpo8p3 zQT8={MSm=O4wjPlgeFh8G46fN;2Qo1O7fDMGIl-HxE|PIp~*M8yslOkv=h3sj{WJH zNkbQIE(it(XL0v6mcrxG1+dxl`-F3TR(^f88+{po`u%y0Ic9ZAb*HJiJZGtsrbQjq zFb%b8yXun7X4NIroetuTU7JTfWiP51m_19YDjsYgSXMySDdaCxL)9GBbk&MovkY}w zTcoN^Y4r#c(WnSsOXxNnx(N$HA|~b)wC(<_#ej9)$xLDK5LAQ zjE>a5DV1;mrIKzNE2WZQE|yAjnpu5p!uO5y6-#rRv;E9o>Mxf{4z1uV+j3ij4%Jv? zdXQ14X1Rlw>A1)XN&$>*F)L3qCnj9F><%t5ZGN7zp!B@9UH89i)3s=S`37bU6wCfs z^1$G@tXrCHnYvZeH_44nQm%(=Q>}+o+Z<1JVn$8ZO`}%R{a@s7FJz%-#cs9>`-iUw z%OT_uq5y5df*!IEn(}tH+Tl*udcp7i5zAlgLqeE=(6k-0XqDk}!}cJk0L3)Jmyhj5 z*ay%~!#l(@RBmEXV-u(n+mF!${@d{{3rEpBhA@Dz^M%SK4v1I5>D=~*6FrytkVMjt zBtans!sB>Y2n)OG3d7xDD|OMVPTGUV*b)DJ`sLN25y9;q#wjl#ln@A}L@Lv!WlS7nfgGvlmg6K= zz(d7eLO6wR8sQAWwl3W#pFt<%&6k`;B>Bo zKsldg?!1#kP7&ce0v=m5xi}(*D3-RM@;U7#s?(F~o delta 1738 zcma)-&u`mg7{}k&u^lII>?CcS2G+F`Ei{xiYdfHbvY}Ey+hCo<_!U&Tw0_&VP2%RY z3vJ~n(t?oS)a}&-CzPHza2e{5K;p{ZfMhsu<-myx>H&Bjhp`ATX|eM0*U$U-d0&6u z$H|YS-zSVpI-Ml&`D5-PbH{EPr|9*Y6H8;S5<6zg9}!z-sdlWa+w=`mRDQ+GD@7VA z%;$`KeSC>hUHnPkq{i_zsS&f{FdoJ>m>(uy-dkfkzT?)pCVo<$wGFlp#spN@29|1;lkbegtKfEgbRF9V3vBXvaJvQO36pX~qMx zMLSX;9nq#1XdTJcKqVqYK_!DE#1f!VkxGKnBb5TxO8Xf;(;+w0@F4NkzOqj zI|G44kipOgU=F|A)tCyS*`85~MegOoa{kRYq>a(HN&T!${MeziuT#>LDLJrUDLL^= zV(3E7?hB;oVPFg3Vfx`a(3YCnAlvDKM~t_@E7*zbRzEoZenYki^o(_do4CChF`xV- zkwe)B5TB-oCZ8&mHvL+)R9bcI{ZeV&<+W1DudG%XoH%^#o=b6EA6HN4dna^Re53!+ z>W3=D0iMT-*misXVIed?C|CDt)lkL>!ldW!?(9^!$9Sl8&yPkq(2P8c`IU`O+1si3 zn;d$Mlr`V=eD2gY_z-3(yi(s_VXVsP9J`pG2K4b^U@^xN7#TsFxn&HhkD(eBr;V?M zODNAGoXsq0kVxreZdU&4HJieEsyC~gd};masrL%e`kLiCs;RBJ^29)jLG&}x=gux_^u z%o#^4AYMmYMcltHQfZ9E04O~Cuh6KL&}OtHZ8VC*&-g2tqXNqEmgm;X#xAc`oz*GM z8oLZ$CG4NF{!fr`?((vDD{s(Q!Scn0P<2