From 317ef997205fa593cdf4a5cb99ae2ace9d810658 Mon Sep 17 00:00:00 2001 From: alpcentaur Date: Tue, 14 Nov 2023 10:22:26 +0000 Subject: [PATCH] changed code in entrylist data2dictionary to handle empty or missing xml elements --- spiders/__pycache__/fdb_spider.cpython-39.pyc | Bin 7764 -> 8228 bytes spiders/fdb_spider.py | 79 ++++++++++++------ 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/spiders/__pycache__/fdb_spider.cpython-39.pyc b/spiders/__pycache__/fdb_spider.cpython-39.pyc index 3c02d28ba8df8a1a8a6cf7c8347a277afc51bd53..ebbced9753a44f1f3af0dc33a9c5ce455511150c 100644 GIT binary patch delta 1167 zcmbW1&ubGw6vyYyj_D?Cl1;M7Zu3J^6+tg8Qi_Ly1+})Qsaohy1FhXCF>O+}@khec z9AZyif^$|ZD0mSp`2)Q4>_txn^(J`n)Kk#vn@uQH(h4r@cV^yvXFl_G9{X|d<&baQ z>-7+PK97!P)`m8GUwvUO4$tAc;$nnwI8SOyx*_gFCKz^}iv|A!g_bwPC;wW7rq8Tu z(~)Y>acjL#v?_qb%ax=Vh%aglEjMX z5gUOXI9mS@Sc6IfX1@W}VFS{(Y|CbTekqe&DBN4LlKDb0c{7t-p0`&r)(lIHr7o1s zZ2nfE-TanoIb&xFRyo>i+j`hyHlHh-Ql0tOUyv;(@mown>aRi%!8L-1vPZ?S@Ca;+ z#qe;2QeZ%V{-V^gLlydyGWr8YF4|C44LlJ2hokF22kY>G2K#&b5`-q|m=v?V9TpJ1 zEKhkw%0($bDIqDB#JceYQew=Ug5LUTbC|lCUa=j$W;n?;ebRgtWrh~AA{~3;nvju{ z_#P{0ld?M{<*bxhu@vuxaq%qvu5(NVrls5vw-Y@vA9htx-Rfy|TCI>S^|*~c7;@5Q JUl;3%`fsrZ62Slf delta 744 zcmaKq&1(}u7{=$FOxE3`*(94ywu$*F^xzj*L_sj1XvMFzSW#mOfh2CEOJX+Nq<(}6 zJ%pYFiOj{Lf`}JE$yM-B{{Ro30$%lh5JlfG8xkHC!vK)K3d)scf${T(0eMrhIe=b z!j%_69^`Wtc?mp{B>m`==rdwWNq9zLSRLWf0R@6C`cI$GEMww|!J`(DtK9&m!HzZ9 z5O%zZL-+w6DG(l&&m^%;^v2Z3(Z{AffnMY01N<0ok(*O$O(_h)t8mkv#}3H`#muTq zTl?LSVXkFz%}yfuOLK^auvICM))FIK-;h3j4y4b3^b)w|lz6A66Wx$~kW-6vHSQmP zMeH<=jQDOWKz5|Xc44JwxK7?_bsW2%?{3!fP1kMre8@J!wPv-}bnNX~4Q0DFrmo`w z{+cmsRqxlw%xT?o$4a~IIj%isO{diu9g05!Wf?QxMe#au8QzF?HX;1h(s@vU0R{RS zZy)`|{QsK;CTT{0Vp^d;m^w%G(VitBbyYUMCgqY8LrP4_712(9{9n{rkZA2M#_mbAB}oRzXJ&Sa0nY0=1j l46aB}lCmLAIrpSZL5nOLvUF4u8XbQ=nq3ett$Wk diff --git a/spiders/fdb_spider.py b/spiders/fdb_spider.py index 8aa6ae9..ef76a6a 100644 --- a/spiders/fdb_spider.py +++ b/spiders/fdb_spider.py @@ -218,35 +218,62 @@ class fdb_spider(object): print('blabliblub') print('len', len(tree.xpath(fdb_conf_entry_list_parent))) for n in range(len(tree.xpath(fdb_conf_entry_list_parent))): - print('oi inside the loop') - name = tree.xpath( - fdb_conf_entry_list_parent - + fdb_conf_entry_list_child_name - )[n] + + try: + name = tree.xpath( + fdb_conf_entry_list_parent + + "[" + + str(n+1) + + "]" + + fdb_conf_entry_list_child_name + )[0] - info = tree.xpath( - fdb_conf_entry_list_parent - + fdb_conf_entry_list_child_info - )[n] + except Exception as e: + print("name could not be parsed", e) + name = 'NONE' + + try: + info = tree.xpath( + fdb_conf_entry_list_parent + + "[" + + str(n+1) + + "]" + + fdb_conf_entry_list_child_info + )[0] - period = tree.xpath( - fdb_conf_entry_list_parent - + fdb_conf_entry_list_child_period - )[n] - - print('oi ', name) - print('blablidubbiduub') - link = tree.xpath( - fdb_conf_entry_list_parent - # + "[" - # + str(n) - # + "]" - + fdb_conf_entry_list_child_link - )[n] - - print('oi' + name) + except Exception as e: + print("info could not be parsed", e, info) + info = 'NONE' + + try: + period = tree.xpath( + fdb_conf_entry_list_parent + + "[" + + str(n+1) + + "]" + + fdb_conf_entry_list_child_period + )[0] + print('period', period) + except Exception as e: + print("period could not be parsed", e, period) + period = 'NONE' + + try: + link = tree.xpath( + fdb_conf_entry_list_parent + + "[" + + str(n+1) + + "]" + + fdb_conf_entry_list_child_link + )[0] + print('link', link) + + except Exception as e: + print("link could not be parsed", e, link) + link = 'NONE' + - if len(name) > 0: + if len(name) > 0 and name != 'NONE': dictionary_entry_list[n] = {} dictionary_entry_list[n]["name"] = name dictionary_entry_list[n]["info"] = info