Don't try to download non-existing .csv files.
author Philipp Spitzer <philipp@spitzer.priv.at>
Fri, 19 Apr 2019 21:15:56 +0000 (23:15 +0200)
committer Philipp Spitzer <philipp@spitzer.priv.at>
Fri, 19 Apr 2019 21:15:56 +0000 (23:15 +0200)
bob_download.py

index 807af3f3c758996a634bcd07d2d2d63c96b75398..0f5f4efc5571666fd4f042a15c310297d224a009 100755 (executable)
@@ -58,7 +58,9 @@ def main(username, password, destdir):
     links = html.findAll('a', class_="table-bill__link--pdf")
     for link in links:
         url_pdf = link['href']
-        date_range = link.parent.parent.parent.find(class_='table-bills__header').find(class_='text-copy').text  # 26.02.2019 - 25.03.2019
+        table_row = link.parent.parent.parent
+        date_range = table_row.find(class_='table-bills__header').find(class_='text-copy').text  # 26.02.2019 - 25.03.2019
+        evn_exists = len(table_row.findAll('a', {'href': url_pdf.replace('invoicePdf', 'invoiceDetails')})) == 1
         match = re.match(r'\d\d\.\d\d\.\d\d\d\d - (\d\d)\.(\d\d)\.(\d\d\d\d)', date_range)
 
         # Download PDF
@@ -74,7 +76,7 @@ def main(username, password, destdir):
         # https://ppp.bob.at/bobstart/invoiceDetailsCSV.sp?bsn=103
         filename_csv = '{}-{}-{}_EVN.csv'.format(*match.groups()[::-1])  # '2019-03-25_EVN.pdf'
         filepath_csv = os.path.join(destdir, filename_csv)
-        if not os.path.exists(filepath_csv):
+        if evn_exists and not os.path.exists(filepath_csv):
             url_csv = url_pdf.replace('invoicePdf', 'invoiceDetailsCSV')
             response = session.get(urljoin(response.url, url_csv))
             assert response.ok