Additional headers can be specified in the session - that makes the code shorter.
author Philipp Spitzer <philipp@spitzer.priv.at>
Sat, 3 Oct 2015 20:18:35 +0000 (22:18 +0200)
committer Philipp Spitzer <philipp@spitzer.priv.at>
Sat, 3 Oct 2015 20:57:22 +0000 (22:57 +0200)
bob_download.py

index 2b1a272982341248ec8c3d788a3a21c01962831a..9896b7a39f1bdefad64e027628a43655ac1c16d6 100644 (file)
@@ -11,11 +11,13 @@ dest_dir = '/tmp/bob'
 
 
 session = requests.Session()
 
 
 session = requests.Session()
-additional_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'} # otherwise site with content '<HTML></HTML>' is returned
+session.headers.update({
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # otherwise site with content '<HTML></HTML>' is returned
+    })
 
 # load login page
 main_url = 'https://rechnung.bob.at/'
 
 # load login page
 main_url = 'https://rechnung.bob.at/'
-response = session.get(main_url, headers=additional_headers)
+response = session.get(main_url)
 html = BeautifulSoup(response.text, 'html.parser')
 
 # fill out login form (name='asmpform') with username=<phone number> and password
 html = BeautifulSoup(response.text, 'html.parser')
 
 # fill out login form (name='asmpform') with username=<phone number> and password
@@ -27,14 +29,14 @@ assert 'kkw' in fields # password
 fields['kkw'] = password
 
 # load overview page
 fields['kkw'] = password
 
 # load overview page
-response = session.post(form['action'], data=fields, headers=additional_headers)
+response = session.post(form['action'], data=fields)
 
 # reload overview page rechnung.bob.at - that makes the URLs in the page much prettier
 # previously:
 # https://rechnung.bob.at/bill/pdf/;BOBOBPSESSIONID=B7DB9938A3B9541E3D0EB6CD728F54C0.obpBobCustomer4Rechnungskopie_1509_523260091_1.pdf?bsn=61
 # same after reload:
 # '/bill/pdf/Rechnungskopie_1509_523260091_1.pdf?bsn=61'
 
 # reload overview page rechnung.bob.at - that makes the URLs in the page much prettier
 # previously:
 # https://rechnung.bob.at/bill/pdf/;BOBOBPSESSIONID=B7DB9938A3B9541E3D0EB6CD728F54C0.obpBobCustomer4Rechnungskopie_1509_523260091_1.pdf?bsn=61
 # same after reload:
 # '/bill/pdf/Rechnungskopie_1509_523260091_1.pdf?bsn=61'
-response = session.get(main_url, headers=additional_headers)
+response = session.get(main_url)
 html = BeautifulSoup(response.text, 'html.parser')
 
 # Download PDFs
 html = BeautifulSoup(response.text, 'html.parser')
 
 # Download PDFs
@@ -66,7 +68,7 @@ for link in links:
     assert 'OBP.utils.reloadAfterDelay("/bill.ctn.cdr.set.obp",5);' not in response.text
     html = BeautifulSoup(response.text, 'html.parser')
     assert html.find('a', id='link_csv_download') is not None
     assert 'OBP.utils.reloadAfterDelay("/bill.ctn.cdr.set.obp",5);' not in response.text
     html = BeautifulSoup(response.text, 'html.parser')
     assert html.find('a', id='link_csv_download') is not None
-    response = session.get('https://rechnung.bob.at/download.table.obp?fmt=TAB&table=obp.calls.table', headers=additional_headers)
+    response = session.get('https://rechnung.bob.at/download.table.obp?fmt=TAB&table=obp.calls.table')
     assert response.status_code == 200
     filename = response.headers['Content-Disposition'].split('=')[1] # e.g. 'EVN_1509_523260091_1_069911934859.txt'
     assert filename.startswith('EVN_')
     assert response.status_code == 200
     filename = response.headers['Content-Disposition'].split('=')[1] # e.g. 'EVN_1509_523260091_1_069911934859.txt'
     assert filename.startswith('EVN_')