#!/usr/bin/python3
"""Download bill PDFs and itemized call records (EVN) from rechnung.bob.at.

Logs in with phone number + password, scrapes the overview page for bill
links, saves each bill PDF to ``dest_dir``, then triggers server-side
generation of the call-data-record table and downloads it as tab-separated
text.
"""
import os
import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

user_name = '4369911111111'  # user name is phone number
password = 'abcdefg'  # login password
dest_dir = '/tmp/bob'

session = requests.Session()
session.headers.update({
    # otherwise site with content '<HTML></HTML>' is returned
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
})

# load login page
response = session.get('https://rechnung.bob.at/')
html = BeautifulSoup(response.text, 'html.parser')

# fill out login form (name='asmpform') with username=<phone number> and password
form = html.find(attrs={'name': 'asmpform'})
fields = {e['name']: e.get('value', '')
          for e in form.find_all('input', {'name': True})
          if e['name'] != 'submit'}
assert 'loginMsisdn' in fields  # user name
fields['loginMsisdn'] = user_name  # e.g. '4369911111111'
assert 'kkw' in fields  # password
fields['kkw'] = password

# submit the login form; the session keeps the resulting cookies
response = session.post(form['action'], data=fields)

# reload overview page rechnung.bob.at - that makes the URLs in the page much prettier
# previously:
# https://rechnung.bob.at/bill/pdf/;BOBOBPSESSIONID=B7DB9938A3B9541E3D0EB6CD728F54C0.obpBobCustomer4Rechnungskopie_1509_523260091_1.pdf?bsn=61
# same after reload:
# '/bill/pdf/Rechnungskopie_1509_523260091_1.pdf?bsn=61'
response = session.get(response.url)
html = BeautifulSoup(response.text, 'html.parser')

# Download PDFs
# NOTE(review): this loop header was missing from the patched chunk
# (the save fragment referenced `filename` before assignment) — the regexp
# and filename derivation are reconstructed from the URL shape documented
# above; confirm against the live overview page markup.
regexp = re.compile(r'^/bill/pdf/')
links = html.findAll('a', href=regexp)
for link in links:
    url = link['href']
    # last path component without the query string,
    # e.g. 'Rechnungskopie_1509_523260091_1.pdf'
    filename = url.rsplit('/', 1)[-1].split('?', 1)[0]
    assert filename.startswith('Rechnungskopie_')
    filepath = os.path.join(dest_dir, filename)
    if not os.path.exists(filepath):  # skip bills downloaded on earlier runs
        response = session.get(urljoin(response.url, url))
        assert response.status_code == 200
        with open(filepath, 'wb') as file:
            file.write(response.content)

# Download EVN (itemized call records)
# NOTE(review): this regexp definition is also not visible in the chunk —
# reconstructed; confirm which anchors select a bill's call-record view.
regexp = re.compile(r'bill\.ctn\.cdr')
links = html.findAll('a', href=regexp)
for link in links:
    url = link['href']
    # Requesting the link triggers server-side generation of the table; the
    # returned page asks the browser to reload after a delay.
    response = session.get(urljoin(response.url, url))
    assert response.status_code == 200
    assert 'OBP.utils.reloadAfterDelay("/bill.ctn.cdr.set.obp",5);' in response.text
    time.sleep(5)  # OBP.utils.reloadAfterDelay("/bill.ctn.cdr.set.obp",5);
    response = session.get(urljoin(response.url, 'bill.ctn.cdr.set.obp'))
    # generation finished: the page no longer asks for another reload
    assert 'OBP.utils.reloadAfterDelay("/bill.ctn.cdr.set.obp",5);' not in response.text
    html = BeautifulSoup(response.text, 'html.parser')
    assert html.find('a', id='link_csv_download') is not None
    # download the call-record table as tab-separated text
    response = session.get('https://rechnung.bob.at/download.table.obp?fmt=TAB&table=obp.calls.table')
    assert response.status_code == 200
    filename = response.headers['Content-Disposition'].split('=')[1]  # e.g. 'EVN_1509_523260091_1_069911934859.txt'
    assert filename.startswith('EVN_')
    # NOTE(review): the original chunk ends at the assert above; the save
    # step below is reconstructed symmetrically with the PDF loop.
    filepath = os.path.join(dest_dir, filename)
    with open(filepath, 'wb') as file:
        file.write(response.content)