From: Philipp Spitzer Date: Wed, 8 Jun 2022 21:13:01 +0000 (+0200) Subject: Continuation is supported - all sledrun JSON is now stored in the table. X-Git-Url: https://git.toastfreeware.priv.at/philipp/winterrodeln/wrpylib.git/commitdiff_plain/ceebddceb26a9deb61007749c15cb9380e39e8eb Continuation is supported - all sledrun JSON is now stored in the table. --- diff --git a/wrpylib/wrmwcache.py b/wrpylib/wrmwcache.py index e64161d..7e1a31e 100644 --- a/wrpylib/wrmwcache.py +++ b/wrpylib/wrmwcache.py @@ -318,43 +318,52 @@ def update_wrsledrunjsoncache(api_url: urllib.parse.ParseResult, connection: sql :param api_url: URL to the API, e.g. https://www.winterrodeln.org/mediawiki/api.php :param connection: open sqlalchemy connection """ - request = api_url._replace(query='action=query&list=categorymembers&cmtitle=Kategorie:Rodelbahn' - '&cmlimit=3&format=json') # cmlimit=max (5000) - response = urllib.request.urlopen(request.geturl()) - sledrun_list_response = json.load(response) - rows_to_insert = sledrun_list_response['query']['categorymembers'] - for row in rows_to_insert: - row['sledrun_title'] = f'{row["title"]}/Rodelbahn.json' - row['map_title'] = f'{row["title"]}/Landkarte.json' - - qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['sledrun_title'] for r in rows_to_insert)}) - request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + qs_titles) - response = urllib.request.urlopen(request.geturl()) - sledrun_json_list_response = json.load(response) - sledrun_json_dict = {s['title']: s for s in sledrun_json_list_response['query']['pages'].values()} - - qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['map_title'] for r in rows_to_insert)}) - request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + qs_titles) - response = urllib.request.urlopen(request.geturl()) - map_json_list_response = json.load(response) - map_json_dict = {m['title']: m for m in map_json_list_response['query']['pages'].values()} - - for row in rows_to_insert: - sledrun_json = sledrun_json_dict.get(row['sledrun_title']) - if sledrun_json is not None: - row['sledrun_pageid'] = sledrun_json['pageid'] - row['sledrun'] = sledrun_json['revisions'][0]['slots']['main']['*'] - map_json = map_json_dict.get(row['map_title']) - if map_json is not None: - row['map_pageid'] = map_json['pageid'] - row['map'] = map_json['revisions'][0]['slots']['main']['*'] - with connection.begin(): # connection.execute('truncate table wrsledrunjsoncache') # needs additional permissions connection.execute('delete from wrsledrunjsoncache') - sql = 'insert into wrsledrunjsoncache ' \ - '(sledrun_page_id, sledrun_json_page_id, sledrun_json, map_json_page_id, map_json) ' \ - 'values (%s, %s, %s, %s, %s)' - for row in rows_to_insert: - connection.execute(sql, (row['pageid'], row.get('sledrun_pageid'), row.get('sledrun'), - row.get('map_pageid'), row.get('map'))) + + cmcontinue = '' + while True: + request = api_url._replace(query='action=query&list=categorymembers&cmtitle=Kategorie:Rodelbahn' + f'&cmlimit=50&format=json&cmcontinue={cmcontinue}') # cmlimit=max (5000) + response = urllib.request.urlopen(request.geturl()) + sledrun_list_response = json.load(response) + rows_to_insert = sledrun_list_response['query']['categorymembers'] + for row in rows_to_insert: + row['sledrun_title'] = f'{row["title"]}/Rodelbahn.json' + row['map_title'] = f'{row["title"]}/Landkarte.json' + + qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['sledrun_title'] for r in rows_to_insert)}) + request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + + qs_titles) + response = urllib.request.urlopen(request.geturl()) + sledrun_json_list_response = json.load(response) + sledrun_json_dict = {s['title']: s for s in sledrun_json_list_response['query']['pages'].values()} + + qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['map_title'] for r in rows_to_insert)}) + request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + + qs_titles) + response = urllib.request.urlopen(request.geturl()) + map_json_list_response = json.load(response) + map_json_dict = {m['title']: m for m in map_json_list_response['query']['pages'].values()} + + for row in rows_to_insert: + sledrun_json = sledrun_json_dict.get(row['sledrun_title']) + if 'missing' not in sledrun_json: + row['sledrun_pageid'] = sledrun_json['pageid'] + row['sledrun'] = sledrun_json['revisions'][0]['slots']['main']['*'] + map_json = map_json_dict.get(row['map_title']) + if 'missing' not in map_json: + row['map_pageid'] = map_json['pageid'] + row['map'] = map_json['revisions'][0]['slots']['main']['*'] + + sql = 'insert into wrsledrunjsoncache ' \ + '(sledrun_page_id, sledrun_json_page_id, sledrun_json, map_json_page_id, map_json) ' \ + 'values (%s, %s, %s, %s, %s)' + for row in rows_to_insert: + connection.execute(sql, (row['pageid'], row.get('sledrun_pageid'), row.get('sledrun'), + row.get('map_pageid'), row.get('map'))) + + if 'continue' not in sledrun_list_response: + break + cmcontinue = sledrun_list_response['continue']['cmcontinue']