- sql = 'insert into wrsledrunjsoncache ' \
- '(sledrun_page_id, sledrun_json_page_id, sledrun_json, map_json_page_id, map_json) ' \
- 'values (%s, %s, %s, %s, %s)'
- for row in rows_to_insert:
- connection.execute(sql, (row['pageid'], row.get('sledrun_pageid'), row.get('sledrun'),
- row.get('map_pageid'), row.get('map')))
+
+ cmcontinue = ''
+ while True:
+ request = api_url._replace(query='action=query&list=categorymembers&cmtitle=Kategorie:Rodelbahn'
+ f'&cmlimit=50&format=json&cmcontinue={cmcontinue}') # cmlimit=max (5000)
+ response = urllib.request.urlopen(request.geturl())
+ sledrun_list_response = json.load(response)
+ rows_to_insert = sledrun_list_response['query']['categorymembers']
+ for row in rows_to_insert:
+ row['sledrun_title'] = f'{row["title"]}/Rodelbahn.json'
+ row['map_title'] = f'{row["title"]}/Landkarte.json'
+
+ qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['sledrun_title'] for r in rows_to_insert)})
+ request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&'
+ + qs_titles)
+ response = urllib.request.urlopen(request.geturl())
+ sledrun_json_list_response = json.load(response)
+ sledrun_json_dict = {s['title']: s for s in sledrun_json_list_response['query']['pages'].values()}
+
+ qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['map_title'] for r in rows_to_insert)})
+ request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&'
+ + qs_titles)
+ response = urllib.request.urlopen(request.geturl())
+ map_json_list_response = json.load(response)
+ map_json_dict = {m['title']: m for m in map_json_list_response['query']['pages'].values()}
+
+ for row in rows_to_insert:
+ sledrun_json = sledrun_json_dict.get(row['sledrun_title'])
+ if 'missing' not in sledrun_json:
+ row['sledrun_pageid'] = sledrun_json['pageid']
+ row['sledrun'] = sledrun_json['revisions'][0]['slots']['main']['*']
+ map_json = map_json_dict.get(row['map_title'])
+ if 'missing' not in map_json:
+ row['map_pageid'] = map_json['pageid']
+ row['map'] = map_json['revisions'][0]['slots']['main']['*']
+
+ sql = 'insert into wrsledrunjsoncache ' \
+ '(sledrun_page_id, sledrun_json_page_id, sledrun_json, map_json_page_id, map_json) ' \
+ 'values (%s, %s, %s, %s, %s)'
+ for row in rows_to_insert:
+ connection.execute(sql, (row['pageid'], row.get('sledrun_pageid'), row.get('sledrun'),
+ row.get('map_pageid'), row.get('map')))
+
+ if 'continue' not in sledrun_list_response:
+ break
+ cmcontinue = sledrun_list_response['continue']['cmcontinue']