From: Philipp Spitzer Date: Wed, 8 Jun 2022 19:46:36 +0000 (+0200) Subject: Start to create updatewrsledrunjsoncache.py (currently just adds 3 sledruns). X-Git-Url: https://git.toastfreeware.priv.at/philipp/winterrodeln/wrpylib.git/commitdiff_plain/c69dee58734c35c0017300ec8a330e1b72a0710c Start to create updatewrsledrunjsoncache.py (currently just adds 3 sledruns). --- diff --git a/scripts/updatewrsledrunjsoncache.py b/scripts/updatewrsledrunjsoncache.py new file mode 100644 index 0000000..83de932 --- /dev/null +++ b/scripts/updatewrsledrunjsoncache.py @@ -0,0 +1,47 @@ +#!/usr/bin/python3 +"""Updates the wrsledrunjsoncache table (by calling wrpylib.wrmwcache.update_wrsledrunjsoncache()). + +Command line usage: +$ python updatewrsledrunjsoncache.py inifile1.ini ... + +One or more .ini configuration files can be given. +At the end, the following entries have to be present: + + [mysql] + host=localhost + dbname=philipp_winterrodeln_wiki + user_name=philipp_www + user_pass=YYYYYY + + [robot] + wikiurl=https://www.winterrodeln.org/mediawiki/api.php +""" +import argparse +import configparser +import urllib.parse +from sqlalchemy.engine import create_engine +import wrpylib.wrmwcache + + +def update_wrsledrunjsoncache(inifile): + """ + :param inifile: filename of an .ini file or a list of .ini files. + """ + config = configparser.ConfigParser() + config.read(inifile) + + host = config.get('mysql', 'host') + dbname = config.get('mysql', 'dbname') + user = config.get('mysql', 'user_name') + passwd = config.get('mysql', 'user_pass') + + engine = create_engine(f'mysql://{user}@{host}:3306/{dbname}?passwd={passwd}&charset=utf8mb4') + api_url = urllib.parse.urlparse(config.get('robot', 'wikiurl')) + wrpylib.wrmwcache.update_wrsledrunjsoncache(api_url, engine.connect()) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Updates the wrsledruncache table.') + parser.add_argument('inifile', nargs='+', help='inifile.ini, see: https://www.winterrodeln.org/trac/wiki/ConfigIni') + args = parser.parse_args() + update_wrsledrunjsoncache(args.inifile) diff --git a/wrpylib/wrmwcache.py b/wrpylib/wrmwcache.py index 0b26415..e64161d 100644 --- a/wrpylib/wrmwcache.py +++ b/wrpylib/wrmwcache.py @@ -1,5 +1,9 @@ """Contains functions that maintain/update the cache tables.""" +import urllib.parse +import urllib.request +import json import mwparserfromhell +import sqlalchemy.engine from sqlalchemy import schema, Table from sqlalchemy.engine import Connection from sqlalchemy.sql import select @@ -306,3 +310,51 @@ def update_wrregioncache(connection): point.SetPoint(0, row.position_longitude, row.position_latitude) if point.Within(ogr.CreateGeometryFromWkb(row.border)): connection.execute(ins.values(region_id=row.region_id, page_id=row.page_id)) + + +def update_wrsledrunjsoncache(api_url: urllib.parse.ParseResult, connection: sqlalchemy.engine.Connection): + """Updates the wrsledrunjsoncache from JSON files in the wiki. + + :param api_url: URL to the API, e.g. https://www.winterrodeln.org/mediawiki/api.php + :param connection: open sqlalchemy connection + """ + request = api_url._replace(query='action=query&list=categorymembers&cmtitle=Kategorie:Rodelbahn' + '&cmlimit=3&format=json') # cmlimit=max (5000) + response = urllib.request.urlopen(request.geturl()) + sledrun_list_response = json.load(response) + rows_to_insert = sledrun_list_response['query']['categorymembers'] + for row in rows_to_insert: + row['sledrun_title'] = f'{row["title"]}/Rodelbahn.json' + row['map_title'] = f'{row["title"]}/Landkarte.json' + + qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['sledrun_title'] for r in rows_to_insert)}) + request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + qs_titles) + response = urllib.request.urlopen(request.geturl()) + sledrun_json_list_response = json.load(response) + sledrun_json_dict = {s['title']: s for s in sledrun_json_list_response['query']['pages'].values()} + + qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['map_title'] for r in rows_to_insert)}) + request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + qs_titles) + response = urllib.request.urlopen(request.geturl()) + map_json_list_response = json.load(response) + map_json_dict = {m['title']: m for m in map_json_list_response['query']['pages'].values()} + + for row in rows_to_insert: + sledrun_json = sledrun_json_dict.get(row['sledrun_title']) + if sledrun_json is not None: + row['sledrun_pageid'] = sledrun_json['pageid'] + row['sledrun'] = sledrun_json['revisions'][0]['slots']['main']['*'] + map_json = map_json_dict.get(row['map_title']) + if map_json is not None: + row['map_pageid'] = map_json['pageid'] + row['map'] = map_json['revisions'][0]['slots']['main']['*'] + + with connection.begin(): + # connection.execute('truncate table wrsledrunjsoncache') # needs additional permissions + connection.execute('delete from wrsledrunjsoncache') + sql = 'insert into wrsledrunjsoncache ' \ + '(sledrun_page_id, sledrun_json_page_id, sledrun_json, map_json_page_id, map_json) ' \ + 'values (%s, %s, %s, %s, %s)' + for row in rows_to_insert: + connection.execute(sql, (row['pageid'], row.get('sledrun_pageid'), row.get('sledrun'), + row.get('map_pageid'), row.get('map')))