1 """Contains functions that maintain/update the cache tables."""
5 import mwparserfromhell
6 import sqlalchemy.engine
7 from sqlalchemy import schema, Table
8 from sqlalchemy.engine import Connection
9 from sqlalchemy.sql import select
10 from sqlalchemy.sql.expression import func as sqlfunc, text
12 from wrpylib import mwdb, wrmwdb, wrmwmarkup, wrvalidators


class UpdateCacheError(RuntimeError):
    pass


def _get_mw_text(connection: Connection, text: Table, content_address: str) -> str:
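    # A content address of the form 'tt:<old_id>' points at a row of the MediaWiki text table
    # (text.old_id); any other address scheme raises ValueError below.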
    parts = content_address.split(':')  # e.g. 'tt:15664'
    if len(parts) != 2 or parts[0] != 'tt':
        raise ValueError('Content has unexpected format')
    old_id = int(parts[1])
    query = select([text], text.c.old_id == old_id)
    text_row = connection.execute(query).fetchone()
    return text_row.old_text


def update_wrsledruncache(connection):
    """Updates the wrsledruncache table from the wiki. If convert errors occur, an UpdateCacheError exception
    is raised. No other exception type should be raised under normal circumstances.

    >>> from sqlalchemy.engine import create_engine
    >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4')
    >>> update_wrsledruncache(engine.connect())
    """
    metadata = schema.MetaData()
    wrsledruncache = wrmwdb.wrsledruncache_table(metadata)
    page = mwdb.page_table(metadata)
    categorylinks = mwdb.categorylinks_table(metadata)
    slots = mwdb.slots_table(metadata)
    content = mwdb.content_table(metadata)
    text = mwdb.text_table(metadata)
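    # The current wiki text of a page is reached via page.page_latest -> slots.slot_revision_id,
    # slots.slot_content_id -> content.content_id, and content.content_address -> text.old_id
    # (resolved by _get_mw_text()).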

    class Sledrun:
        pass

    with connection.begin():
        try:
            # Select the latest revision's main slot content for all pages in category 'Rodelbahn'
            q = select(
                [page, categorylinks, slots, content],
                (page.c.page_latest == slots.c.slot_revision_id) & (slots.c.slot_content_id == content.c.content_id) &
                (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Rodelbahn'))
            sledrun_pages = connection.execute(q)

            # Delete all existing entries in wrsledruncache.
            # We rely on MySQL InnoDB transactions, so readers never see an empty table.
            connection.execute(wrsledruncache.delete())

            # Refill wrsledruncache table
            for sledrun_page in sledrun_pages:
                old_text = _get_mw_text(connection, text, sledrun_page.content_address)
                rodelbahnbox = wrvalidators.rodelbahnbox_from_str(old_text)
                sledrun = wrmwmarkup.sledrun_from_rodelbahnbox(rodelbahnbox, Sledrun())
                sledrun.page_id = sledrun_page.page_id
                sledrun.page_title = sledrun_page.page_title
                sledrun.name_url = wrvalidators.sledrun_page_title_to_pretty_url(sledrun_page.page_title)
                sledrun.under_construction = connection.execute(select(
                    [sqlfunc.count()],
                    (categorylinks.c.cl_from == sledrun_page.page_id) &
                    (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')).fetchone()[0] > 0
                connection.execute(wrsledruncache.insert(sledrun.__dict__))

        except ValueError as e:
            error_msg = f"Error at sled run '{sledrun_page.page_title}': {e}"
            raise UpdateCacheError(error_msg, sledrun_page.page_title, e)


def update_wrinncache(connection):
    """Updates the wrinncache table from the wiki. If convert errors occur, an UpdateCacheError exception
    is raised. No other exception type should be raised under normal circumstances.

    >>> from sqlalchemy.engine import create_engine
    >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4')
    >>> update_wrinncache(engine.connect())
    """
    metadata = schema.MetaData()
    wrinncache = wrmwdb.wrinncache_table(metadata)
    page = mwdb.page_table(metadata)
    categorylinks = mwdb.categorylinks_table(metadata)
    slots = mwdb.slots_table(metadata)
    content = mwdb.content_table(metadata)
    text = mwdb.text_table(metadata)

    class Inn:
        pass

    with connection.begin():
        try:
            # Select the latest revision's main slot content for all pages in category 'Gasthaus'
            q = select(
                [page, categorylinks, slots, content],
                (page.c.page_latest == slots.c.slot_revision_id) & (slots.c.slot_content_id == content.c.content_id) &
                (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Gasthaus'))
            inn_pages = connection.execute(q)

            # Delete all existing entries in wrinncache.
            # We rely on MySQL InnoDB transactions, so readers never see an empty table.
            connection.execute(wrinncache.delete())

            # Refill wrinncache table
            for inn_page in inn_pages:
                old_text = _get_mw_text(connection, text, inn_page.content_address)
                gasthausbox = wrvalidators.gasthausbox_from_str(old_text)
                inn = wrmwmarkup.inn_from_gasthausbox(gasthausbox, Inn())
                inn.page_id = inn_page.page_id
                inn.page_title = inn_page.page_title
                inn.under_construction = connection.execute(select(
                    [sqlfunc.count()],
                    (categorylinks.c.cl_from == inn_page.page_id) &
                    (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')) \
                    .fetchone()[0] > 0  # it would be better to do this in the query above
                connection.execute(wrinncache.insert(inn.__dict__))

        except ValueError as e:
            error_msg = f"Error at inn '{inn_page.page_title}': {e}"
            raise UpdateCacheError(error_msg, inn_page.page_title, e)


def update_wrreportcache(connection, page_id=None):
    """Updates the wrreportcache table.

    :param connection: sqlalchemy connection
    :param page_id: Update only the cache entry for the sled run described on the Winterrodeln wiki page
        with the specified page_id. Use None to update the whole table.

    >>> from sqlalchemy.engine import create_engine
    >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4')
    >>> update_wrreportcache(engine.connect())
    """
    metadata = schema.MetaData()
    wrreportcache = wrmwdb.wrreportcache_table(metadata)
    with connection.begin():
        # Delete the datasets we are going to update
        sql_del = wrreportcache.delete()
        if page_id is not None:
            sql_del = sql_del.where(wrreportcache.c.page_id == page_id)
        connection.execute(sql_del)

        def insert_row(connection_, row_list_):
            if len(row_list_) == 0:
                return
            # Insert the first (most recent) report of the page
            row_ = dict(row_list_[0])
            connection_.execute(wrreportcache.insert(values=row_))

        # Select the rows to update
        sql = 'select page_id, page_title, wrreport.id as report_id, date_report, `condition`, description, ' \
              'author_name, ' \
              'if(author_userid is null, null, author_username) as author_username from wrreport ' \
              'where {0}`condition` is not null and date_invalid > now() and delete_date is null ' \
              'order by page_id, date_report desc, date_entry desc' \
              .format('' if page_id is None else f'page_id={page_id} and ')
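        # Because of the ordering (page_id, newest date_report/date_entry first), the first row of
        # each page_id group is the report that ends up in the cache.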
        cursor = connection.execute(sql)
        row_list = []
        for row in cursor:
            if row.page_id != page_id:
                insert_row(connection, row_list)
                page_id = row.page_id
                row_list = []
            row_list.append(row)
        insert_row(connection, row_list)


def update_wrmapcache(connection):
    """Updates the wrmappointcache and wrmappathcache tables from the wiki. If convert errors occur,
    an UpdateCacheError exception is raised. No other exception type should be raised under normal circumstances.

    >>> from sqlalchemy.engine import create_engine
    >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4')
    >>> # engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4&passwd=XXX')
    >>> update_wrmapcache(engine.connect())
    """
    metadata = schema.MetaData()
    page = mwdb.page_table(metadata)
    categorylinks = mwdb.categorylinks_table(metadata)
    slots = mwdb.slots_table(metadata)
    content = mwdb.content_table(metadata)
    text = mwdb.text_table(metadata)

    with connection.begin():
        try:
            # Select the latest revision's main slot content for all pages in category 'Rodelbahn'
            q = select(
                [page, categorylinks, slots, content],
                (page.c.page_latest == slots.c.slot_revision_id) & (slots.c.slot_content_id == content.c.content_id) &
                (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Rodelbahn'))
            sledrun_pages = connection.execute(q)

            # Delete all existing entries in wrmappointcache and wrmappathcache.
            # We rely on MySQL InnoDB transactions, so readers never see empty tables.
            connection.execute('delete from wrmappointcache')
            connection.execute('delete from wrmappathcache')

            # Refill wrmappointcache and wrmappathcache tables
            for sledrun_page in sledrun_pages:
                old_text = _get_mw_text(connection, text, sledrun_page.content_address)
                wikicode = mwparserfromhell.parse(old_text)
                wrmap_list = wikicode.filter_tags(recursive=False, matches=lambda tag: tag.tag == 'wrmap')
                if len(wrmap_list) == 0:
                    continue  # no <wrmap> in page
                if len(wrmap_list) > 1:
                    raise UpdateCacheError(
                        f'{len(wrmap_list)} <wrmap ...> entries found in article "{sledrun_page.page_title}"')
                wrmap = wrmap_list[0]
                geojson = wrmwmarkup.parse_wrmap(str(wrmap))
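                # parse_wrmap() returns a GeoJSON-like dict: point features carry a single
                # coordinate pair, line features a list of coordinate pairs.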

                for feature in geojson['features']:
                    properties = feature['properties']
                    coordinates = feature['geometry']['coordinates']

                    if properties['type'] in wrmwmarkup.WRMAP_POINT_TYPES:
                        lon, lat = coordinates
                        label = properties.get('name')
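                        # Map the German element types used in the wiki markup to the English
                        # type names stored in the cache table.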
                        point_types = {
                            'gasthaus': 'hut',
                            'haltestelle': 'busstop',
                            'parkplatz': 'carpark',
                            'achtung': 'warning',
                            'foto': 'photo',
                            'verleih': 'rental',
                            'punkt': 'point',
                        }
                        point_type = point_types[properties['type']]
                        sql = 'insert into wrmappointcache (page_id, type, point, label) ' \
                              'values (%s, %s, POINT(%s, %s), %s)'
                        connection.execute(sql, (sledrun_page.page_id, point_type, lon, lat, label))

                    elif properties['type'] in wrmwmarkup.WRMAP_LINE_TYPES:
                        path_types = {
                            'rodelbahn': 'sledrun',
                            'gehweg': 'walkup',
                            'alternative': 'alternative',
                            'lift': 'lift',
                            'anfahrt': 'recommendedcarroute',
                        }
                        path_type = path_types[properties['type']]
                        path = ", ".join([f"{lon} {lat}" for lon, lat in coordinates])
                        path = f'LineString({path})'
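                        # WKT LineString coordinates use 'lon lat' order, matching the GeoJSON coordinates.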
                        if path_type == 'recommendedcarroute':
                            continue  # recommended car routes are not stored in the path cache

                        sql = 'insert into wrmappathcache (path, page_id, type) values (GeomFromText(%s), %s, %s)'
                        connection.execute(sql, (path, sledrun_page.page_id, path_type))

                    else:
                        raise RuntimeError(f'Unknown feature type {properties["type"]}')

        except RuntimeError as e:
            error_msg = f"Error at sledrun '{sledrun_page.page_title}': {e}"
            raise UpdateCacheError(error_msg, sledrun_page.page_title, e)


def update_wrregioncache(connection):
    """Updates the wrregioncache table from the wiki.
    It relies on the table wrsledruncache being up-to-date.
    No exceptions should be raised under normal circumstances.

    >>> from sqlalchemy.engine import create_engine
    >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4')
    >>> # engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4&passwd=XXX')
    >>> update_wrregioncache(engine.connect())
    """
    metadata = schema.MetaData()
    wrregion = wrmwdb.wrregion_table(metadata)
    wrsledruncache = wrmwdb.wrsledruncache_table(metadata)
    wrregioncache = wrmwdb.wrregioncache_table(metadata)

    with connection.begin():
        # Delete all existing entries in wrregioncache.
        # We rely on MySQL InnoDB transactions, so readers never see an empty table.
        connection.execute(wrregioncache.delete())

        # Query all combinations of sledruns and regions
        sel = select(
            [
                wrregion.c.id.label('region_id'),
                sqlfunc.AsWKB(wrregion.c.border).label('border'),
                wrsledruncache.c.page_id,
                wrsledruncache.c.position_longitude,
                wrsledruncache.c.position_latitude
            ],
            sqlfunc.MBRContains(
                wrregion.c.border,
                sqlfunc.point(wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude)))
        ins = wrregioncache.insert()
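
        # Only sledrun/region pairs whose point actually lies within the region border
        # (exact point-in-polygon test with OGR on the WKB geometry) are inserted below.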
        # Refill wrregioncache
        point = ogr.Geometry(ogr.wkbPoint)
        result = connection.execute(sel)
        for row in result:
            point.SetPoint(0, row.position_longitude, row.position_latitude)
            if point.Within(ogr.CreateGeometryFromWkb(row.border)):
                connection.execute(ins.values(region_id=row.region_id, page_id=row.page_id))


def update_wrsledrunjsoncache(api_url: urllib.parse.ParseResult, connection: sqlalchemy.engine.Connection):
    """Updates the wrsledrunjsoncache from JSON files in the wiki.

    :param api_url: URL to the API, e.g. https://www.winterrodeln.org/mediawiki/api.php
    :param connection: open sqlalchemy connection
    """
    with connection.begin():
        # connection.execute('truncate table wrsledrunjsoncache')  # needs additional permissions
        connection.execute('delete from wrsledrunjsoncache')
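
        # The members of category 'Rodelbahn' are fetched in batches; the API returns a
        # 'cmcontinue' token as long as more pages are available.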
        cmcontinue = ''
        while True:
            request = api_url._replace(query='action=query&list=categorymembers&cmtitle=Kategorie:Rodelbahn'
                                             f'&cmlimit=50&format=json&cmcontinue={cmcontinue}')  # cmlimit=max (5000)
            response = urllib.request.urlopen(request.geturl())
            sledrun_list_response = json.load(response)
            rows_to_insert = sledrun_list_response['query']['categorymembers']
            for row in rows_to_insert:
                row['sledrun_title'] = f'{row["title"]}/Rodelbahn.json'
                row['map_title'] = f'{row["title"]}/Landkarte.json'

            qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['sledrun_title'] for r in rows_to_insert)})
            request = api_url._replace(query='action=query&prop=revisions&rvprop=content|ids&format=json&rvslots=main&'
                                             f'{qs_titles}')
            response = urllib.request.urlopen(request.geturl())
            sledrun_json_list_response = json.load(response)
            sledrun_json_dict = {s['title']: s for s in sledrun_json_list_response['query']['pages'].values()}

            qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['map_title'] for r in rows_to_insert)})
            request = api_url._replace(query='action=query&prop=revisions&rvprop=content|ids&format=json&rvslots=main&'
                                             f'{qs_titles}')
            response = urllib.request.urlopen(request.geturl())
            map_json_list_response = json.load(response)
            map_json_dict = {m['title']: m for m in map_json_list_response['query']['pages'].values()}
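
            # The API keys the 'pages' object by page id, so the responses are re-keyed by title above;
            # pages that do not exist carry a 'missing' marker instead of revisions.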

            for row in rows_to_insert:
                sledrun_json = sledrun_json_dict.get(row['sledrun_title'])
                if 'missing' not in sledrun_json:
                    row['sledrun_pageid'] = sledrun_json['pageid']
                    rev = sledrun_json['revisions'][0]
                    row['sledrun_revid'] = rev['revid']
                    row['sledrun'] = rev['slots']['main']['*']
                map_json = map_json_dict.get(row['map_title'])
                if 'missing' not in map_json:
                    row['map_pageid'] = map_json['pageid']
                    rev = map_json['revisions'][0]
                    row['map_revid'] = rev['revid']
                    row['map'] = rev['slots']['main']['*']

            sql = 'insert into wrsledrunjsoncache ' \
                  '(sledrun_page_id, ' \
                  'sledrun_json_page_id, sledrun_json_rev_id, sledrun_json, ' \
                  'map_json_page_id, map_json_rev_id, map_json) ' \
                  'values (:sledrun_page_id, ' \
                  ':sledrun_json_page_id, :sledrun_json_rev_id, :sledrun_json, ' \
                  ':map_json_page_id, :map_json_rev_id, :map_json)'
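            # The named :parameters are bound via sqlalchemy.sql.expression.text(); values that are
            # not available for a page are passed as None and stored as NULL.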
            for row in rows_to_insert:
                connection.execute(text(sql), [{
                    'sledrun_page_id': row['pageid'],
                    'sledrun_json_page_id': row.get('sledrun_pageid'),
                    'sledrun_json_rev_id': row.get('sledrun_revid'),
                    'sledrun_json': row.get('sledrun'),
                    'map_json_page_id': row.get('map_pageid'),
                    'map_json_rev_id': row.get('map_revid'),
                    'map_json': row.get('map')
                }])

            if 'continue' not in sledrun_list_response:
                break
            cmcontinue = sledrun_list_response['continue']['cmcontinue']