"""Contains functions that maintain/update the cache tables."""
-from sqlalchemy import schema
+import mwparserfromhell
+from sqlalchemy import schema, Table
+from sqlalchemy.engine import Connection
from sqlalchemy.sql import select
from sqlalchemy.sql.expression import func as sqlfunc
from osgeo import ogr
pass
+def _get_mw_text(connection: Connection, text: Table, content_address: str) -> str:
+ parts = content_address.split(':') # e.g. 'tt:15664'
+ if len(parts) != 2 or parts[0] != 'tt':
+ raise ValueError('Content has unexpected format')
+ old_id = int(parts[1])
+ query = select([text], text.c.old_id == old_id)
+ text_row = connection.execute(query).fetchone()
+ return text_row.old_text
+
+
def update_wrsledruncache(connection):
"""Updates the wrsledruncache table from the wiki. If convert errors occur, an UpdateCacheError exception
is raised. No other exception type should be raised under normal circumstances.
wrsledruncache = wrmwdb.wrsledruncache_table(metadata)
page = mwdb.page_table(metadata)
categorylinks = mwdb.categorylinks_table(metadata)
- revision = mwdb.revision_table(metadata)
+ slots = mwdb.slots_table(metadata)
+ content = mwdb.content_table(metadata)
text = mwdb.text_table(metadata)
class Sledrun:
pass
- transaction = connection.begin()
-
- # Query all sled runs
- q = select(
- [page, categorylinks, revision, text],
- (page.c.page_latest == revision.c.rev_id) & (text.c.old_id == revision.c.rev_text_id) &
- (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Rodelbahn'))
- sledrun_pages = connection.execute(q)
- # Original SQL:
- # sql = u"select page_id, rev_id, old_id, page_title, old_text, 'In_Arbeit' in
- # (select cl_to from categorylinks where cl_from=page_id) as under_construction
- # from page, revision, text, categorylinks where page_latest=rev_id and old_id=rev_text_id and
- # cl_from=page_id and cl_to='Rodelbahn' order by page_title"
-
- # Delete all existing entries in wrsledruncache
- # We rely on transactions MySQL InnoDB
- connection.execute(wrsledruncache.delete())
-
- # Refill wrsledruncache table
- for sledrun_page in sledrun_pages:
- try:
- rodelbahnbox = wrvalidators.rodelbahnbox_from_str(sledrun_page.old_text)
- sledrun = wrmwmarkup.sledrun_from_rodelbahnbox(rodelbahnbox, Sledrun())
- sledrun.page_id = sledrun_page.page_id
- sledrun.page_title = sledrun_page.page_title
- sledrun.name_url = wrvalidators.sledrun_page_title_to_pretty_url(sledrun_page.page_title)
- sledrun.under_construction = connection.execute(select(
- [sqlfunc.count()],
- (categorylinks.c.cl_from == sledrun_page.page_id) &
- (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')).fetchone()[0] > 0
- connection.execute(wrsledruncache.insert(sledrun.__dict__))
- except ValueError as e:
- transaction.rollback()
- error_msg = f"Error at sled run '{sledrun_page.page_title}': {e}"
- raise UpdateCacheError(error_msg, sledrun_page.page_title, e)
- transaction.commit()
+ try:
+ with connection.begin():
+
+ # Query all sled runs
+ q = select(
+ [page, categorylinks, slots, content],
+ (page.c.page_latest == slots.c.slot_revision_id) & (slots.c.slot_content_id == content.c.content_id) &
+ (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Rodelbahn'))
+ sledrun_pages = connection.execute(q)
+
+ # Delete all existing entries in wrsledruncache
+ # We rely on transactions MySQL InnoDB
+ connection.execute(wrsledruncache.delete())
+
+ # Refill wrsledruncache table
+ for sledrun_page in sledrun_pages:
+ old_text = _get_mw_text(connection, text, sledrun_page.content_address)
+ rodelbahnbox = wrvalidators.rodelbahnbox_from_str(old_text)
+ sledrun = wrmwmarkup.sledrun_from_rodelbahnbox(rodelbahnbox, Sledrun())
+ sledrun.page_id = sledrun_page.page_id
+ sledrun.page_title = sledrun_page.page_title
+ sledrun.name_url = wrvalidators.sledrun_page_title_to_pretty_url(sledrun_page.page_title)
+ sledrun.under_construction = connection.execute(select(
+ [sqlfunc.count()],
+ (categorylinks.c.cl_from == sledrun_page.page_id) &
+ (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')).fetchone()[0] > 0
+ connection.execute(wrsledruncache.insert(sledrun.__dict__))
+
+ except ValueError as e:
+ error_msg = f"Error at sled run '{sledrun_page.page_title}': {e}"
+ raise UpdateCacheError(error_msg, sledrun_page.page_title, e)
def update_wrinncache(connection):
wrinncache = wrmwdb.wrinncache_table(metadata)
page = mwdb.page_table(metadata)
categorylinks = mwdb.categorylinks_table(metadata)
- revision = mwdb.revision_table(metadata)
+ slots = mwdb.slots_table(metadata)
+ content = mwdb.content_table(metadata)
text = mwdb.text_table(metadata)
class Inn:
pass
- transaction = connection.begin()
-
- # Query all inns
- q = select(
- [page, categorylinks, revision, text],
- (page.c.page_latest == revision.c.rev_id) & (text.c.old_id == revision.c.rev_text_id) &
- (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Gasthaus'))
- inn_pages = connection.execute(q)
-
- # Delete all existing entries in wrinncache
- # We rely on transactions MySQL InnoDB
- connection.execute(wrinncache.delete())
-
- # Refill wrinncache table
- for inn_page in inn_pages:
- try:
- gasthausbox = wrvalidators.gasthausbox_from_str(inn_page.old_text)
- inn = wrmwmarkup.inn_from_gasthausbox(gasthausbox, Inn())
- inn.page_id = inn_page.page_id
- inn.page_title = inn_page.page_title
- inn.under_construction = connection.execute(select(
- [sqlfunc.count()],
- (categorylinks.c.cl_from == inn_page.page_id) &
- (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')) \
- .fetchone()[0] > 0 # it would be better to do this in the query above
- connection.execute(wrinncache.insert(inn.__dict__))
- except ValueError as e:
- transaction.rollback()
- error_msg = f"Error as inn '{inn_page.page_title}': {e}"
- raise UpdateCacheError(error_msg, inn_page.page_title, e)
- transaction.commit()
+ try:
+ with connection.begin():
+
+ # Query all inns
+ q = select(
+ [page, categorylinks, slots, content],
+ (page.c.page_latest == slots.c.slot_revision_id) & (slots.c.slot_content_id == content.c.content_id) &
+ (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Gasthaus'))
+ inn_pages = connection.execute(q)
+
+ # Delete all existing entries in wrinncache
+ # We rely on transactions MySQL InnoDB
+ connection.execute(wrinncache.delete())
+
+ # Refill wrinncache table
+ for inn_page in inn_pages:
+ old_text = _get_mw_text(connection, text, inn_page.content_address)
+ gasthausbox = wrvalidators.gasthausbox_from_str(old_text)
+ inn = wrmwmarkup.inn_from_gasthausbox(gasthausbox, Inn())
+ inn.page_id = inn_page.page_id
+ inn.page_title = inn_page.page_title
+ inn.under_construction = connection.execute(select(
+ [sqlfunc.count()],
+ (categorylinks.c.cl_from == inn_page.page_id) &
+ (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')) \
+ .fetchone()[0] > 0 # it would be better to do this in the query above
+ connection.execute(wrinncache.insert(inn.__dict__))
+
+ except ValueError as e:
+ error_msg = f"Error as inn '{inn_page.page_title}': {e}"
+ raise UpdateCacheError(error_msg, inn_page.page_title, e)
def update_wrreportcache(connection, page_id=None):
"""
metadata = schema.MetaData()
wrreportcache = wrmwdb.wrreportcache_table(metadata)
- transaction = connection.begin()
-
- # Delete the datasets we are going to update
- sql_del = wrreportcache.delete()
- if page_id is not None:
- sql_del = sql_del.where(wrreportcache.c.page_id == page_id)
- connection.execute(sql_del)
-
- def insert_row(connection_, row_list_):
- if len(row_list_) == 0:
- return
- # Insert the report
- row_ = dict(row_list_[0])
- connection_.execute(wrreportcache.insert(values=row_))
-
- # Select the rows to update
- sql = 'select page_id, page_title, wrreport.id as report_id, date_report, `condition`, description, author_name, ' \
- 'if(author_userid is null, null, author_username) as author_username from wrreport ' \
- 'where {0}`condition` is not null and date_invalid > now() and delete_date is null ' \
- 'order by page_id, date_report desc, date_entry desc' \
- .format('' if page_id is None else f'page_id={page_id} and ')
- cursor = connection.execute(sql)
- page_id = None
- row_list = []
- for row in cursor:
- if row.page_id != page_id:
- insert_row(connection, row_list)
- page_id = row.page_id
- row_list = []
- row_list.append(row)
- insert_row(connection, row_list)
- transaction.commit()
+ with connection.begin():
+ # Delete the datasets we are going to update
+ sql_del = wrreportcache.delete()
+ if page_id is not None:
+ sql_del = sql_del.where(wrreportcache.c.page_id == page_id)
+ connection.execute(sql_del)
+
+ def insert_row(connection_, row_list_):
+ if len(row_list_) == 0:
+ return
+ # Insert the report
+ row_ = dict(row_list_[0])
+ connection_.execute(wrreportcache.insert(values=row_))
+
+ # Select the rows to update
+ sql = 'select page_id, page_title, wrreport.id as report_id, date_report, `condition`, description, ' \
+ 'author_name, ' \
+ 'if(author_userid is null, null, author_username) as author_username from wrreport ' \
+ 'where {0}`condition` is not null and date_invalid > now() and delete_date is null ' \
+ 'order by page_id, date_report desc, date_entry desc' \
+ .format('' if page_id is None else f'page_id={page_id} and ')
+ cursor = connection.execute(sql)
+ page_id = None
+ row_list = []
+ for row in cursor:
+ if row.page_id != page_id:
+ insert_row(connection, row_list)
+ page_id = row.page_id
+ row_list = []
+ row_list.append(row)
+ insert_row(connection, row_list)
def update_wrmapcache(connection):
metadata = schema.MetaData()
page = mwdb.page_table(metadata)
categorylinks = mwdb.categorylinks_table(metadata)
- revision = mwdb.revision_table(metadata)
+ slots = mwdb.slots_table(metadata)
+ content = mwdb.content_table(metadata)
text = mwdb.text_table(metadata)
- transaction = connection.begin()
-
- # Query all sledruns
- q = select(
- [page, categorylinks, revision, text],
- (page.c.page_latest == revision.c.rev_id) & (text.c.old_id == revision.c.rev_text_id) &
- (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Rodelbahn'))
- sledrun_pages = connection.execute(q)
- # Original SQL:
- # sql = u"select page_id, rev_id, old_id, page_title, old_text, 'In_Arbeit' in
- # (select cl_to from categorylinks where cl_from=page_id) as under_construction
- # from page, revision, text, categorylinks where page_latest=rev_id and old_id=rev_text_id and cl_from=page_id
- # and cl_to='Rodelbahn' order by page_title"
-
- # Delete all existing entries in wrmappointcache
- # We rely on transactions MySQL InnoDB
- connection.execute('delete from wrmappointcache')
- connection.execute('delete from wrmappathcache')
-
- # Refill wrmappointcache and wrmappathcache tables
- for sledrun_page in sledrun_pages:
- try:
- import mwparserfromhell
- wikicode = mwparserfromhell.parse(sledrun_page.old_text)
- wrmap_list = wikicode.filter_tags(recursive=False, matches=lambda tag: tag.tag == 'wrmap')
- if len(wrmap_list) == 0:
- continue # not wrmap in page
- if len(wrmap_list) > 1:
- raise UpdateCacheError(
- f'{len(wrmap_list)} <wrmap ...> entries found in article "{sledrun_page.page_title}"')
- wrmap = wrmap_list[0]
- geojson = wrmwmarkup.parse_wrmap(str(wrmap))
-
- for feature in geojson['features']:
- properties = feature['properties']
- coordinates = feature['geometry']['coordinates']
-
- # Points
- if properties['type'] in wrmwmarkup.WRMAP_POINT_TYPES:
- lon, lat = coordinates
- label = properties.get('name')
- point_types = {
- 'gasthaus': 'hut',
- 'haltestelle': 'busstop',
- 'parkplatz': 'carpark',
- 'achtung': 'warning',
- 'foto': 'photo',
- 'verleih': 'rental',
- 'punkt': 'point'
- }
- point_type = point_types[properties['type']]
- sql = 'insert into wrmappointcache (page_id, type, point, label) values (%s, %s, POINT(%s, %s), %s)'
- connection.execute(sql, (sledrun_page.page_id, point_type, lon, lat, label))
-
- # Paths
- elif properties['type'] in wrmwmarkup.WRMAP_LINE_TYPES:
- path_types = {
- 'rodelbahn': 'sledrun',
- 'gehweg': 'walkup',
- 'alternative': 'alternative',
- 'lift': 'lift',
- 'anfahrt': 'recommendedcarroute',
- 'linie': 'line'}
- path_type = path_types[properties['type']]
- path = ", ".join([f"{lon} {lat}" for lon, lat in coordinates])
- path = f'LineString({path})'
- if path_type == 'recommendedcarroute':
- continue
- sql = 'insert into wrmappathcache (path, page_id, type) values (GeomFromText(%s), %s, %s)'
- connection.execute(sql, (path, sledrun_page.page_id, path_type))
-
- else:
- raise RuntimeError(f'Unknown feature type {properties["type"]}')
- except RuntimeError as e:
- error_msg = f"Error at sledrun '{sledrun_page.page_title}': {e}"
- transaction.rollback()
- raise UpdateCacheError(error_msg, sledrun_page.page_title, e)
- transaction.commit()
+ try:
+ with connection.begin():
+
+ # Query all sledruns
+ q = select(
+ [page, categorylinks, slots, content],
+ (page.c.page_latest == slots.c.slot_revision_id) & (slots.c.slot_content_id == content.c.content_id) &
+ (categorylinks.c.cl_from == page.c.page_id) & (categorylinks.c.cl_to == 'Rodelbahn'))
+ sledrun_pages = connection.execute(q)
+
+ # Delete all existing entries in wrmappointcache
+ # We rely on transactions MySQL InnoDB
+ connection.execute('delete from wrmappointcache')
+ connection.execute('delete from wrmappathcache')
+
+ # Refill wrmappointcache and wrmappathcache tables
+ for sledrun_page in sledrun_pages:
+ old_text = _get_mw_text(connection, text, sledrun_page.content_address)
+ wikicode = mwparserfromhell.parse(old_text)
+ wrmap_list = wikicode.filter_tags(recursive=False, matches=lambda tag: tag.tag == 'wrmap')
+ if len(wrmap_list) == 0:
+ continue # not wrmap in page
+ if len(wrmap_list) > 1:
+ raise UpdateCacheError(
+ f'{len(wrmap_list)} <wrmap ...> entries found in article "{sledrun_page.page_title}"')
+ wrmap = wrmap_list[0]
+ geojson = wrmwmarkup.parse_wrmap(str(wrmap))
+
+ for feature in geojson['features']:
+ properties = feature['properties']
+ coordinates = feature['geometry']['coordinates']
+
+ # Points
+ if properties['type'] in wrmwmarkup.WRMAP_POINT_TYPES:
+ lon, lat = coordinates
+ label = properties.get('name')
+ point_types = {
+ 'gasthaus': 'hut',
+ 'haltestelle': 'busstop',
+ 'parkplatz': 'carpark',
+ 'achtung': 'warning',
+ 'foto': 'photo',
+ 'verleih': 'rental',
+ 'punkt': 'point'
+ }
+ point_type = point_types[properties['type']]
+ sql = 'insert into wrmappointcache (page_id, type, point, label) values (%s, %s, POINT(%s, %s), %s)'
+ connection.execute(sql, (sledrun_page.page_id, point_type, lon, lat, label))
+
+ # Paths
+ elif properties['type'] in wrmwmarkup.WRMAP_LINE_TYPES:
+ path_types = {
+ 'rodelbahn': 'sledrun',
+ 'gehweg': 'walkup',
+ 'alternative': 'alternative',
+ 'lift': 'lift',
+ 'anfahrt': 'recommendedcarroute',
+ 'linie': 'line'}
+ path_type = path_types[properties['type']]
+ path = ", ".join([f"{lon} {lat}" for lon, lat in coordinates])
+ path = f'LineString({path})'
+ if path_type == 'recommendedcarroute':
+ continue
+ sql = 'insert into wrmappathcache (path, page_id, type) values (GeomFromText(%s), %s, %s)'
+ connection.execute(sql, (path, sledrun_page.page_id, path_type))
+
+ else:
+ raise RuntimeError(f'Unknown feature type {properties["type"]}')
+ except RuntimeError as e:
+ error_msg = f"Error at sledrun '{sledrun_page.page_title}': {e}"
+ raise UpdateCacheError(error_msg, sledrun_page.page_title, e)
def update_wrregioncache(connection):
wrsledruncache = wrmwdb.wrsledruncache_table(metadata)
wrregioncache = wrmwdb.wrregioncache_table(metadata)
- transaction = connection.begin()
-
- # Delete all existing entries in wrregioncache
- # We rely on transactions MySQL InnoDB
- connection.execute(wrregioncache.delete())
-
- # Query all combinations of sledruns and regions
- sel = select(
- [
- wrregion.c.id.label('region_id'),
- sqlfunc.AsWKB(wrregion.c.border).label('border'),
- wrsledruncache.c.page_id,
- wrsledruncache.c.position_longitude,
- wrsledruncache.c.position_latitude
- ],
- sqlfunc.contains(
- wrregion.c.border,
- sqlfunc.point(wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude)
+ with connection.begin():
+
+ # Delete all existing entries in wrregioncache
+ # We rely on transactions MySQL InnoDB
+ connection.execute(wrregioncache.delete())
+
+ # Query all combinations of sledruns and regions
+ sel = select(
+ [
+ wrregion.c.id.label('region_id'),
+ sqlfunc.AsWKB(wrregion.c.border).label('border'),
+ wrsledruncache.c.page_id,
+ wrsledruncache.c.position_longitude,
+ wrsledruncache.c.position_latitude
+ ],
+ sqlfunc.contains(
+ wrregion.c.border,
+ sqlfunc.point(wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude)
+ )
)
- )
- ins = wrregioncache.insert()
-
- # Refill wrregioncache
- point = ogr.Geometry(ogr.wkbPoint)
- result = connection.execute(sel)
- for row in result:
- point.SetPoint(0, row.position_longitude, row.position_latitude)
- if point.Within(ogr.CreateGeometryFromWkb(row.border)):
- connection.execute(ins.values(region_id=row.region_id, page_id=row.page_id))
-
- # commit
- transaction.commit()
+ ins = wrregioncache.insert()
+
+ # Refill wrregioncache
+ point = ogr.Geometry(ogr.wkbPoint)
+ result = connection.execute(sel)
+ for row in result:
+ point.SetPoint(0, row.position_longitude, row.position_latitude)
+ if point.Within(ogr.CreateGeometryFromWkb(row.border)):
+ connection.execute(ins.values(region_id=row.region_id, page_id=row.page_id))