X-Git-Url: https://git.toastfreeware.priv.at/philipp/winterrodeln/wrpylib.git/blobdiff_plain/25d444a2a5f7b6bb4a94ec21f6c96d4f127a88f7..60bb81b192ff10deba17f324e4c7be1fdbfb7fe6:/wrpylib/wrmwcache.py?ds=sidebyside diff --git a/wrpylib/wrmwcache.py b/wrpylib/wrmwcache.py index 7966c56..95e0e96 100644 --- a/wrpylib/wrmwcache.py +++ b/wrpylib/wrmwcache.py @@ -1,12 +1,12 @@ -#!/usr/bin/python2.6 -# -*- coding: iso-8859-15 -*- +#!/usr/bin/python3.4 # $Id$ # $HeadURL$ """Contains functions that maintain/update the cache tables.""" from sqlalchemy import schema from sqlalchemy.sql import select -import formencode -from wrpylib import mwdb, wrmwdb, mwmarkup, wrmwmarkup +from sqlalchemy.sql.expression import func as sqlfunc +from osgeo import ogr +from wrpylib import mwdb, wrmwdb, wrmwmarkup, wrvalidators class UpdateCacheError(RuntimeError): @@ -18,7 +18,7 @@ def update_wrsledruncache(connection): is raised. No other exception type should be raised under normal circumstances. >>> from sqlalchemy.engine import create_engine - >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=0') + >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=1') >>> update_wrsledruncache(engine.connect()) """ metadata = schema.MetaData() @@ -27,11 +27,13 @@ def update_wrsledruncache(connection): categorylinks = mwdb.categorylinks_table(metadata) revision = mwdb.revision_table(metadata) text = mwdb.text_table(metadata) + class Sledrun: + pass transaction = connection.begin() # Query all sled runs - q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to==u'Rodelbahn')) + q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to=='Rodelbahn')) sledrun_pages = connection.execute(q) # Original SQL: # sql = u"select page_id, rev_id, old_id, page_title, old_text, 'In_Arbeit' in (select cl_to from categorylinks where cl_from=page_id) as under_construction from page, revision, text, categorylinks where page_latest=rev_id and old_id=rev_text_id and cl_from=page_id and cl_to='Rodelbahn' order by page_title" @@ -42,15 +44,17 @@ def update_wrsledruncache(connection): # Refill wrsledruncache table for sledrun_page in sledrun_pages: - try: - start, end, sledrun = wrmwmarkup.rodelbahnbox_to_sledrun(sledrun_page.old_text) + try: + rodelbahnbox = wrvalidators.rodelbahnbox_from_str(sledrun_page.old_text) + sledrun = wrmwmarkup.sledrun_from_rodelbahnbox(rodelbahnbox, Sledrun()) sledrun.page_id = sledrun_page.page_id sledrun.page_title = sledrun_page.page_title - sledrun.under_construction = connection.execute(select([categorylinks], (categorylinks.c.cl_from==sledrun_page.page_id) & (categorylinks.c.cl_to == u'In_Arbeit')).alias('x').count()).fetchone()[0] > 0 # It would be better to do this in the query above + sledrun.name_url = wrvalidators.sledrun_page_title_to_pretty_url(sledrun_page.page_title) + sledrun.under_construction = connection.execute(select([sqlfunc.count()], (categorylinks.c.cl_from==sledrun_page.page_id) & (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')).fetchone()[0] > 0 connection.execute(wrsledruncache.insert(sledrun.__dict__)) - except (RuntimeError, formencode.Invalid) as e: + except ValueError as e: transaction.rollback() - error_msg = u"Error at sled run '{0}': {1}".format(sledrun_page.page_title, unicode(e)) + error_msg = "Error at sled run '{0}': {1}".format(sledrun_page.page_title, str(e)) raise UpdateCacheError(error_msg, sledrun_page.page_title, e) transaction.commit() @@ -60,7 +64,7 @@ def update_wrinncache(connection): is raised. No other exception type should be raised under normal circumstances. >>> from sqlalchemy.engine import create_engine - >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=0') + >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=1') >>> update_wrinncache(engine.connect()) """ metadata = schema.MetaData() @@ -69,11 +73,13 @@ def update_wrinncache(connection): categorylinks = mwdb.categorylinks_table(metadata) revision = mwdb.revision_table(metadata) text = mwdb.text_table(metadata) + class Inn: + pass transaction = connection.begin() # Query all inns - q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to==u'Gasthaus')) + q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to=='Gasthaus')) inn_pages = connection.execute(q) # Delete all existing entries in wrinncache @@ -82,15 +88,16 @@ def update_wrinncache(connection): # Refill wrinncache table for inn_page in inn_pages: - try: - start, end, inn = wrmwmarkup.gasthausbox_to_inn(inn_page.old_text) + try: + gasthausbox = wrvalidators.gasthausbox_from_str(inn_page.old_text) + inn = wrmwmarkup.inn_from_gasthausbox(gasthausbox, Inn()) inn.page_id = inn_page.page_id inn.page_title = inn_page.page_title - inn.under_construction = connection.execute(select([categorylinks], (categorylinks.c.cl_from==inn_page.page_id) & (categorylinks.c.cl_to == u'In_Arbeit')).alias('x').count()).fetchone()[0] > 0 # It would be better to do this in the query above + inn.under_construction = connection.execute(select([sqlfunc.count()], (categorylinks.c.cl_from==inn_page.page_id) & (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')).fetchone()[0] > 0 # It would be better to do this in the query above connection.execute(wrinncache.insert(inn.__dict__)) - except (RuntimeError, formencode.Invalid) as e: + except ValueError as e: transaction.rollback() - error_msg = u"Error as inn '{0}': {1}".format(inn_page.page_title, unicode(e)) + error_msg = "Error as inn '{0}': {1}".format(inn_page.page_title, str(e)) raise UpdateCacheError(error_msg, inn_page.page_title, e) transaction.commit() @@ -106,7 +113,6 @@ def update_wrreportcache(connection, page_id=None): >>> update_wrreportcache(engine.connect()) """ metadata = schema.MetaData() - wrreport = wrmwdb.wrreport_table(metadata) wrreportcache = wrmwdb.wrreportcache_table(metadata) transaction = connection.begin() @@ -141,10 +147,9 @@ def update_wrmapcache(connection): is raised. No other exception type should be raised under normal circumstances. >>> from sqlalchemy.engine import create_engine - >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=0') - >>> # or: engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=0&passwd=XXXXX') - >>> connection = engine.connect() - >>> update_wrmapcache(connection) + >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=1') + >>> # or: engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=1&passwd=XXXXX') + >>> update_wrmapcache(engine.connect()) """ metadata = schema.MetaData() page = mwdb.page_table(metadata) @@ -155,7 +160,7 @@ def update_wrmapcache(connection): transaction = connection.begin() # Query all sledruns - q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to==u'Rodelbahn')) + q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to=='Rodelbahn')) sledrun_pages = connection.execute(q) # Original SQL: # sql = u"select page_id, rev_id, old_id, page_title, old_text, 'In_Arbeit' in (select cl_to from categorylinks where cl_from=page_id) as under_construction from page, revision, text, categorylinks where page_latest=rev_id and old_id=rev_text_id and cl_from=page_id and cl_to='Rodelbahn' order by page_title" @@ -167,31 +172,81 @@ def update_wrmapcache(connection): # Refill wrmappointcache and wrmappathcache tables for sledrun_page in sledrun_pages: - try: - result = mwmarkup.parse_googlemap(sledrun_page.old_text) - if not result is None: - center, zoom, coords, paths = result + try: + import mwparserfromhell + wikicode = mwparserfromhell.parse(sledrun_page.old_text) + wrmap_list = wikicode.filter_tags(recursive=False, matches=lambda tag: tag.tag == 'wrmap') + if len(wrmap_list) == 0: + continue # not wrmap in page + if len(wrmap_list) > 1: + raise UpdateCacheError('{} entries found in article "{}"'.format(len(wrmap_list), sledrun_page.page_title)) + wrmap = wrmap_list[0] + geojson = wrmwmarkup.parse_wrmap(str(wrmap)) + + for feature in geojson['features']: + properties = feature['properties'] + coordinates = feature['geometry']['coordinates'] + # Points - for coord in coords: - lon, lat, point_type, label = coord - point_types = {u'Gasthaus': u'hut', u'Haltestelle': u'busstop', u'Parkplatz': u'carpark', u'Achtung': u'warning'} - if not point_type is None: - if not point_types.has_key(point_type): raise RuntimeError(u'Unknown point type {0}'.format(point_type)) - point_type = point_types[point_type] - sql = u'insert into wrmappointcache (page_id, type, point, label) values (%s, %s, POINT(%s, %s), %s)' - connection.execute(sql, (sledrun_page.page_id, point_type, lon, lat, label)) + if properties['type'] in wrmwmarkup.WRMAP_POINT_TYPES: + lon, lat = coordinates + label = properties.get('name') + point_types = {'gasthaus': 'hut', 'haltestelle': 'busstop', 'parkplatz': 'carpark', 'achtung': 'warning', 'foto': 'photo', 'verleih': 'rental', 'punkt': 'point'} + point_type = point_types[properties['type']] + sql = 'insert into wrmappointcache (page_id, type, point, label) values (%s, %s, POINT(%s, %s), %s)' + connection.execute(sql, (sledrun_page.page_id, point_type, lon, lat, label)) + # Paths - for path_type, coords in paths: - path_type = path_type.lower() - path_types = {u'6#ff014e9a': u'sledrun', u'6#ffe98401': u'walkup', u'6#ff7f7fff': u'alternative', u'3#ff000000': u'lift'} - if not path_types.has_key(path_type): raise RuntimeError(u'Unknown path type {0}'.format(path_type)) - path_type = path_types[path_type] - path = u", ".join(["{0} {1}".format(lon, lat) for lon, lat, symbol, title in coords]) - path = u'LineString({0})'.format(path) - sql = u'insert into wrmappathcache (path, page_id, type) values (GeomFromText(%s), %s, %s)' + elif properties['type'] in wrmwmarkup.WRMAP_LINE_TYPES: + path_types = {'rodelbahn': 'sledrun', 'gehweg': 'walkup', 'alternative': 'alternative', 'lift': 'lift', 'anfahrt': 'recommendedcarroute', 'linie': 'line'} + path_type = path_types[properties['type']] + path = ", ".join(["{0} {1}".format(lon, lat) for lon, lat in coordinates]) + path = 'LineString({0})'.format(path) + if path_type == 'recommendedcarroute': continue + sql = 'insert into wrmappathcache (path, page_id, type) values (GeomFromText(%s), %s, %s)' connection.execute(sql, (path, sledrun_page.page_id, path_type)) + + else: + raise RuntimeError('Unknown feature type {0}'.format(properties['type'])) except RuntimeError as e: - error_msg = u"Error at sledrun '{0}': {1}".format(sledrun_page.page_title, unicode(e)) + error_msg = "Error at sledrun '{0}': {1}".format(sledrun_page.page_title, str(e)) transaction.rollback() raise UpdateCacheError(error_msg, sledrun_page.page_title, e) transaction.commit() + + +def update_wrregioncache(connection): + """Updates the wrregioncache table from the wiki. + It relays on the table wrsledruncache to be up-to-date. + No exceptions should be raised under normal circumstances. + + >>> from sqlalchemy.engine import create_engine + >>> engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=1') + >>> # or: engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=1&passwd=XXXXX') + >>> update_wrregioncache(engine.connect()) + """ + metadata = schema.MetaData() + wrregion = wrmwdb.wrregion_table(metadata) + wrsledruncache = wrmwdb.wrsledruncache_table(metadata) + wrregioncache = wrmwdb.wrregioncache_table(metadata) + + transaction = connection.begin() + + # Delete all existing entries in wrregioncache + # We rely on transactions MySQL InnoDB + connection.execute(wrregioncache.delete()) + + # Query all combinations of sledruns and regions + sel = select([wrregion.c.id.label('region_id'), sqlfunc.AsWKB(wrregion.c.border).label('border'), wrsledruncache.c.page_id, wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude], sqlfunc.contains(wrregion.c.border, sqlfunc.point(wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude))) + ins = wrregioncache.insert() + + # Refill wrregioncache + point = ogr.Geometry(ogr.wkbPoint) + result = connection.execute(sel) + for row in result: + point.SetPoint(0, row.position_longitude, row.position_latitude) + if point.Within(ogr.CreateGeometryFromWkb(row.border)): + connection.execute(ins.values(region_id=row.region_id, page_id=row.page_id)) + + # commit + transaction.commit()