X-Git-Url: https://git.toastfreeware.priv.at/philipp/winterrodeln/wrpylib.git/blobdiff_plain/02d36fe6e46df7d65bde90ec99d032546feb8893..8e51ab5879cb7901759939246df67d17dd26923b:/wrpylib/wrmwcache.py diff --git a/wrpylib/wrmwcache.py b/wrpylib/wrmwcache.py index 4b3df61..95e0e96 100644 --- a/wrpylib/wrmwcache.py +++ b/wrpylib/wrmwcache.py @@ -1,15 +1,12 @@ -#!/usr/bin/python2.7 -# -*- coding: iso-8859-15 -*- +#!/usr/bin/python3.4 # $Id$ # $HeadURL$ """Contains functions that maintain/update the cache tables.""" from sqlalchemy import schema from sqlalchemy.sql import select from sqlalchemy.sql.expression import func as sqlfunc -import geoalchemy.functions as geofunc from osgeo import ogr -import formencode -from wrpylib import mwdb, wrmwdb, mwmarkup, wrmwmarkup +from wrpylib import mwdb, wrmwdb, wrmwmarkup, wrvalidators class UpdateCacheError(RuntimeError): @@ -30,11 +27,13 @@ def update_wrsledruncache(connection): categorylinks = mwdb.categorylinks_table(metadata) revision = mwdb.revision_table(metadata) text = mwdb.text_table(metadata) + class Sledrun: + pass transaction = connection.begin() # Query all sled runs - q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to==u'Rodelbahn')) + q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to=='Rodelbahn')) sledrun_pages = connection.execute(q) # Original SQL: # sql = u"select page_id, rev_id, old_id, page_title, old_text, 'In_Arbeit' in (select cl_to from categorylinks where cl_from=page_id) as under_construction from page, revision, text, categorylinks where page_latest=rev_id and old_id=rev_text_id and cl_from=page_id and cl_to='Rodelbahn' order by page_title" @@ -45,15 +44,17 @@ def update_wrsledruncache(connection): # Refill wrsledruncache table for sledrun_page in sledrun_pages: - try: - start, end, sledrun = wrmwmarkup.rodelbahnbox_to_sledrun(sledrun_page.old_text) + try: + rodelbahnbox = wrvalidators.rodelbahnbox_from_str(sledrun_page.old_text) + sledrun = wrmwmarkup.sledrun_from_rodelbahnbox(rodelbahnbox, Sledrun()) sledrun.page_id = sledrun_page.page_id sledrun.page_title = sledrun_page.page_title - sledrun.under_construction = connection.execute(select([categorylinks], (categorylinks.c.cl_from==sledrun_page.page_id) & (categorylinks.c.cl_to == u'In_Arbeit')).alias('x').count()).fetchone()[0] > 0 # It would be better to do this in the query above + sledrun.name_url = wrvalidators.sledrun_page_title_to_pretty_url(sledrun_page.page_title) + sledrun.under_construction = connection.execute(select([sqlfunc.count()], (categorylinks.c.cl_from==sledrun_page.page_id) & (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')).fetchone()[0] > 0 connection.execute(wrsledruncache.insert(sledrun.__dict__)) - except (RuntimeError, formencode.Invalid) as e: + except ValueError as e: transaction.rollback() - error_msg = u"Error at sled run '{0}': {1}".format(sledrun_page.page_title, unicode(e)) + error_msg = "Error at sled run '{0}': {1}".format(sledrun_page.page_title, str(e)) raise UpdateCacheError(error_msg, sledrun_page.page_title, e) transaction.commit() @@ -72,11 +73,13 @@ def update_wrinncache(connection): categorylinks = mwdb.categorylinks_table(metadata) revision = mwdb.revision_table(metadata) text = mwdb.text_table(metadata) + class Inn: + pass transaction = connection.begin() # Query all inns - q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to==u'Gasthaus')) + q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to=='Gasthaus')) inn_pages = connection.execute(q) # Delete all existing entries in wrinncache @@ -85,15 +88,16 @@ def update_wrinncache(connection): # Refill wrinncache table for inn_page in inn_pages: - try: - start, end, inn = wrmwmarkup.gasthausbox_to_inn(inn_page.old_text) + try: + gasthausbox = wrvalidators.gasthausbox_from_str(inn_page.old_text) + inn = wrmwmarkup.inn_from_gasthausbox(gasthausbox, Inn()) inn.page_id = inn_page.page_id inn.page_title = inn_page.page_title - inn.under_construction = connection.execute(select([categorylinks], (categorylinks.c.cl_from==inn_page.page_id) & (categorylinks.c.cl_to == u'In_Arbeit')).alias('x').count()).fetchone()[0] > 0 # It would be better to do this in the query above + inn.under_construction = connection.execute(select([sqlfunc.count()], (categorylinks.c.cl_from==inn_page.page_id) & (categorylinks.c.cl_to == 'In_Arbeit')).alias('x')).fetchone()[0] > 0 # It would be better to do this in the query above connection.execute(wrinncache.insert(inn.__dict__)) - except (RuntimeError, formencode.Invalid) as e: + except ValueError as e: transaction.rollback() - error_msg = u"Error as inn '{0}': {1}".format(inn_page.page_title, unicode(e)) + error_msg = "Error as inn '{0}': {1}".format(inn_page.page_title, str(e)) raise UpdateCacheError(error_msg, inn_page.page_title, e) transaction.commit() @@ -109,7 +113,6 @@ def update_wrreportcache(connection, page_id=None): >>> update_wrreportcache(engine.connect()) """ metadata = schema.MetaData() - wrreport = wrmwdb.wrreport_table(metadata) wrreportcache = wrmwdb.wrreportcache_table(metadata) transaction = connection.begin() @@ -157,7 +160,7 @@ def update_wrmapcache(connection): transaction = connection.begin() # Query all sledruns - q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to==u'Rodelbahn')) + q = select([page, categorylinks, revision, text], (page.c.page_latest==revision.c.rev_id) & (text.c.old_id==revision.c.rev_text_id) & (categorylinks.c.cl_from==page.c.page_id) & (categorylinks.c.cl_to=='Rodelbahn')) sledrun_pages = connection.execute(q) # Original SQL: # sql = u"select page_id, rev_id, old_id, page_title, old_text, 'In_Arbeit' in (select cl_to from categorylinks where cl_from=page_id) as under_construction from page, revision, text, categorylinks where page_latest=rev_id and old_id=rev_text_id and cl_from=page_id and cl_to='Rodelbahn' order by page_title" @@ -169,11 +172,16 @@ def update_wrmapcache(connection): # Refill wrmappointcache and wrmappathcache tables for sledrun_page in sledrun_pages: - try: - start, content, endtag, end = mwmarkup.find_tag(sledrun_page.old_text, 'wrmap') - if content is None: - continue - geojson = wrmwmarkup.parse_wrmap(sledrun_page.old_text[start:end]) + try: + import mwparserfromhell + wikicode = mwparserfromhell.parse(sledrun_page.old_text) + wrmap_list = wikicode.filter_tags(recursive=False, matches=lambda tag: tag.tag == 'wrmap') + if len(wrmap_list) == 0: + continue # not wrmap in page + if len(wrmap_list) > 1: + raise UpdateCacheError('{} entries found in article "{}"'.format(len(wrmap_list), sledrun_page.page_title)) + wrmap = wrmap_list[0] + geojson = wrmwmarkup.parse_wrmap(str(wrmap)) for feature in geojson['features']: properties = feature['properties'] @@ -183,25 +191,25 @@ def update_wrmapcache(connection): if properties['type'] in wrmwmarkup.WRMAP_POINT_TYPES: lon, lat = coordinates label = properties.get('name') - point_types = {u'gasthaus': u'hut', u'haltestelle': u'busstop', u'parkplatz': u'carpark', u'achtung': u'warning', u'punkt': u'point'} + point_types = {'gasthaus': 'hut', 'haltestelle': 'busstop', 'parkplatz': 'carpark', 'achtung': 'warning', 'foto': 'photo', 'verleih': 'rental', 'punkt': 'point'} point_type = point_types[properties['type']] - sql = u'insert into wrmappointcache (page_id, type, point, label) values (%s, %s, POINT(%s, %s), %s)' + sql = 'insert into wrmappointcache (page_id, type, point, label) values (%s, %s, POINT(%s, %s), %s)' connection.execute(sql, (sledrun_page.page_id, point_type, lon, lat, label)) # Paths elif properties['type'] in wrmwmarkup.WRMAP_LINE_TYPES: - path_types = {u'rodelbahn': u'sledrun', u'gehweg': u'walkup', u'alternative': u'alternative', u'lift': u'lift', u'anfahrt': u'recommendedcarroute', u'linie': u'line'} + path_types = {'rodelbahn': 'sledrun', 'gehweg': 'walkup', 'alternative': 'alternative', 'lift': 'lift', 'anfahrt': 'recommendedcarroute', 'linie': 'line'} path_type = path_types[properties['type']] - path = u", ".join(["{0} {1}".format(lon, lat) for lon, lat in coordinates]) - path = u'LineString({0})'.format(path) - if path_type == u'recommendedcarroute': continue - sql = u'insert into wrmappathcache (path, page_id, type) values (GeomFromText(%s), %s, %s)' + path = ", ".join(["{0} {1}".format(lon, lat) for lon, lat in coordinates]) + path = 'LineString({0})'.format(path) + if path_type == 'recommendedcarroute': continue + sql = 'insert into wrmappathcache (path, page_id, type) values (GeomFromText(%s), %s, %s)' connection.execute(sql, (path, sledrun_page.page_id, path_type)) else: - raise RuntimeError(u'Unknown feature type {0}'.format(properties['type'])) + raise RuntimeError('Unknown feature type {0}'.format(properties['type'])) except RuntimeError as e: - error_msg = u"Error at sledrun '{0}': {1}".format(sledrun_page.page_title, unicode(e)) + error_msg = "Error at sledrun '{0}': {1}".format(sledrun_page.page_title, str(e)) transaction.rollback() raise UpdateCacheError(error_msg, sledrun_page.page_title, e) transaction.commit() @@ -229,7 +237,7 @@ def update_wrregioncache(connection): connection.execute(wrregioncache.delete()) # Query all combinations of sledruns and regions - sel = select([wrregion.c.id.label('wrregion_id'), wrregion.c.border.label('border'), wrsledruncache.c.page_id, wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude], geofunc.gcontains(wrregion.c.border, sqlfunc.point(wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude))) + sel = select([wrregion.c.id.label('region_id'), sqlfunc.AsWKB(wrregion.c.border).label('border'), wrsledruncache.c.page_id, wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude], sqlfunc.contains(wrregion.c.border, sqlfunc.point(wrsledruncache.c.position_longitude, wrsledruncache.c.position_latitude))) ins = wrregioncache.insert() # Refill wrregioncache @@ -237,9 +245,8 @@ def update_wrregioncache(connection): result = connection.execute(sel) for row in result: point.SetPoint(0, row.position_longitude, row.position_latitude) - if point.Within(ogr.CreateGeometryFromWkb(row.border.geom_wkb)): - connection.execute(ins.values(wrregion_id=row.wrregion_id, page_id=row.page_id)) + if point.Within(ogr.CreateGeometryFromWkb(row.border)): + connection.execute(ins.values(region_id=row.region_id, page_id=row.page_id)) # commit transaction.commit() -