X-Git-Url: https://git.toastfreeware.priv.at/philipp/winterrodeln/wrpylib.git/blobdiff_plain/5a216fd6da6d15205fc3cef9ff7faf4b4555307e..6d0763692a6b5d3b719197457c93c315e08e61cd:/wrpylib/wrmwmarkup.py diff --git a/wrpylib/wrmwmarkup.py b/wrpylib/wrmwmarkup.py index b6d8a72..6877f47 100644 --- a/wrpylib/wrmwmarkup.py +++ b/wrpylib/wrmwmarkup.py @@ -1,14 +1,23 @@ -#!/usr/bin/python2.6 +#!/usr/bin/python2.7 # -*- coding: iso-8859-15 -*- # $Id$ # $HeadURL$ """This module contains winterrodeln specific functions that are prcocessing the MediaWiki markup. """ import re +import xml.etree.ElementTree import formencode import wrpylib.wrvalidators import wrpylib.mwmarkup +WRMAP_POINT_TYPES = ['gasthaus', 'haltestelle', 'parkplatz', 'achtung', 'punkt'] +WRMAP_LINE_TYPES = ['rodelbahn', 'gehweg', 'alternative', 'lift', 'anfahrt', 'linie'] + + +class ParseError(RuntimeError): + """Exception used by some of the functions""" + pass + def _conv(fnct, value, fieldname): """Internal function. @@ -29,7 +38,7 @@ def rodelbahnbox_to_sledrun(wikitext, sledrun=None): # match Rodelbahnbox start, end = wrpylib.mwmarkup.find_template(wikitext, u'Rodelbahnbox') if start is None: raise formencode.Invalid(u"Rodelbahnbox nicht gefunden", wikitext, None) - template_name, properties = wrpylib.mwmarkup.split_template(wikitext[start:end]) + template_title, properties = wrpylib.mwmarkup.split_template(wikitext[start:end]) # process properties for key, value in properties.iteritems(): @@ -43,9 +52,9 @@ def rodelbahnbox_to_sledrun(wikitext, sledrun=None): elif key == u'Lawinen': sledrun.avalanches = _conv(wrpylib.wrvalidators.GermanAvalanches().to_python, value, key) # 'kaum' elif key == u'Betreiber': sledrun.operator = _conv(wrpylib.wrvalidators.UnicodeNone().to_python, value, key) # 'Max Mustermann' elif key == u'Öffentliche Anreise': sledrun.public_transport = _conv(wrpylib.wrvalidators.GermanPublicTransport().to_python, value, key) # 'Mittelmäßig' - elif key == u'Gehzeit': sledrun.walkup_time = 
_conv(wrpylib.wrvalidators.UnsignedNone().to_python, value, key) # 90 elif key == u'Aufstieg möglich': sledrun.walkup_possible = _conv(wrpylib.wrvalidators.GermanBoolNone().to_python, value, key) # 'Ja' elif key == u'Aufstieg getrennt': sledrun.walkup_separate, sledrun.walkup_separate_comment = _conv(wrpylib.wrvalidators.GermanTristateFloatComment().to_python, value, key) # 'Ja' + elif key == u'Gehzeit': sledrun.walkup_time = _conv(wrpylib.wrvalidators.UnsignedNone().to_python, value, key) # 90 elif key == u'Aufstiegshilfe': sledrun.lift, sledrun.lift_details = _conv(wrpylib.wrvalidators.GermanLift().to_python, value, key) # 'Gondel (unterer Teil)' elif key == u'Beleuchtungsanlage': sledrun.night_light, sledrun.night_light_comment = _conv(wrpylib.wrvalidators.GermanTristateFloatComment().to_python, value, key) elif key == u'Beleuchtungstage': sledrun.night_light_days, sledrun.night_light_days_comment = _conv(wrpylib.wrvalidators.UnsignedCommentNone(7).to_python, value, key) # '3 (Montag, Mittwoch, Freitag)' @@ -92,10 +101,10 @@ def sledrun_to_rodelbahnbox(sledrun, version): if version == '1.4': keys.append(u'Aufstieg möglich') values.append(wrpylib.wrvalidators.GermanBoolNone().from_python(sledrun.walkup_possible)) - keys.append(u'Gehzeit') - values.append(wrpylib.wrvalidators.UnsignedNone().from_python(sledrun.walkup_time)) keys.append(u'Aufstieg getrennt') values.append(wrpylib.wrvalidators.GermanTristateFloatComment().from_python((sledrun.walkup_separate, sledrun.walkup_separate_comment))) + keys.append(u'Gehzeit') + values.append(wrpylib.wrvalidators.UnsignedNone().from_python(sledrun.walkup_time)) keys.append(u'Aufstiegshilfe') values.append(wrpylib.wrvalidators.GermanLift().from_python((sledrun.lift, sledrun.lift_details))) keys.append(u'Beleuchtungsanlage') @@ -116,7 +125,7 @@ def sledrun_to_rodelbahnbox(sledrun, version): values.append(wrpylib.wrvalidators.GermanBoolNone().from_python(sledrun.show_in_overview)) keys.append(u'Forumid') 
values.append(wrpylib.wrvalidators.UnsignedNeinNone().from_python(sledrun.forum_id)) - return wrpylib.mwmarkup.create_template(u'Rodelbahnbox', [], keys, values, True) + return wrpylib.mwmarkup.create_template(u'Rodelbahnbox', [], keys, values, True, 20) def gasthausbox_to_inn(wikitext, inn=None): @@ -130,7 +139,7 @@ def gasthausbox_to_inn(wikitext, inn=None): # Match Gasthausbox start, end = wrpylib.mwmarkup.find_template(wikitext, u'Gasthausbox') if start is None: raise formencode.Invalid(u"No 'Gasthausbox' found", wikitext, None) - template_name, properties = wrpylib.mwmarkup.split_template(wikitext[start:end]) + template_title, properties = wrpylib.mwmarkup.split_template(wikitext[start:end]) # Process properties for key, value in properties.iteritems(): @@ -143,7 +152,7 @@ def gasthausbox_to_inn(wikitext, inn=None): elif key == u'Rodelverleih': inn.sled_rental, inn.sled_rental_comment = _conv(wrpylib.wrvalidators.BoolUnicodeTupleValidator().to_python, value, key) elif key == u'Handyempfang': inn.mobile_provider = _conv(wrpylib.wrvalidators.ValueCommentListNeinLoopNone().to_python, value, key) elif key == u'Homepage': inn.homepage = _conv(wrpylib.wrvalidators.UrlNeinNone().to_python, value, key) - elif key == u'E-Mail': inn.email_list = _conv(wrpylib.wrvalidators.EmailCommentListNeinLoopNone().to_python, value, key) + elif key == u'E-Mail': inn.email_list = _conv(wrpylib.wrvalidators.EmailCommentListNeinLoopNone(allow_masked_email=True).to_python, value, key) elif key == u'Telefon': inn.phone_list = _conv(wrpylib.wrvalidators.PhoneCommentListNeinLoopNone(comments_are_optional=True).to_python, value, key) elif key == u'Bild': inn.image = _conv(wrpylib.wrvalidators.UnicodeNone().to_python, value, key) elif key == u'Rodelbahnen': inn.sledding_list = _conv(wrpylib.wrvalidators.WikiPageListLoopNone().to_python, value, key) @@ -174,7 +183,7 @@ def inn_to_gasthausbox(inn): keys.append(u'Homepage') 
def find_template_latlon_ele(wikitext, template_title):
    """Finds the first occurrence of the '{{template_title|47.076207 N 11.453553 E|1890}}' template.

    :param wikitext: wikitext that is searched for the template.
    :param template_title: title of the template, e.g. u'Position oben'.
    :returns: the tuple (start, end, lat, lon, ele) or (None, None, None, None, None)
        if the template was not found.
    :raises: an exception (as raised by the wrpylib validators, or a KeyError for a
        missing positional parameter) if the template has no valid format.
    """
    start, end = wrpylib.mwmarkup.find_template(wikitext, template_title)
    if start is None:
        return (None,) * 5
    title, params = wrpylib.mwmarkup.split_template(wikitext[start:end])
    lat, lon = wrpylib.wrvalidators.GeoNone().to_python(params[u'1'].strip())
    ele = wrpylib.wrvalidators.UnsignedNone().to_python(params[u'2'].strip())
    return start, end, lat, lon, ele


def create_template_latlon_ele(template_title, lat, lon, ele):
    """Creates the wikitext of a '{{template_title|47.076207 N 11.453553 E|1890}}' template.

    Values that the validators render as an empty string are written as a
    single blank so that the positional parameter is still present.
    """
    geo = wrpylib.wrvalidators.GeoNone().from_python((lat, lon))
    if not geo:
        geo = u' '
    ele = wrpylib.wrvalidators.UnsignedNone().from_python(ele)
    if not ele:
        ele = u' '
    return wrpylib.mwmarkup.create_template(template_title, [geo, ele])


def find_template_PositionOben(wikitext):
    """Same as find_template_latlon_ele with template '{{Position oben|47.076207 N 11.453553 E|1890}}'"""
    return find_template_latlon_ele(wikitext, u'Position oben')


def create_template_PositionOben(lat, lon, ele):
    """Same as create_template_latlon_ele with template title 'Position oben'."""
    # The title has to match the one find_template_PositionOben searches for.
    return create_template_latlon_ele(u'Position oben', lat, lon, ele)


def find_template_PositionUnten(wikitext):
    """Same as find_template_latlon_ele with template '{{Position unten|47.076207 N 11.453553 E|1890}}'"""
    return find_template_latlon_ele(wikitext, u'Position unten')


def find_template_unsigned(wikitext, template_title):
    """Finds the first occurrence of the '{{template_title|1890}}' template.

    :param wikitext: wikitext that is searched for the template.
    :param template_title: title of the template, e.g. u'Gehzeit'.
    :returns: the tuple (start, end, unsigned_value) or (None, None, None)
        if the template was not found.
    :raises: an exception if the template has no valid format.
    """
    start, end = wrpylib.mwmarkup.find_template(wikitext, template_title)
    if start is None:
        return (None,) * 3
    title, params = wrpylib.mwmarkup.split_template(wikitext[start:end])
    unsigned_value = wrpylib.wrvalidators.UnsignedNone().to_python(params[u'1'].strip())
    return start, end, unsigned_value


def create_template_unsigned(template_title, unsigned):
    """Creates the wikitext of a '{{template_title|1890}}' template.

    A value that the validator renders as an empty string is written as a
    single blank so that the positional parameter is still present.
    """
    unsigned = wrpylib.wrvalidators.UnsignedNone().from_python(unsigned)
    if not unsigned:
        unsigned = u' '
    return wrpylib.mwmarkup.create_template(template_title, [unsigned])


def find_template_Hoehenunterschied(wikitext):
    """Same as find_template_unsigned with template '{{Höhenunterschied|350}}'"""
    return find_template_unsigned(wikitext, u'Höhenunterschied')


def create_template_Hoehenunterschied(ele_diff):
    """Same as create_template_unsigned with template title 'Höhenunterschied'."""
    return create_template_unsigned(u'Höhenunterschied', ele_diff)


def find_template_Bahnlaenge(wikitext):
    """Same as find_template_unsigned with template '{{Bahnlänge|4500}}'"""
    return find_template_unsigned(wikitext, u'Bahnlänge')


def create_template_Bahnlaenge(length):
    """Same as create_template_unsigned with template title 'Bahnlänge'."""
    return create_template_unsigned(u'Bahnlänge', length)


def find_template_Gehzeit(wikitext):
    """Same as find_template_unsigned with template '{{Gehzeit|60}}'"""
    return find_template_unsigned(wikitext, u'Gehzeit')


def create_template_Gehzeit(walkup_time):
    """Same as create_template_unsigned with template title 'Gehzeit'."""
    return create_template_unsigned(u'Gehzeit', walkup_time)


def find_template_Forumlink(wikitext):
    """Like find_template_unsigned with template '{{Forumlink|26}}'.

    The only difference is that an empty first parameter is mapped to None
    directly, without consulting the validator.

    :returns: the tuple (start, end, forum_id) or (None, None, None)
        if the template was not found.
    """
    start, end = wrpylib.mwmarkup.find_template(wikitext, u'Forumlink')
    if start is None:
        return (None,) * 3
    title, params = wrpylib.mwmarkup.split_template(wikitext[start:end])
    forumid = params[u'1'].strip()
    if forumid == u'':
        unsigned_value = None
    else:
        unsigned_value = wrpylib.wrvalidators.UnsignedNone().to_python(forumid)
    return start, end, unsigned_value


def find_template_Parkplatz(wikitext):
    """Same as find_template_latlon_ele with template '{{Parkplatz|47.076207 N 11.453553 E|1890}}'"""
    return find_template_latlon_ele(wikitext, u'Parkplatz')


def find_template_Haltestelle(wikitext):
    """Finds the first occurrence of the
    '{{Haltestelle|Ortsname|Haltestellenname|47.076207 N 11.453553 E|1890}}' template.

    :returns: the tuple (start, end, city, stop, lat, lon, ele) or a tuple of
        seven None values if the template was not found.
    :raises: an exception if the template has no valid format.
    """
    start, end = wrpylib.mwmarkup.find_template(wikitext, u'Haltestelle')
    if start is None:
        return (None,) * 7
    title, params = wrpylib.mwmarkup.split_template(wikitext[start:end])
    city = wrpylib.wrvalidators.UnicodeNone().to_python(params[u'1'].strip())
    stop = wrpylib.wrvalidators.UnicodeNone().to_python(params[u'2'].strip())
    lat, lon = wrpylib.wrvalidators.GeoNone().to_python(params[u'3'].strip())
    ele = wrpylib.wrvalidators.UnsignedNone().to_python(params[u'4'].strip())
    return start, end, city, stop, lat, lon, ele


def find_all_templates(wikitext, find_func):
    """Returns a list of return values of find_func that searches for a template.

    find_func is called repeatedly on the text following the previous match;
    the (start, end) positions of every result are translated back so that
    they refer to the complete wikitext.

    Example:
    >>> find_all_templates(wikitext, find_template_Haltestelle)

    :param wikitext: wikitext to search.
    :param find_func: function taking a wikitext and returning a tuple that
        starts with (start, end, ...), or a tuple starting with None if
        nothing was found.
    :returns: list of result tuples; an empty list if the template was not found at all.
    """
    results = []
    offset = 0  # position in wikitext where the next search starts
    while True:
        result = find_func(wikitext[offset:])
        if result[0] is None:
            return results
        start = result[0] + offset
        end = result[1] + offset
        results.append((start, end) + tuple(result[2:]))
        offset = end


def googlemap_to_wrmap(center, zoom, coords, paths):
    """Converts the output of parse_googlemap to the GeoJSON format wrmap uses.

    :param center: (lon, lat) pair of the map center.
    :param zoom: zoom level, stored unchanged in the map properties.
    :param coords: iterable of (lon, lat, symbol, title) tuples describing points.
    :param paths: iterable of (style, entries) tuples, where style looks like
        u'6#ff014e9a' (line width, '#', two alpha digits, six color digits)
        and entries is an iterable of (lon, lat, symbol, title) tuples.
    :returns: (GeoJSON as nested Python datatypes)
    """
    # Google map line styles that correspond to a dedicated wrmap line type.
    path_types = {
        u'6#ff014e9a': u'rodelbahn',
        u'6#ffe98401': u'gehweg',
        u'6#ff7f7fff': u'alternative',
        u'3#ff000000': u'lift',
        u'3#ffe1e100': u'anfahrt',
    }
    json_features = []

    # point
    for lon, lat, symbol, title in coords:
        properties = {}
        if symbol is not None:
            properties['type'] = symbol.lower()
        if title is not None:
            properties['name'] = title
        json_features.append({
            'type': 'Feature',
            'geometry': {'type': 'Point', 'coordinates': [lon, lat]},
            'properties': properties})

    # path
    for style, entries in paths:
        style = style.lower()
        if style in path_types:
            properties = {'type': path_types[style]}
        else:
            # Generic line: 'linie' is the element name parse_wrmap accepts,
            # and 'farbe' needs the leading '#' to pass its color check.
            properties = {
                'type': 'linie',
                'dicke': style[0],
                'farbe': '#' + style[4:],
            }
        json_features.append({
            'type': 'Feature',
            'geometry': {
                'type': 'LineString',
                'coordinates': [[lon, lat] for lon, lat, symbol, title in entries]},
            'properties': properties})

    return {
        'type': 'FeatureCollection',
        'features': json_features,
        'properties': {'lon': center[0], 'lat': center[1], 'zoom': zoom}}


def parse_wrmap_coordinates(coords):
    """Gets a string of coordinates and returns a list of lon/lat coordinate pairs, e.g.

        47.12 N 11.87 E
        47.13 N 11.70 E
        ->
        [[11.87, 47.12], [11.70, 47.13]]

    The 'N' and 'E' markers are optional. None (the text of an empty XML
    element) is treated like an empty string and gives an empty list.

    :raises RuntimeError: if the string contains anything else than
        whitespace separated coordinate pairs.
    """
    if coords is None:
        coords = ''
    result = []
    pos = 0  # end of the part of coords that was parsed successfully
    for match in re.finditer(r'\s*(\d+\.?\d*)\s*N?\s+(\d+\.?\d*)\s*E?\s*', coords):
        if match.start() != pos:
            break  # unparsable text between two coordinate pairs
        lat, lon = match.groups()
        result.append([float(lon), float(lat)])
        pos = match.end()
    if pos != len(coords):
        raise RuntimeError('Wrong coordinate format: {}'.format(coords))
    return result


def parse_wrmap(wikitext):
    """Parses the (unicode) u'<wrmap ...>content</wrmap>' of the Winterrodeln wrmap extension.

    The wikitext has to consist of exactly one <wrmap> (or <wrgmap>) element;
    otherwise a ParseError is raised.
    If the wrmap contains invalid formatted lines, a ParseError is raised.

    :param wikitext: wikitext containing the element. Example:

    wikitext = u'''
    <wrmap lat="47.2417134" lon="11.21408895" zoom="14" width="700" height="400">
    <gasthaus name="Rosskogelhuette" wiki="Rosskogelhuette">47.240689 11.190454</gasthaus>
    <parkplatz>47.245789 11.238971</parkplatz>
    <haltestelle name="Oberperfuss Rangger Koepfl Lift">47.245711 11.238283</haltestelle>
    <rodelbahn>
        47.238587 11.203360
        47.244951 11.230868
        47.245470 11.237853
    </rodelbahn>
    </wrmap>
    '''
    :returns: GeoJSON as nested Python datatype
    :raises ParseError: on malformed XML, unknown elements or attributes and
        invalid attribute values or coordinates.
    """
    # parse XML
    try:
        wrmap_xml = xml.etree.ElementTree.fromstring(wikitext.encode('utf-8'))
    except xml.etree.ElementTree.ParseError as e:
        row, column = e.position
        raise ParseError("XML parse error on row {}, column {}: {}".format(row, column, e))
    if wrmap_xml.tag not in ['wrmap', 'wrgmap']:
        raise ParseError('No valid tag name')

    # convert XML to geojson (http://www.geojson.org/geojson-spec.html)
    json_features = []
    for feature in wrmap_xml:
        # determine feature type
        is_point = feature.tag in WRMAP_POINT_TYPES
        is_line = feature.tag in WRMAP_LINE_TYPES
        if not is_point and not is_line:
            raise ParseError('Unknown element <{}>.'.format(feature.tag))

        properties = {'type': feature.tag}
        allowed_properties = set(['name', 'wiki']) if is_point else set(['farbe', 'dicke'])
        wrong_properties = set(feature.attrib.keys()) - allowed_properties
        if wrong_properties:
            raise ParseError("The attribute '{}' is not allowed at <{}>.".format(list(wrong_properties)[0], feature.tag))

        if is_point:
            properties.update(feature.attrib)
        else:
            if 'farbe' in feature.attrib:
                if not re.match('#[0-9a-fA-F]{6}$', feature.attrib['farbe']):
                    raise ParseError('The attribute "farbe" has to have a format like "#a0bb43".')
                properties['strokeColor'] = feature.attrib['farbe']  # e.g. #a200b7
            if 'dicke' in feature.attrib:
                try:
                    properties['strokeWidth'] = int(feature.attrib['dicke'])  # e.g. 6
                except ValueError:
                    raise ParseError('The attribute "dicke" has to be an integer.')

        # Report malformed coordinates as ParseError (a RuntimeError subclass),
        # as promised by the docstring.
        try:
            coordinates = parse_wrmap_coordinates(feature.text)
        except ParseError:
            raise
        except RuntimeError as e:
            raise ParseError(*e.args)

        if is_point:
            if len(coordinates) != 1:
                raise ParseError('The element <{}> has to have exactly one coordinate pair.'.format(feature.tag))
            geometry = {'type': 'Point', 'coordinates': coordinates[0]}
        else:
            geometry = {'type': 'LineString', 'coordinates': coordinates}
        json_features.append({
            'type': 'Feature',
            'geometry': geometry,
            'properties': properties})

    # attributes
    properties = {}
    for k, v in wrmap_xml.attrib.items():
        if k in ['lat', 'lon']:
            try:
                properties[k] = float(v)
            except ValueError:
                raise ParseError('Attribute "{}" has to be a float value.'.format(k))
        elif k in ['zoom', 'width', 'height']:
            try:
                properties[k] = int(v)
            except ValueError:
                raise ParseError('Attribute "{}" has to be an integer value.'.format(k))
        else:
            raise ParseError('Unknown attribute "{}".'.format(k))

    return {
        'type': 'FeatureCollection',
        'features': json_features,
        'properties': properties}


def create_wrmap_coordinates(coords):
    """Formats a list of [lon, lat] pairs as lines like '47.120000 N 11.870000 E'.

    :param coords: iterable of [lon, lat] pairs.
    :returns: one line per pair, joined with newlines (no trailing newline).
    """
    return '\n'.join('{:.6f} N {:.6f} E'.format(coord[1], coord[0]) for coord in coords)


def create_wrmap(geojson):
    """Creates a <wrmap> wikitext from geojson (as python types).

    The geojson argument is not modified.

    :param geojson: FeatureCollection as nested Python datatypes. The
        collection properties become attributes of <wrmap>; each feature
        becomes a child element named after its 'type' property, with the
        remaining feature properties as attributes.
    :returns: the serialized XML as unicode string.
    """
    wrmap_xml = xml.etree.ElementTree.Element('wrmap')
    wrmap_xml.text = '\n\n'
    for k, v in geojson['properties'].items():
        wrmap_xml.attrib[k] = str(v)

    assert geojson['type'] == 'FeatureCollection'
    last_feature_xml = None
    for json_feature in geojson['features']:
        properties = json_feature['properties']
        # Build a fresh attribute dict instead of reusing (and mutating) the
        # caller's properties.
        attributes = dict((k, v) for k, v in properties.items() if k != 'type')
        feature_xml = xml.etree.ElementTree.SubElement(wrmap_xml, properties['type'], attributes)
        geo = json_feature['geometry']
        if geo['type'] == 'Point':
            feature_xml.text = create_wrmap_coordinates([geo['coordinates']])
            separator = '\n'
        else:
            # Lines are written as a multi-line block set off by a blank line.
            feature_xml.text = '\n' + create_wrmap_coordinates(geo['coordinates']) + '\n'
            separator = '\n\n'
        if last_feature_xml is not None:
            last_feature_xml.tail = separator
        last_feature_xml = feature_xml

    if last_feature_xml is not None:
        last_feature_xml.tail = '\n\n'
    return xml.etree.ElementTree.tostring(wrmap_xml, encoding='utf-8').decode('utf-8')