-#!/usr/bin/python2.6
+#!/usr/bin/python2.7
# -*- coding: iso-8859-15 -*-
# $Id$
# $HeadURL$
* py-wikimarkup https://github.com/dcramer/py-wikimarkup
* mwlib http://code.pediapress.com/wiki/wiki
"""
+import re
+import xml.etree.ElementTree
def find_template(wikitext, template_title):
    """Return the slice bounds of the first ``{{template_title ...}}`` in wikitext.

    If you are sure that the wikitext contains the template, it can be
    extracted like this:

    >>> wikitext = u'This is a {{Color|red|red text}} template.'
    >>> start, end = find_template(wikitext, u'Color')
    >>> wikitext[start:end] == u'{{Color|red|red text}}'
    True

    The search is done with a regular expression. It gives wrong results for
    a template whose parameters contain the character "}" (for example a
    nested template), because the parameter part stops at the first "}".

    :param wikitext: The text (preferably unicode) that has the template in it.
    :param template_title: The page title of the template with or without
        namespace (but as in the wikitext). It is matched literally, so
        characters such as "+", "(" or "." have no special meaning.
    :return:
        (start, end) of the first occurrence with start >= 0 and end > start.
        (None, None) if the template is not found.
    """
    # The title is user data, not a pattern: escape it so that regular
    # expression metacharacters in page titles are matched literally.
    pattern = r"\{\{" + re.escape(template_title) + r"\s*(\|[^\}]*)?\}\}"
    match = re.search(pattern, wikitext, re.DOTALL)
    if match is None:
        return None, None
    return match.start(), match.end()
+
+
def split_template(template):
    """Split a template string into its title and a parameter dictionary.

    Takes a template like u'{{Color|red|text=Any text}}' and translates it to
    the tuple (template_title, parameters), where parameters is a dictionary
    like {u'1': u'red', u'text': u'Any text'}.

    Anonymous parameters get integer keys (converted to text) starting with 1
    like in MediaWiki; named parameters keep their name as key. Whitespace
    around the title, keys and values is stripped. The template is split at
    every "|", so nested templates or links containing "|" are not supported.

    :param template: text starting with "{{" and ending with "}}".
    :return: tuple (template_title, params).
    :raises ValueError: if the braces are missing, the title or a key is
        empty, a key occurs twice, or an anonymous parameter follows a named
        one.
    """
    if not template.startswith(u'{{'):
        raise ValueError(u'Template does not start with "{{"')
    if not template.endswith(u'}}'):
        raise ValueError(u'Template does not end with "}}"')
    parts = template[2:-2].split(u'|')

    # template name
    template_title = parts[0].strip()
    if not template_title:
        raise ValueError(u'Empty template tilte.')

    params = {}  # result dictionary
    param_num = 1
    named_seen = False  # anonymous parameters are only allowed before the first named one
    for part in parts[1:]:
        key, sep, value = part.partition(u'=')
        if not sep:
            # anonymous parameter
            if named_seen:
                raise ValueError(u'Anonymous parameter after named parameter.')
            # u'{0}'.format gives a text key on both Python 2 and Python 3.
            params[u'{0}'.format(param_num)] = part.strip()
            param_num += 1
            continue

        # named or numbered parameter
        named_seen = True
        key = key.strip()
        if not key:
            raise ValueError(u'Empty key.')
        if key in params:
            raise ValueError(u'Duplicate key: "{0}"'.format(key))
        params[key] = value.strip()

    return template_title, params
+
+
def create_template(template_title, anonym_params=(), named_param_keys=(), named_param_values=(), as_table=False, as_table_keylen=None):
    """Format a MediaWiki template.

    :param template_title: Unicode string with the template name
    :param anonym_params: sequence with parameters without keys
    :param named_param_keys: sequence with keys of named parameters
    :param named_param_values: sequence with values of named parameters,
        corresponding to named_param_keys.
    :param as_table: if True, each parameter is put on its own line, the keys
        are padded to a common width and the closing braces get their own line
    :param as_table_keylen: width of the key field when as_table is True.
        None for "automatic" (the length of the longest key).
    :return: unicode template
    :raises IndexError: if named_param_values is shorter than named_param_keys.
    """
    if len(named_param_values) < len(named_param_keys):
        raise IndexError(u'named_param_values is shorter than named_param_keys.')

    if as_table:
        pipe_char, equal_char, end_char = u'\n| ', u' = ', u'\n}}'
        if as_table_keylen is None:
            # "or [0]" keeps max() from failing when there are no named parameters.
            as_table_keylen = max([len(key) for key in named_param_keys] or [0])
    else:
        pipe_char, equal_char, end_char = u'|', u'=', u'}}'

    parts = [u"{{" + template_title]
    parts.extend(anonym_params)
    for key, value in zip(named_param_keys, named_param_values):
        if as_table:
            # rstrip avoids trailing whitespace on lines with an empty value.
            parts.append((key.ljust(as_table_keylen) + equal_char + value).rstrip())
        else:
            parts.append(key + equal_char + value)
    return pipe_char.join(parts) + end_char
+
+
def parse_googlemap(wikitext, detail=False):
    """Parse the first u'<googlemap ...>content</googlemap>' element of the googlemap extension.

    Example input:

    wikitext = '''
    <googlemap version="0.9" lat="47.113291" lon="11.272337" zoom="15">
    (Parkplatz)47.114958,11.266026
    Parkplatz

    (Gasthaus) 47.114715, 11.266262, Alt Bärnbad (Gasthaus)
    6#FF014E9A
    47.114715,11.266262
    47.114135,11.268381
    47.113421,11.269322
    47.11277,11.269979
    47.112408,11.271119
    </googlemap>
    '''

    Each coordinate line is written as "lat,lon" in the wikitext, optionally
    preceded by "(symbol)" and followed by ", title". If a coordinate outside
    of a path has no title on its own line, the next non-empty line that is
    neither a coordinate nor a path style is used as its title. A path starts
    with a style line such as "6#FF014E9A" and collects all coordinate lines
    up to the next style line.

    :param wikitext: (unicode) wikitext of a page.
    :param detail: bool. If True, start and end position of
        <googlemap>...</googlemap> within wikitext are returned additionally.
    :returns: None if wikitext does not contain a googlemap element, otherwise
        the tuple (center, zoom, coords, paths).
        center is the tuple (lon, lat) of the map; a value is None if the
        corresponding attribute is not provided.
        zoom is the google zoom level as integer or None if not provided.
        coords is a list of (lon, lat, symbol, title) tuples.
        paths is a list of (style, coords) tuples, where
        coords is again a list of (lon, lat, symbol, title) tuples.
        If detail is True, (center, zoom, coords, paths, start, end) is returned.
    :raises RuntimeError: if the content contains a line that cannot be parsed.
    """
    # A coordinate pair anywhere in the line marks the line as coordinate line.
    coord_anywhere = re.compile(r'[0-9]{1,2}\.[0-9]+, ?[0-9]{1,2}\.[0-9]+')
    # Path style definition, e.g. "6#FF014E9A"; has to be at the line start.
    path_style = re.compile(r'[0-9]#[0-9a-fA-F]{8}')
    # "(symbol) lat, lon, title" where the symbol and the title are optional.
    coord_line = re.compile(
        r'(?:\(([^)]+)\) ?)?'
        r'([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+)'
        r'(?:, ?(.*))?')

    def is_coord(line):
        """Returns True if the line contains a coordinate."""
        return coord_anywhere.search(line) is not None

    def is_path(line):
        """Returns True if the line starts with a path style definition."""
        return path_style.match(line) is not None

    def parse_coord(line):
        """Returns (lon, lat, symbol, title). If symbol or title is not present, None is used.
        Raises a RuntimeError if the line does not start with a coordinate definition."""
        match = coord_line.match(line)
        if match is None:
            raise RuntimeError(u'Could not parse line ' + line)
        symbol, lat, lon, title = match.groups()
        return (float(lon), float(lat), symbol, title)

    tag_match = re.search(u"(<googlemap[^>]*>)(.*?)(</googlemap>)", wikitext, re.DOTALL)
    if tag_match is None:
        return None
    start, end = tag_match.span()
    open_tag, content, close_tag = tag_match.groups()

    # Parse the element without its content to read the attributes of the tag.
    gm = xml.etree.ElementTree.XML((open_tag + close_tag).encode('UTF8'))
    zoom = gm.get('zoom')
    center_lon = gm.get('lon')
    center_lat = gm.get('lat')
    if zoom is not None:
        zoom = int(zoom)
    if center_lon is not None:
        center_lon = float(center_lon)
    if center_lat is not None:
        center_lat = float(center_lat)
    center = (center_lon, center_lat)

    coords = []
    paths = []
    lines = content.split("\n")
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        i += 1

        # Skip whitespace
        if not line:
            continue

        # Handle a path
        if is_path(line):
            style = path_style.match(line).group(0)
            local_coords = []
            while i < len(lines):
                line = lines[i].strip()
                if is_path(line):
                    break  # next path: leave the line for the outer loop
                i += 1
                if is_coord(line):
                    local_coords.append(parse_coord(line))
            paths.append((style, local_coords))
            continue

        # Handle a coordinate
        if is_coord(line):
            lon, lat, symbol, title = parse_coord(line)
            while i < len(lines):
                line = lines[i].strip()
                if is_path(line) or is_coord(line):
                    break  # next entry: leave the line for the outer loop
                i += 1
                # The first non-empty line is the title unless one was given inline.
                if line and title is None:
                    title = line
            coords.append((lon, lat, symbol, title))
            continue

        raise RuntimeError(u'Unknown line syntax: ' + line)

    if detail:
        return (center, zoom, coords, paths, start, end)
    return (center, zoom, coords, paths)
+