import xml.etree.ElementTree
+class ParseError(RuntimeError):
+ """Exception raised when wikitext cannot be parsed, e.g. by parse_googlemap."""
+ pass
+
+
def find_template(wikitext, template_title):
"""Returns the tuple (start, end) of the first occurence of the template '{{template ...}} within wikitext'.
(None, None) is returned if the template is not found.
def find_tag(wikitext, tagname, pos=0):
- """Returns the tuple (start, end) of the first occurence of the tag '<tag ...>...</tag>'
+ """Returns position information of the first occurrence of the tag '<tag ...>...</tag>'
or '<tag ... />'.
- (None, None) is returned if the tag is not found.
If you are sure that the wikitext contains the tag, the tag could be extracted like follows:
>>> wikitext = u'This is a <tag>mytag</tag> tag.'
- >>> start, end = find_template(wikitext, u'tag')
+ >>> start, content, endtag, end = find_tag(wikitext, u'tag')
>>> print wikitext[start:end]
<tag>mytag</tag>
return match_starttag.start(), match_starttag.end(), match_endtag.start(), match_endtag.end()
-def parse_googlemap(wikitext, detail=False):
- """Parses the (unicode) u'<googlemap ...>content</googlemap>' of the googlemap extension
- out of a page. If wikitext does not contain the googlemap extension text None is returned.
- If the googlemap contains invalid formatted lines, a RuntimeError is raised.
+def parse_googlemap(wikitext):
+ """Parses the (unicode) u'<googlemap ...>content</googlemap>' of the googlemap extension.
+ If wikitext does not contain the <googlemap> tag or if the <googlemap> tag contains
+ invalidly formatted lines, a ParseError is raised.
+ Use find_tag(wikitext, 'googlemap') to find the googlemap tag within an arbitrary
+ wikitext before using this function.
:param wikitext: wikitext containing the template. Example:
- :param detail: bool. If True, start and end position of <googlemap>...</googlemap> is
- returned additionally.
wikitext = '''
<googlemap version="0.9" lat="47.113291" lon="11.272337" zoom="15">
47.112408,11.271119
</googlemap>
'''
- :returns: the tuple (center, zoom, coords, paths).
- center is the tuple (lon, lat) of the google maps or (None, None) if not provided
- zoom is the google zoom level as integer or None if not provided
+ :returns: The tuple (attributes, coords, paths) is returned.
+ attributes is a dict that contains the attributes that are present
+ (e.g. lon, lat, zoom, width, height) converted to float (lon, lat) or int.
coords is a list of (lon, lat, symbol, title) tuples.
paths is a list of (style, coords) tuples.
- coords is again a list of (lot, lat, symbol, title) tuples.
- If detail is True, (center, zoom, coords, paths, start, end) is returned."""
+ coords is again a list of (lon, lat, symbol, title) tuples."""
def is_coord(line):
"""Returns True if the line contains a coordinate."""
if not match is None: return (float(match.group(2)), float(match.group(1)), None, match.group(3))
match = re.match(u'([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+)', line)
if not match is None: return (float(match.group(2)), float(match.group(1)), None, None)
- return RuntimeError(u'Could not parse line ' + line)
-
- regexp = re.compile(u"(<googlemap[^>]*>)(.*?)(</googlemap>)", re.DOTALL)
- match = regexp.search(wikitext)
- if match is None: return None
- start = match.start()
- end = match.end()
- content = match.group(2)
- gm = xml.etree.ElementTree.XML((match.group(1)+match.group(3)).encode('UTF8'))
- zoom = gm.get('zoom')
- lon = gm.get('lon')
- lat = gm.get('lat')
- if not zoom is None: zoom = int(zoom)
- if not lon is None: lon = float(lon)
- if not lat is None: lat = float(lat)
- center = (lon, lat)
-
+ return ParseError(u'Could not parse line ' + line)
+
+ start, content, endtag, end = find_tag(wikitext, 'googlemap')
+ if start is None:
+ raise ParseError(u'<googlemap> tag not found.')
+ if content is None:
+ xml_only = wikitext[start:endtag]
+ else:
+ xml_only = wikitext[start:content]+wikitext[endtag:end]
+
+ try:
+ gm = xml.etree.ElementTree.XML(xml_only.encode('UTF8'))
+ except xml.etree.ElementTree.ParseError as e:
+ row, column = e.position
+ raise ParseError(u"XML parse error in <googlemap ...>.")
+
+ # parse attributes
+ attributes = {}
+ try:
+ for key in ['lon', 'lat']:
+ if gm.get(key) is not None:
+ attributes[key] = float(gm.get(key))
+ for key in ['zoom', 'width', 'height']:
+ if gm.get(key) is not None:
+ attributes[key] = int(gm.get(key))
+ except ValueError as error:
+ raise ParseError(u'Error at parsing attribute {0} of <googlemap>: {1}'.format(key, unicode(error)))
+
+ # parse points and lines
coords = []
paths = []
- lines = content.split("\n")
+ lines = wikitext[content:endtag].split("\n")
i = 0
while i < len(lines):
line = lines[i].strip()
coords.append((lon, lat, symbol, title))
continue
- raise RuntimeError(u'Unknown line syntax: ' + line)
- if detail:
- return (center, zoom, coords, paths, start, end)
- return (center, zoom, coords, paths)
+ raise ParseError(u'Unknown line syntax: ' + line)
+
+ return (attributes, coords, paths)