>>> print wikitext.__getslice__(*find_template(wikitext, u'Color'))
{{Color|red|red text}}
- The search is done with regular expression.
+ The search is done with a regular expression. It gives wrong results when parsing a template
+ containing the characters "}}".
:param wikitext: The text (preferably unicode) that has the template in it.
:param template_title: The page title of the template with or without namespace (but as in the wikitext).
(start, end) of the first occurrence with start >= 0 and end > start.
(None, None) if the template is not found.
"""
- match = re.search(u"\{\{" + template_title + "[^\}]*\}\}", wikitext, re.DOTALL)
+ match = re.search(u"\{\{" + template_title + "\s*(\|[^\}]*)?\}\}", wikitext, re.DOTALL)
if match is None: return None, None
return match.start(), match.end()
-
def split_template(template):
"""Takes a template, like u'{{Color|red|text=Any text}}' and translates it to a Python tuple
- (template_title, parameters) where parameters is a Python dictionary {1: u'red', u'text'=u'Any text'}.
+ (template_title, parameters) where parameters is a Python dictionary {u'1': u'red', u'text': u'Any text'}.
Anonymous parameters get integer keys (converted to unicode) starting with 1
like in MediaWiki, named parameters are unicode strings.
Whitespace is stripped.
as_table_keylen = max([len(k) for k in named_param_keys])
for i in xrange(len(named_param_keys)):
key = named_param_keys[i]
- if as_table: key = key.ljust(as_table_keylen)
- parts.append(key + equal_char + named_param_values[i])
+ if as_table:
+ key = key.ljust(as_table_keylen)
+ parts.append((key + equal_char + named_param_values[i]).rstrip())
+ else:
+ parts.append(key + equal_char + named_param_values[i])
return pipe_char.join(parts) + end_char
def parse_coord(line):
"""Returns (lon, lat, symbol, title). If symbol or title is not present, None is returned in its place."""
- match = re.match(u'\(([^)]+)\) ?([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+),(.*)', line)
+ match = re.match(u'\(([^)]+)\) ?([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+), ?(.*)', line)
if not match is None: return (float(match.group(3)), float(match.group(2)), match.group(1), match.group(4))
match = re.match(u'\(([^)]+)\) ?([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+)', line)
if not match is None: return (float(match.group(3)), float(match.group(2)), match.group(1), None)
- match = re.match(u'([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+),(.*)', line)
+ match = re.match(u'([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+), ?(.*)', line)
if not match is None: return (float(match.group(2)), float(match.group(1)), None, match.group(3))
match = re.match(u'([0-9]{1,2}\.[0-9]+), ?([0-9]{1,2}\.[0-9]+)', line)
if not match is None: return (float(match.group(2)), float(match.group(1)), None, None)
# Handle a coordinate
if is_coord(line):
lon, lat, symbol, title = parse_coord(line)
- coords.append((lon, lat, symbol, title))
while i < len(lines):
line = lines[i].strip()
i += 1
if is_path(line) or is_coord(line):
i -= 1
break
+ if len(line) > 0 and title is None: title = line
+ coords.append((lon, lat, symbol, title))
continue
raise RuntimeError(u'Unknown line syntax: ' + line)