# -*- coding: iso-8859-15 -*-
"""This module contains general functions that help parsing the MediaWiki markup.
I looked for an already existing MediaWiki parser in Python but I didn't find anything
that convinced me. However, here are the links:

* py-wikimarkup https://github.com/dcramer/py-wikimarkup
* mwlib http://code.pediapress.com/wiki/wiki
"""
def find_template(wikitext, template_title):
    """Return the (start, end) position of the first occurrence of a template in wikitext.

    The template is searched in the form '{{template_title ...}}' with a regular
    expression. If you are sure that the wikitext contains the template, it can be
    extracted as follows:

    >>> wikitext = u'This is a {{Color|red|red text}} template.'
    >>> start, end = find_template(wikitext, u'Color')
    >>> print(wikitext[start:end])
    {{Color|red|red text}}

    or just:

    >>> print(wikitext[slice(*find_template(wikitext, u'Color'))])
    {{Color|red|red text}}

    Limitation: the match ends at the first closing brace after the title, so a
    template that contains nested templates or other closing braces in its
    parameters is not matched completely.

    :param wikitext: The text (preferably unicode) that has the template in it.
    :param template_title: The page title of the template with or without namespace
        (but as in the wikitext). It is taken literally, not as a regular expression.
    :return:
        (start, end) of the first occurrence with start >= 0 and end > start.
        (None, None) if the template is not found.
    """
    # Imported locally so that the function does not depend on a module level import.
    import re

    # re.escape: titles may contain regex metacharacters like '(', ')' or '.'.
    # The lookahead requires the title to be followed (after optional whitespace)
    # by '|' or '}', so that u'Color' does not match '{{Colorful|...}}'.
    pattern = r"\{\{" + re.escape(template_title) + r"(?=\s*[|}])[^}]*\}\}"
    match = re.search(pattern, wikitext)
    if match is None:
        return None, None
    return match.start(), match.end()
def split_template(template):
    """Split a template into its title and its parameters.

    Takes a template, like u'{{Color|red|text=Any text}}' and translates it to a Python tuple
    (template_title, parameters) where parameters is a Python dictionary
    {u'1': u'red', u'text': u'Any text'}.
    Anonymous parameters get integer keys (converted to unicode) starting with 1
    like in MediaWiki, named parameters are unicode strings.
    Whitespace is stripped from the title, the keys and the values.

    >>> split_template(u'{{Color|red|text=Any text}}') == (u'Color', {u'1': u'red', u'text': u'Any text'})
    True

    The template is split at every '|', so nested templates or links containing
    '|' inside a parameter value are not supported.

    :param template: unicode string starting with '{{' and ending with '}}'
    :return: tuple (template_title, parameters)
    :raises ValueError: if an unexpected format is encountered: missing '{{' or '}}',
        empty title, empty key, duplicate key or an anonymous parameter
        following a named one.
    """
    if not template.startswith(u'{{'):
        raise ValueError(u'Template does not start with "{{"')
    if not template.endswith(u'}}'):
        raise ValueError(u'Template does not end with "}}"')
    parts = template[2:-2].split(u'|')

    # template name
    template_title = parts[0].strip()
    if not template_title:
        raise ValueError(u'Empty template tilte.')

    params = {}  # result dictionary
    named_seen = False
    # Anonymous parameters are only accepted before the first named parameter,
    # therefore the position within the template equals the MediaWiki parameter number.
    for position, part in enumerate(parts[1:], 1):
        key, sep, value = part.partition(u'=')
        if not sep:
            # anonymous parameter
            if named_seen:
                raise ValueError(u'Anonymous parameter after named parameter.')
            # u'{0}'.format() yields a unicode key on Python 2 and a str key on Python 3.
            params[u'{0}'.format(position)] = part.strip()
            continue
        # named or numbered parameter
        named_seen = True
        key = key.strip()
        if not key:
            raise ValueError(u'Empty key.')
        if key in params:
            raise ValueError(u'Duplicate key: "{0}"'.format(key))
        params[key] = value.strip()

    return template_title, params
def create_template(template_title, anonym_params=(), named_param_keys=(), named_param_values=(), as_table=False, as_table_keylen=None):
    """Formats a MediaWiki template.

    Without as_table the template is returned in one line, e.g.
    u'{{Color|red|text=Any text}}'. With as_table each parameter is written on
    its own line and the keys of the named parameters are padded to a common width.

    :param template_title: Unicode string with the template name
    :param anonym_params: sequence with parameters without keys
    :param named_param_keys: sequence with keys of named parameters
    :param named_param_values: sequence with values of named parameters, corresponding to named_param_keys.
    :param as_table: formats the returned template in one row for each parameter
    :param as_table_keylen: length of the key field. None for "automatic"
        (length of the longest key).
    :return: unicode template
    :raises IndexError: if named_param_values has fewer entries than named_param_keys.
    """
    # Check up front instead of failing in the middle of the loop. IndexError is
    # what indexing the values raised before, so the exception type is kept.
    if len(named_param_values) < len(named_param_keys):
        raise IndexError(u'named_param_values is shorter than named_param_keys')

    if as_table:
        pipe_char, equal_char, end_char = u'\n| ', u' = ', u'\n}}'
    else:
        pipe_char, equal_char, end_char = u'|', u'=', u'}}'

    # Only determine the key width if there are keys: max() of an empty sequence raises.
    if as_table and as_table_keylen is None and named_param_keys:
        as_table_keylen = max(len(key) for key in named_param_keys)

    parts = [u'{{' + template_title]
    parts.extend(anonym_params)
    # zip() ignores surplus values, like the index based access did.
    for key, value in zip(named_param_keys, named_param_values):
        if as_table:
            key = key.ljust(as_table_keylen)
        parts.append(key + equal_char + value)
    return pipe_char.join(parts) + end_char