return pipe_char.join(parts) + end_char
def find_tag(wikitext, tagname, pos=0):
    """Locate the first occurrence of the tag '<tag ...>...</tag>' or '<tag ... />'.

    If you are sure that the wikitext contains the tag, the whole tag can be
    extracted as follows:

    >>> wikitext = u'This is a <tag>mytag</tag> tag.'
    >>> start, content, endtag, end = find_tag(wikitext, u'tag')
    >>> wikitext[start:end] == u'<tag>mytag</tag>'
    True
    >>> wikitext[content:endtag] == u'mytag'
    True

    :param wikitext: The text (preferably unicode) that has the tag in it.
    :param tagname: Name of the tag, e.g. u'tag' for <tag>. It is matched
        literally (regular expression metacharacters are escaped) and as a
        whole name, i.e. u'tag' does not match '<tags>'.
    :param pos: Position within wikitext to start searching for the tag.
    :return:
        (start, content, endtag, end). start is the position of '<' of the tag,
        content is the beginning of the content (after '>'), endtag is the
        beginning of the end tag ('</') and end is one position after the end tag.
        For single tags, (start, None, None, end) is returned.
        If the tag is not found (or only the start tag is present),
        (None, None, None, None) is returned.
    """
    name = re.escape(tagname)

    # Find start tag. After the name only whitespace followed by attributes,
    # an optional '/' and the closing '>' are accepted, so that searching for
    # 'tag' does not hit a longer tag name like '<tags>'.
    regexp_starttag = re.compile(u'<{0}(?:\\s[^>]*?)?(/?)>'.format(name))
    match_starttag = regexp_starttag.search(wikitext, pos)
    if match_starttag is None:
        return None, None, None, None

    # Does the tag have content? group(1) is either '' or '/'.
    if match_starttag.group(1):
        # single (self-closing) tag
        return match_starttag.start(), None, None, match_starttag.end()

    # Tag with content: look for the end tag behind the start tag.
    # Whitespace before '>' is tolerated ('</tag >').
    regexp_endtag = re.compile(u'</{0}\\s*>'.format(name))
    match_endtag = regexp_endtag.search(wikitext, match_starttag.end())
    if match_endtag is None:
        # No closing tag - error in wikitext
        return None, None, None, None
    return (match_starttag.start(), match_starttag.end(),
            match_endtag.start(), match_endtag.end())
+
+
def parse_googlemap(wikitext, detail=False):
"""Parses the (unicode) u'<googlemap ...>content</googlemap>' of the googlemap extension
out of a page. If wikitext does not contain the googlemap extension text None is returned.