From 09a5ff008c6143da0ceef44cee561f350c884347 Mon Sep 17 00:00:00 2001 From: Philipp Spitzer Date: Tue, 14 Jun 2022 23:25:26 +0200 Subject: [PATCH] Fine-tune parsing of gastronomy. --- ...e_sledrun_json_from_wikitext_gastronomy.py | 65 ++++++++++--------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/bots/update_sledrun_json_from_wikitext_gastronomy.py b/bots/update_sledrun_json_from_wikitext_gastronomy.py index 6f3619d..3d23b8a 100644 --- a/bots/update_sledrun_json_from_wikitext_gastronomy.py +++ b/bots/update_sledrun_json_from_wikitext_gastronomy.py @@ -78,39 +78,44 @@ class UpdateSledrunJsonFromWikiText( gastronomy = [] line_iter = io.StringIO(value) line = next(line_iter, None) - while line is not None and line.rstrip() != "* '''Hütten''':": + while line is not None and not line.startswith("* '''Hütten''':"): line = next(line_iter, None) if line is None: return gastronomy - while line is not None: - line = next(line_iter, None) - if line is not None: - if line.startswith('** '): - g = {} - wiki = mwparserfromhell.parse(line) - wiki_link = next(wiki.ifilter_wikilinks(), None) - if isinstance(wiki_link, Wikilink): - g['wr_page'] = wikilink_to_json(wiki_link) - ext_link = next(wiki.ifilter_external_links(), None) - if isinstance(ext_link, ExternalLink): - g['weblink'] = external_link_to_json(ext_link) - remaining = str(Wikicode(n for n in wiki.nodes - if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip() - match = re.match(r'(.*)\((.+)\)', remaining) - if match: - name, note = match.groups() - name = name.strip() - note = note.strip() - if len(name) > 0: - g['name'] = name - if len(note) > 0: - g['note'] = note - elif len(remaining) > 0 and remaining != '...': - g['name'] = remaining - if len(g) != 0: - gastronomy.append(g) - else: - break + line = re.match(r"^\* '''Hütten''':\s*(.*)\s*", line).group(1) + if len(line) == 0: + line = next(line_iter, '') + if not line.startswith('** '): + return gastronomy + line = re.match(r"^\*\*\s*(.*)\s*", line).group(1) + while True: + g = {} + wiki = mwparserfromhell.parse(line) + wiki_link = next(wiki.ifilter_wikilinks(), None) + if isinstance(wiki_link, Wikilink): + g['wr_page'] = wikilink_to_json(wiki_link) + ext_link = next(wiki.ifilter_external_links(), None) + if isinstance(ext_link, ExternalLink): + g['weblink'] = external_link_to_json(ext_link) + remaining = str(Wikicode(n for n in wiki.nodes + if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip() + match = re.match(r'(.*)\((.+)\)', remaining) + if match: + name, note = match.groups() + name = name.strip() + note = note.strip() + if len(name) > 0: + g['name'] = name + if len(note) > 0: + g['note'] = note + elif len(remaining) > 0 and remaining != '...': + g['name'] = remaining + if len(g) != 0: + gastronomy.append(g) + line = next(line_iter, '') + if not line.startswith('** '): + break + line = re.match(r"^\*\*\s*(.*)\s*", line).group(1) return gastronomy w = _gastronomy(str(v)) -- 2.39.5