From: Philipp Spitzer
Date: Sun, 12 Dec 2021 21:37:57 +0000 (+0100)
Subject: Parse gastronomy.
X-Git-Url: https://git.toastfreeware.priv.at/philipp/winterrodeln/wrpylib.git/commitdiff_plain/9118046d6301fd0bc36128ad069d0ceba7b0c670

Parse gastronomy.
---

NOTE(review): this copy of the patch was rebuilt from a version whose line
breaks and indentation had been collapsed. Leading indentation and the
whitespace-only context lines are reconstructed, not original, and hunk 2
carries one context line fewer than its original header (-258,7 +258,6)
announced. Confirm against bots/sledrun_wikitext_to_json.py before applying.
The added _gastronomy helper was changed in review: "is not '*'" became
"!= '*'" and comments were added, so the post-image blob id on the index
line is stale. Hunk 2 drops the float conversion of yc, which leaves 'km' as
a string - presumably intended, confirm.

diff --git a/bots/sledrun_wikitext_to_json.py b/bots/sledrun_wikitext_to_json.py
index 4933467..ee354c9 100644
--- a/bots/sledrun_wikitext_to_json.py
+++ b/bots/sledrun_wikitext_to_json.py
@@ -16,7 +16,7 @@ from typing import Any, Optional
 
 import mwparserfromhell
 import pywikibot
-from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template
+from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
 from mwparserfromhell.wikicode import Wikicode
 from pywikibot import pagegenerators, Page
 from pywikibot.bot import (
@@ -258,6 +258,5 @@ class SledrunWikiTextToJsonBot(
                 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                 if match:
                     ya, yb, yc = match.groups()
-                    yc = float(yc.replace(',', '.'))
                     x.append({
                         'km': yc,
@@ -269,6 +268,67 @@ class SledrunWikiTextToJsonBot(
 
         x = []
         for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
+            def _gastronomy(value: str):
+                """Return the entries listed below "* '''Hütten''':" in value.
+
+                The text is scanned line by line for that heading. The "** " lines
+                directly after it are parsed; the first other line ends the list.
+                Each entry is a dict with the optional keys 'wr_page' (first wiki
+                link), 'weblink' (first external link) and 'note' (remaining text;
+                if it starts with "(", only the part between that and the last ")"
+                is kept). The result is empty if the heading is not found.
+                """
+                gastronomy = []
+                line_iter = io.StringIO(value)
+                line = next(line_iter, None)
+                while line is not None and line.rstrip() != "* '''Hütten''':":
+                    line = next(line_iter, None)
+                if line is None:
+                    return gastronomy
+                while line is not None:
+                    line = next(line_iter, None)
+                    if line is not None:
+                        if line.startswith('** '):
+                            g = {}
+                            wiki = mwparserfromhell.parse(line)
+                            wiki_link = next(wiki.ifilter_wikilinks(), None)
+                            if isinstance(wiki_link, Wikilink):
+                                wl = {
+                                    'title': str(wiki_link.title),
+                                }
+                                text = str_or_none(wiki_link.text)
+                                if text is not None:
+                                    wl['text'] = text
+                                g['wr_page'] = wl
+                            ext_link = next(wiki.ifilter_external_links(), None)
+                            if isinstance(ext_link, ExternalLink):
+                                # NOTE(review): if title can be None here, 'text' becomes
+                                # the string 'None' - confirm.
+                                el = {
+                                    'url': str(ext_link.url),
+                                    'text': str(ext_link.title)
+                                }
+                                g['weblink'] = el
+                            # Keep only Text/Tag nodes, minus the bare '*' ones
+                            # (presumably the list markers). Compare with !=: "is not"
+                            # on a str literal tests identity, not equality.
+                            remaining = str(Wikicode(
+                                n for n in wiki.nodes
+                                if isinstance(n, (Text, Tag)) and str(n).strip() != '*'
+                            )).strip()
+                            match = re.match(r'\((.+)\)', remaining)
+                            if match:
+                                remaining = match.group(1)
+                            if len(remaining) > 0:
+                                g['note'] = remaining
+                            gastronomy.append(g)
+                        else:
+                            break
+                return gastronomy
+            w = _gastronomy(str(v))
+            if len(w) > 0:
+                sledrun_json['gastronomy'] = w
+
             i = iter(v.nodes)
             w = next(i, None)
             while w is not None: