ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/commitdiff
Parse gastronomy.
author Philipp Spitzer <philipp@spitzer.priv.at>
Sun, 12 Dec 2021 21:37:57 +0000 (22:37 +0100)
committer Philipp Spitzer <philipp@spitzer.priv.at>
Sun, 12 Dec 2021 21:37:57 +0000 (22:37 +0100)
bots/sledrun_wikitext_to_json.py

index 493346788964bb962e7d2cd1733aeb360a9759c5..ee354c9f3b0eb29797603db50411440d09f9012d 100644 (file)
@@ -16,7 +16,7 @@ from typing import Any, Optional
 
 import mwparserfromhell
 import pywikibot
 
 import mwparserfromhell
 import pywikibot
-from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template
+from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
 from mwparserfromhell.wikicode import Wikicode
 from pywikibot import pagegenerators, Page
 from pywikibot.bot import (
 from mwparserfromhell.wikicode import Wikicode
 from pywikibot import pagegenerators, Page
 from pywikibot.bot import (
@@ -258,7 +258,6 @@ class SledrunWikiTextToJsonBot(
                 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                 if match:
                     ya, yb, yc = match.groups()
                 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                 if match:
                     ya, yb, yc = match.groups()
-
                     yc = float(yc.replace(',', '.'))
                     x.append({
                         'km': yc,
                     yc = float(yc.replace(',', '.'))
                     x.append({
                         'km': yc,
@@ -269,6 +268,52 @@ class SledrunWikiTextToJsonBot(
 
             x = []
             for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
 
             x = []
             for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
+                def _gastronomy(value: str):
+                    gastronomy = []
+                    line_iter = io.StringIO(value)
+                    line = next(line_iter, None)
+                    while line is not None and line.rstrip() != "* '''Hütten''':":
+                        line = next(line_iter, None)
+                    if line is None:
+                        return gastronomy
+                    while line is not None:
+                        line = next(line_iter, None)
+                        if line is not None:
+                            if line.startswith('** '):
+                                g = {}
+                                wiki = mwparserfromhell.parse(line)
+                                wiki_link = next(wiki.ifilter_wikilinks(), None)
+                                if isinstance(wiki_link, Wikilink):
+                                    wl = {
+                                        'title': str(wiki_link.title),
+                                    }
+                                    text = str_or_none(wiki_link.text)
+                                    if text is not None:
+                                        wl['text'] = text
+                                    g['wr_page'] = wl
+                                ext_link = next(wiki.ifilter_external_links(), None)
+                                if isinstance(ext_link, ExternalLink):
+                                    el = {
+                                        'url': str(ext_link.url),
+                                        'text': str(ext_link.title)
+                                    }
+                                    g['weblink'] = el
+                                remaining = str(Wikicode(n for n in wiki.nodes
+                                                         if isinstance(n, (Text, Tag)) and str(n).strip() is not '*')).\
+                                                strip()
+                                match = re.match(r'\((.+)\)', remaining)
+                                if match:
+                                    remaining = match.group(1)
+                                if len(remaining) > 0:
+                                    g['note'] = remaining
+                                gastronomy.append(g)
+                            else:
+                                break
+                    return gastronomy
+                w = _gastronomy(str(v))
+                if len(w) > 0:
+                    sledrun_json['gastronomy'] = w
+
                 i = iter(v.nodes)
                 w = next(i, None)
                 while w is not None:
                 i = iter(v.nodes)
                 w = next(i, None)
                 while w is not None: