ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/commitdiff
Better parsing for car description.
author Philipp Spitzer <philipp@spitzer.priv.at>
Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)
committer Philipp Spitzer <philipp@spitzer.priv.at>
Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)
bots/sledrun_wikitext_to_json.py

index 1e3cbfe56686ce150bfc53205a688eea83561715..f71b19a181b7a0ed9a1f1c54001f9caaecb2f6d9 100644 (file)
@@ -12,13 +12,14 @@ The following generators and filters are supported:
 import io
 import json
 import re
 import io
 import json
 import re
+from itertools import takewhile, dropwhile
 from typing import Any, Optional
 
 import mwparserfromhell
 from mwparserfromhell.nodes.extras import Parameter
 
 import pywikibot
 from typing import Any, Optional
 
 import mwparserfromhell
 from mwparserfromhell.nodes.extras import Parameter
 
 import pywikibot
-from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
+from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
 from mwparserfromhell.wikicode import Wikicode
 from pywikibot import pagegenerators, Page
 from pywikibot.bot import (
 from mwparserfromhell.wikicode import Wikicode
 from pywikibot import pagegenerators, Page
 from pywikibot.bot import (
@@ -271,34 +272,39 @@ class SledrunWikiTextToJsonBot(
             break
 
         def _car():
             break
 
         def _car():
-            for v in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
-                for w in v.ifilter_text(recursive=False):
-                    x = w.strip()
-                    if x:
-                        sledrun_json["car_description"] = str(x)
-                        break
-                x = []
-                for w in v.ifilter_templates(matches='Parkplatz'):
-                    za = str_or_none(w.get(1, None))
-                    zb = str_or_none(w.get(2, None))
-                    z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
-                    if len(z) > 0:
-                        x.append({'position': z})
-                if len(x) > 0:
-                    sledrun_json['car_parking'] = x
-
-                x = []
-                for w in io.StringIO(str(v)):
-                    match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
-                    if match:
-                        ya, yb, yc = match.groups()
-                        yc = float(yc.replace(',', '.'))
-                        x.append({
-                            'km': yc,
-                            'route': (ya.strip() + ' ' + yb.strip()).strip(),
-                        })
-                if len(x) > 0:
-                    sledrun_json['car_distances'] = x
+            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
+            if not car_section_list:
+                return
+            v = car_section_list[0]
+
+            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
+            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
+                                          description_nodes)
+            if description := str(Wikicode(list(description_nodes))).strip():
+                sledrun_json["car_description"] = description
+
+            x = []
+            for w in v.ifilter_templates(matches='Parkplatz'):
+                za = str_or_none(w.get(1, None))
+                zb = str_or_none(w.get(2, None))
+                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
+                if len(z) > 0:
+                    x.append({'position': z})
+            if len(x) > 0:
+                sledrun_json['car_parking'] = x
+
+            x = []
+            for w in io.StringIO(str(v)):
+                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
+                if match:
+                    ya, yb, yc = match.groups()
+                    yc = float(yc.replace(',', '.'))
+                    x.append({
+                        'km': yc,
+                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
+                    })
+            if len(x) > 0:
+                sledrun_json['car_distances'] = x
         _car()
 
         x = []
         _car()
 
         x = []