Better parsing for car description.

author Philipp Spitzer <philipp@spitzer.priv.at>

Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)

committer Philipp Spitzer <philipp@spitzer.priv.at>

Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)
author Philipp Spitzer <philipp@spitzer.priv.at>
Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)
committer Philipp Spitzer <philipp@spitzer.priv.at>
Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)
diff --git a/bots/sledrun_wikitext_to_json.py b/bots/sledrun_wikitext_to_json.py

index 1e3cbfe56686ce150bfc53205a688eea83561715..f71b19a181b7a0ed9a1f1c54001f9caaecb2f6d9 100644 (file)
--- a/bots/sledrun_wikitext_to_json.py
+++ b/bots/sledrun_wikitext_to_json.py
@@ -12,13 +12,14 @@ The following generators and filters are supported:
  import io
  import json
  import re
+from itertools import takewhile, dropwhile
  from typing import Any, Optional
  
  import mwparserfromhell
  from mwparserfromhell.nodes.extras import Parameter
  
  import pywikibot
-from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
+from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
  from mwparserfromhell.wikicode import Wikicode
  from pywikibot import pagegenerators, Page
  from pywikibot.bot import (
@@ -271,34 +272,39 @@ class SledrunWikiTextToJsonBot(
              break
  
          def _car():
-            for v in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
-                for w in v.ifilter_text(recursive=False):
-                    x = w.strip()
-                    if x:
-                        sledrun_json["car_description"] = str(x)
-                        break
-                x = []
-                for w in v.ifilter_templates(matches='Parkplatz'):
-                    za = str_or_none(w.get(1, None))
-                    zb = str_or_none(w.get(2, None))
-                    z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
-                    if len(z) > 0:
-                        x.append({'position': z})
-                if len(x) > 0:
-                    sledrun_json['car_parking'] = x
-
-                x = []
-                for w in io.StringIO(str(v)):
-                    match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
-                    if match:
-                        ya, yb, yc = match.groups()
-                        yc = float(yc.replace(',', '.'))
-                        x.append({
-                            'km': yc,
-                            'route': (ya.strip() + ' ' + yb.strip()).strip(),
-                        })
-                if len(x) > 0:
-                    sledrun_json['car_distances'] = x
+            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
+            if not car_section_list:
+                return
+            v = car_section_list[0]
+
+            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
+            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
+                                          description_nodes)
+            if description := str(Wikicode(list(description_nodes))).strip():
+                sledrun_json["car_description"] = description
+
+            x = []
+            for w in v.ifilter_templates(matches='Parkplatz'):
+                za = str_or_none(w.get(1, None))
+                zb = str_or_none(w.get(2, None))
+                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
+                if len(z) > 0:
+                    x.append({'position': z})
+            if len(x) > 0:
+                sledrun_json['car_parking'] = x
+
+            x = []
+            for w in io.StringIO(str(v)):
+                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
+                if match:
+                    ya, yb, yc = match.groups()
+                    yc = float(yc.replace(',', '.'))
+                    x.append({
+                        'km': yc,
+                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
+                    })
+            if len(x) > 0:
+                sledrun_json['car_distances'] = x
          _car()
  
          x = []
author	Philipp Spitzer <philipp@spitzer.priv.at>
	Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)
committer	Philipp Spitzer <philipp@spitzer.priv.at>
	Sun, 9 Jan 2022 22:30:35 +0000 (23:30 +0100)