ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/commitdiff
Improve automatic parsing of car-distances.
author: Philipp Spitzer <philipp@spitzer.priv.at>
Fri, 15 Jul 2022 16:18:23 +0000 (18:18 +0200)
committer: Philipp Spitzer <philipp@spitzer.priv.at>
Fri, 15 Jul 2022 16:18:23 +0000 (18:18 +0200)
bots/sledrun_wikitext_to_json.py
bots/update_sledrun_json_from_wikitext_car_distances.py [new file with mode: 0644]

index d53313dd8efe27ab1958d95c916cec5fdfdf5899..81c2a9b55eddf4863c86f7eed5e35a18c0aa7fa8 100644 (file)
@@ -302,7 +302,7 @@ class SledrunWikiTextToJsonBot(
 
             x = []
             for w in io.StringIO(str(v)):
-                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
+                match = re.match(r"\*\* [Vv]on \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                 if match:
                     ya, yb, yc = match.groups()
                     yc = float(yc.replace(',', '.'))
diff --git a/bots/update_sledrun_json_from_wikitext_car_distances.py b/bots/update_sledrun_json_from_wikitext_car_distances.py
new file mode 100644 (file)
index 0000000..20f323e
--- /dev/null
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+"""
+User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
+Put it in directory scripts/userscripts.
+
+Update a sledrun JSON page from a detail in a sledrun wikitext page.
+
+The following generators and filters are supported:
+
+&params;
+"""
+import io
+import json
+import re
+from itertools import takewhile, dropwhile
+from typing import Optional
+
+import jsonschema
+import mwparserfromhell
+from mwparserfromhell.nodes.extras import Parameter
+
+import pywikibot
+from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
+from mwparserfromhell.wikicode import Wikicode
+from pywikibot import pagegenerators, Page
+from pywikibot.bot import (
+    AutomaticTWSummaryBot,
+    ConfigParserBot,
+    ExistingPageBot,
+    NoRedirectPageBot,
+    SingleSiteBot,
+)
+from pywikibot.logging import warning
+from pywikibot.site._namespace import BuiltinNamespace
+from wrpylib.json_tools import order_json_keys
+
+from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
+from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
+    avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
+    opt_uint_from_str
+from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
+
+docuReplacements = {'&params;': pagegenerators.parameterHelp}
+
+
+class UpdateSledrunJsonFromWikiText(
+    SingleSiteBot,
+    ConfigParserBot,
+    ExistingPageBot,
+    AutomaticTWSummaryBot,
+):
+    def setup(self) -> None:
+        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
+        assert schema.content_model == 'json'
+        self.sledrun_schema = json.loads(schema.text)
+
+    def treat_page(self) -> None:
+        """Load the given page, do some changes, and save it."""
+        wikitext_content_model = 'wikitext'
+        if self.current_page.content_model != wikitext_content_model:
+            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
+                    f"instead of {wikitext_content_model}.")
+            return
+
+        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
+        if not sledrun_json_page.exists():
+            return
+        sledrun_json = json.loads(sledrun_json_page.text)
+        sledrun_json_orig = json.loads(sledrun_json_page.text)
+        sledrun_json_orig_text = json.dumps(sledrun_json_orig, ensure_ascii=False, indent=4)
+
+        car_distances = []
+        for line in self.current_page.text.split('\n'):
+            match = re.match(r"\*\* [Vv]on \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
+            if match:
+                ya, yb, yc = match.groups()
+                yc = float(yc.replace(',', '.'))
+                car_distances.append({
+                    'km': yc,
+                    'route': (ya.strip() + ' ' + yb.strip()).strip(),
+                })
+            else:
+                match = re.match(r"\*\* [Vv]on (.+): ([\d.,]+) km", line.rstrip())
+                if match:
+                    ya, yb = match.groups()
+                    yb = float(yb.replace(',', '.'))
+                    car_distances.append({
+                        'km': yb,
+                        'route': ya.strip(),
+                    })
+        if len(car_distances) > 0:
+            sledrun_json['car_distances'] = car_distances
+
+        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
+        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
+        assert sledrun_json_ordered == sledrun_json
+        if sledrun_json == sledrun_json_orig:
+            return
+        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
+        summary = 'Entfernung mit dem Auto im Rodelbahn JSON aktualisiert vom Wikitext.'
+        self.userPut(sledrun_json_page, sledrun_json_orig_text, sledrun_json_text, summary=summary, contentmodel='json')
+
+
+def main(*args: str) -> None:
+    local_args = pywikibot.handle_args(args)
+    gen_factory = pagegenerators.GeneratorFactory()
+    gen_factory.handle_args(local_args)
+    gen = gen_factory.getCombinedGenerator(preload=True)
+    if gen:
+        bot = UpdateSledrunJsonFromWikiText(generator=gen)
+        bot.run()
+    else:
+        pywikibot.bot.suggest_help(missing_generator=True)
+
+
+if __name__ == '__main__':
+    main()