#!/usr/bin/python
"""
User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
Put it in directory scripts/userscripts.

Create a sledrun JSON page from a sledrun wikitext page (including map).

The following generators and filters are supported:

&params;
"""
import io
import json
import re
from itertools import takewhile, dropwhile
from typing import Optional

import jsonschema
import mwparserfromhell
from mwparserfromhell.nodes.extras import Parameter

import pywikibot
from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
from mwparserfromhell.wikicode import Wikicode
from pywikibot import pagegenerators, Page
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    SingleSiteBot,
)
from pywikibot.logging import warning
from pywikibot.site._namespace import BuiltinNamespace

from wrpylib.json_tools import order_json_keys
from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
    avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
    opt_uint_from_str
from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
    template_to_json, external_link_to_json

# The placeholder in the module docstring is replaced by the generator help text when the help is shown.
docuReplacements = {'&params;': pagegenerators.parameterHelp}


class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot converting sledrun wikitext pages to '/Rodelbahn.json' and '/Landkarte.json' sub-pages."""

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki and keep it in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current wikitext sledrun page to JSON and save the JSON sub-pages.

        The page is skipped (with a warning where applicable) if it is not wikitext, if it is not in the
        category 'Kategorie:Rodelbahn' or if both JSON sub-pages exist already. Otherwise the sledrun JSON
        is collected from the page, validated against the schema loaded in :meth:`setup` and written to
        '<title>/Rodelbahn.json'; a ``<wrmap>`` tag, if present, is written to '<title>/Landkarte.json'.
        Existing sub-pages are never overwritten.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if sledrun_json_page.exists() and map_json_page.exists():  # should be an option
            return

        # Only the first <wrmap> tag of the page is converted.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # optional_set comes from wrpylib; presumably it only sets the key if the value is not None.
        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The Rodelbahnbox is only evaluated if the page has exactly one such top level template.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])

            image = rbb['Bild']
            if image is not None:
                image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing file is reported only, the image name is used nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = image

            optional_set(sledrun_json, 'length', rbb['Länge'])

            difficulty = rbb['Schwierigkeit']
            if difficulty is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

            avalanches = rbb['Lawinen']
            if avalanches is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

            # Several Rodelbahnbox entries are pairs of a value and an additional text.
            has_operator, operator = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', has_operator)
            optional_set(sledrun_json, 'operator', operator)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            walkup_separate, walkup_note = rbb['Aufstieg getrennt']
            if walkup_separate is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
            optional_set(sledrun_json, 'walkup_note', walkup_note)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support() -> None:
                """Set 'walkup_supports' from the (type, note) pairs of 'Aufstiegshilfe'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is None:
                    return
                walkup_supports = []
                for walkup_support_type, note in walkup_support_rbb:
                    walkup_support = {'type': walkup_support_type}
                    optional_set(walkup_support, 'note', note)
                    walkup_supports.append(walkup_support)
                sledrun_json['walkup_supports'] = walkup_supports

            _walkup_support()

            nightlight_possible, nightlight_possible_note = rbb['Beleuchtungsanlage']
            if nightlight_possible is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight_possible)
            optional_set(sledrun_json, 'nightlight_possible_note', nightlight_possible_note)

            nightlight_weekdays_count, nightlight_weekdays_note = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', nightlight_weekdays_count)
            optional_set(sledrun_json, 'nightlight_weekdays_note', nightlight_weekdays_note)

            def _sled_rental() -> None:
                """Set 'sled_rental_direct' and 'sled_rental' from the (name, note) pairs of 'Rodelverleih'."""
                rental_rbb = rbb['Rodelverleih']
                if rental_rbb is None:
                    return
                sledrun_json['sled_rental_direct'] = rental_rbb != []
                rentals = []
                for name, note in rental_rbb:
                    rental = {}
                    # A name containing a wiki link refers to a wiki page, otherwise it is used as plain name.
                    wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                    if isinstance(wiki_link, Wikilink):
                        rental['wr_page'] = wikilink_to_json(wiki_link)
                    else:
                        rental['name'] = name
                    optional_set(rental, 'note', note)
                    rentals.append(rental)
                sledrun_json['sled_rental'] = rentals

            _sled_rental()

            def _cachet() -> None:
                """Set 'cachet' to whether the 'Gütesiegel' entry is non-empty."""
                cachet_rbb = rbb['Gütesiegel']
                if cachet_rbb is not None:
                    sledrun_json['cachet'] = len(cachet_rbb) > 0

            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            position = rbb['Position']
            if position is not None:
                sledrun_json['position'] = lonlat_to_json(position)

            top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if top != {}:
                sledrun_json['top'] = top

            bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if bottom != {}:
                sledrun_json['bottom'] = bottom

            info_phone = rbb['Telefonauskunft']
            if info_phone is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

            has_info_web, info_web_url = rbb['Webauskunft']
            if has_info_web is not None:
                sledrun_json['info_web'] = [{'url': info_web_url}] if has_info_web else []

            public_transport = rbb['Öffentliche Anreise']
            if public_transport is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

        def _button_bar() -> None:
            """Set 'videos', 'webcams' and 'correction_email' from the first top level 'Buttonleiste'."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is None:
                return

            def _param_text(param_name: str) -> Optional[str]:
                """Return the stripped value of the template parameter or None if missing or empty."""
                param = bb.get(param_name, None)
                if isinstance(param, Parameter) and param.value.strip() != "":
                    return str(param.value.strip())
                return None

            video = _param_text('video')
            if video is not None:
                sledrun_json['videos'] = [{'url': video}]
            webcam = _param_text('webcam')
            if webcam is not None:
                sledrun_json['webcams'] = [{'url': webcam}]
            correction = _param_text('Korrektur_To')
            if correction is not None:
                sledrun_json['correction_email'] = correction

        _button_bar()

        def _public_transport() -> None:
            """Set description, stops, lines and links of section 'Anreise mit öffentlichen Verkehrsmitteln'."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]

            # The description is the text before the first list item; the placeholder text is ignored.
            first_item = next((n for n in pt.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
            if first_item is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(first_item)])).strip()
                if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
                                                               "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
                    sledrun_json["public_transport_description"] = str(description)

            # Templates that add information to the stop given by the preceding 'Haltestelle' template.
            stop_template_keys = {
                "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
                "Fahrplan Abfahrtsmonitor VVV": 'monitor_template',
                "Fahrplan Hinfahrt VVT": 'route_arrival_template',
                "Fahrplan Hinfahrt VVV": 'route_arrival_template',
                "Fahrplan Rückfahrt VVT": 'route_departure_template',
                "Fahrplan Rückfahrt VVV": 'route_departure_template',
            }
            line_templates = ("Fahrplan Linie VVT", "Fahrplan Linie VVV")

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # stop currently being collected, None if there is none
            for node in pt.nodes:
                if isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
                    continue
                if not isinstance(node, Template):
                    continue
                # Strip the name like it is done for the other templates of the page.
                template_name = str(node.name).strip()
                if template_name == 'Haltestelle':
                    if stop is not None:
                        public_transport_stops.append(stop)
                        # Reset, otherwise a following empty 'Haltestelle' would cause the stop to be added twice
                        # and following templates would be assigned to the wrong stop.
                        stop = None
                    if all(len(p.strip()) == 0 for p in node.params):
                        continue  # placeholder template without any values
                    stop = {}
                    municipality = node.get(1, None)
                    if municipality is not None:
                        stop['municipality'] = str(municipality)
                    name_local = node.get(2, None)
                    if name_local is not None:
                        stop['name_local'] = str(name_local)
                    lonlat_str = str(node.get(3, '')).strip()
                    ele_str = str(node.get(4, '')).strip()
                    stop_position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                    if len(stop_position) > 0:
                        stop['position'] = stop_position
                elif template_name in stop_template_keys:
                    if stop is None:
                        warning(f"{self.current_page.title()}: Template {template_name} is ignored because it "
                                f"does not follow a non-empty Haltestelle template.")
                        continue
                    stop[stop_template_keys[template_name]] = template_to_json(node)
                elif template_name in line_templates:
                    # A line template ends the current stop.
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    public_transport_lines.append({'timetable_template': template_to_json(node)})
            if stop is not None:
                public_transport_stops.append(stop)

            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links

        _public_transport()

        def _car() -> None:
            """Set 'car_description', 'car_parking' and 'car_distances' from section 'Anreise mit dem Auto'."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is the text between the heading and the first list item.
            description_nodes = dropwhile(lambda n: isinstance(n, Heading), car_section.nodes)
            description_nodes = takewhile(lambda n: not (isinstance(n, Tag) and n.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
                                              "wenn man mit dem Auto zur Rodelbahn anreist."):
                    sledrun_json["car_description"] = description

            car_parking = []
            for parking in car_section.ifilter_templates(matches='Parkplatz'):
                lonlat_str = str(parking.get(1, '')).strip()
                ele_str = str(parking.get(2, '')).strip()
                parking_position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                if len(parking_position) > 0:
                    car_parking.append({'position': parking_position})
            if len(car_parking) > 0:
                sledrun_json['car_parking'] = car_parking

            # Distances are second level list items like "** von '''Innsbruck''' (über ...): 12,5 km".
            distance_pattern = re.compile(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km")
            car_distances = []
            for line in io.StringIO(str(car_section)):
                match = distance_pattern.match(line.rstrip())
                if match:
                    origin, via, km = match.groups()
                    car_distances.append({
                        'km': float(km.replace(',', '.')),
                        'route': (origin.strip() + ' ' + via.strip()).strip(),
                    })
            if len(car_distances) > 0:
                sledrun_json['car_distances'] = car_distances

        _car()

        def _nightlight(value: str) -> Optional[str]:
            """Return the text following "* '''Beleuchtung''':" in value or None if missing or empty."""
            for line in io.StringIO(value):
                if line.startswith("* '''Beleuchtung''':"):
                    line = line.replace("* '''Beleuchtung''':", "").strip()
                    return line if len(line) > 0 else None
            return None

        def _gastronomy(value: str) -> list:
            """Return the entries of the '** ' items directly following the line "* '''Hütten''':"."""
            gastronomy = []
            line_iter = io.StringIO(value)
            # Skip everything up to and including the "Hütten" line.
            for line in line_iter:
                if line.rstrip() == "* '''Hütten''':":
                    break
            else:
                return gastronomy
            for line in line_iter:
                if not line.startswith('** '):
                    break  # end of the second level list
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = external_link_to_json(ext_link)
                # The remaining text is either "name (note)" or just a name; '...' is a placeholder.
                remaining_nodes = [n for n in wiki.nodes if isinstance(n, (Text, Tag)) and str(n).strip() != '*']
                remaining = str(Wikicode(remaining_nodes)).strip()
                match = re.match(r'(.*)\((.+)\)', remaining)
                if match:
                    name, note = match.groups()
                    name = name.strip()
                    note = note.strip()
                    if len(name) > 0:
                        g['name'] = name
                    if len(note) > 0:
                        g['note'] = note
                elif len(remaining) > 0 and remaining != '...':
                    g['name'] = remaining
                if len(g) != 0:
                    gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Set 'sled_rental_description' to the text of the "* '''Rodelverleih''':" item in value."""
            line_iter = io.StringIO(value)
            match = None
            for line in line_iter:
                match = re.match(r"\* '''Rodelverleih''':(.*)", line)
                if match is not None:
                    break
            if match is None:
                return
            # NOTE(review): '(.*)' does not capture the newline of the first line, so continuation lines are
            # joined directly to it - confirm that this is intended.
            result = [match.group(1)]
            for line in line_iter:
                if re.match(r"\* ", line) is not None:
                    break  # next first level list item
                result.append(line)
            description = ''.join(result).strip()
            if len(description) > 0:
                sledrun_json['sled_rental_description'] = description

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)
            optional_set(sledrun_json, 'nightlight_description', _nightlight(section_text))

            gastronomy = _gastronomy(section_text)
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            _sled_rental_description(section_text)

            # Collect the external links directly following "'''Siehe auch'''".
            node_iter = iter(section.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    pass  # list markup and whitespace between the links
                else:
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        def _tiroler_naturrodelbahn_guetesiegel() -> None:
            """Set 'tiroler_naturrodelbahn_gütesiegel' from templates 'Tiroler Naturrodelbahn Gütesiegel'."""
            keys = {
                'Anlagename': 'name',
                'Organisation': 'organization',
                'Erstverleihung': 'first_issued',
                'Verlängerung': 'valid_from',
                'Forum': 'forum_id',
                'Thread': 'thread_id',
            }
            numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
            for gst in wikicode.filter_templates():
                if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
                    continue
                gsj = {}
                for template_key, json_key in keys.items():
                    if not gst.has(template_key):
                        continue
                    value = gst.get(template_key).value.strip()
                    if value == '':
                        continue
                    gsj[json_key] = int(value) if json_key in numeric else value
                if len(gsj) > 0:
                    sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj

        _tiroler_naturrodelbahn_guetesiegel()

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext generated from the JSON and its difference to the current page for review.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Ordering the keys must not change the content.
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        if not sledrun_json_page.exists():
            summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
            pywikibot.output('\03{lightpurple}---\03{default}')
            pywikibot.output(sledrun_json_text)
            pywikibot.output('\03{lightpurple}---\03{default}')
            self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary,
                         contentmodel='json')

        if map_json is not None and not map_json_page.exists():
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')


def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the pages of the given generators.

    :param args: Command line arguments; they are passed to ``pywikibot.handle_args``.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = SledrunWikiTextToJsonBot(generator=gen)
        bot.run()
    else:
        pywikibot.bot.suggest_help(missing_generator=True)


if __name__ == '__main__':
    main()