#!/usr/bin/python
"""
User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
Put it in directory scripts/userscripts.

Create a sledrun JSON page from a sledrun wikitext page (including map).

The following generators and filters are supported:

&params;
"""
import io
import json
import re
from typing import Any, List, Optional, Tuple

import mwparserfromhell
import pywikibot
from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
from mwparserfromhell.wikicode import Wikicode
from pywikibot import pagegenerators, Page
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    SingleSiteBot,
)
from pywikibot.logging import warning
from pywikibot.site._namespace import BuiltinNamespace

from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
    avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
    opt_uint_from_str

from pywikibot.site import Namespace

# The key must match the placeholder in the module docstring exactly: pywikibot
# substitutes it with the generator/filter help text when showing -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}

# Matches one distance bullet of the car section, e.g.
# "** von '''Innsbruck''' (über Axams): 12,5 km".
_CAR_DISTANCE_RE = re.compile(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km")

# Timetable templates that belong to the most recent {{Haltestelle}} and the
# JSON key under which each of them is stored in the stop entry.
_STOP_TEMPLATE_KEYS = {
    "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
    "Fahrplan Hinfahrt VVT": 'route_arrival_template',
    "Fahrplan Rückfahrt VVT": 'route_departure_template',
}


def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, or ``None`` if ``value`` is ``None``."""
    if value is not None:
        return str(value)
    return None


def template_to_json(value: Template) -> dict:
    """Convert a template to a dict with its name and the list of its parameters.

    Each parameter is stored as ``{'value': <parameter as string>}``.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(p)} for p in value.params],
    }


def _has_category(wikilinks, category: str) -> bool:
    """Return whether any of the wikilinks has ``category`` as its link target."""
    return any(link.title == category for link in wikilinks)


def _first_text(section: Wikicode) -> Optional[str]:
    """Return the first non-blank top level text node of ``section`` (stripped), or ``None``."""
    for node in section.ifilter_text(recursive=False):
        stripped = node.strip()
        if stripped:
            return str(stripped)
    return None


def _position_from_template(template: Template, lonlat_param: int, ele_param: int) -> dict:
    """Build a position dict from the given positional parameters of ``template``.

    Missing parameters are passed on as ``None``; the result is whatever
    ``lonlat_ele_to_json`` returns for the parsed values (callers treat an empty
    dict as "no position").
    """
    lonlat = str_or_none(template.get(lonlat_param, None))
    ele = str_or_none(template.get(ele_param, None))
    return lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(ele))


def _parse_public_transport(section: Wikicode) -> Tuple[Optional[str], List[dict], List[dict]]:
    """Extract description, stops and lines from the public transport section.

    :param section: Section content without its heading.
    :return: Tuple ``(description, stops, lines)``. The description is the text
        before the first ``*`` list marker (``None`` if there is no marker or
        the text is blank).
    """
    description = None
    first_bullet = next((n for n in section.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
    if first_bullet is not None:
        intro = str(Wikicode(section.nodes[:section.nodes.index(first_bullet)])).strip()
        if intro:
            description = intro

    stops = []
    lines = []
    stop = None  # stop entry currently being filled, not yet appended to stops
    for node in section.nodes:
        if not isinstance(node, Template):
            continue
        name = str(node.name)
        if name == 'Haltestelle':
            if stop is not None:
                stops.append(stop)
            stop = {}
            municipality = node.get(1, None)
            if municipality is not None:
                stop['municipality'] = str(municipality)
            name_local = node.get(2, None)
            if name_local is not None:
                stop['name_local'] = str(name_local)
            position = _position_from_template(node, 3, 4)
            if len(position) > 0:
                stop['position'] = position
        elif name in _STOP_TEMPLATE_KEYS:
            if stop is None:
                # A timetable template without a preceding {{Haltestelle}}:
                # keep the information in a stop entry of its own instead of
                # failing on the missing dict.
                warning(f"Template '{name}' is not preceded by a 'Haltestelle' template.")
                stop = {}
            stop[_STOP_TEMPLATE_KEYS[name]] = template_to_json(node)
        elif name == "Fahrplan Linie VVT":
            # A line template ends the current stop.
            if stop is not None:
                stops.append(stop)
                stop = None
            lines.append({'timetable_template': template_to_json(node)})
    if stop is not None:
        stops.append(stop)
    return description, stops, lines


def _parse_car_distances(text: str) -> List[dict]:
    """Extract the ``** von '''place''' ...: 12,5 km`` bullets from the car section text.

    Lines whose number cannot be converted to a float are skipped with a warning.
    """
    distances = []
    for line in io.StringIO(text):
        match = _CAR_DISTANCE_RE.match(line.rstrip())
        if match is None:
            continue
        origin, via, km = match.groups()
        try:
            km_value = float(km.replace(',', '.'))
        except ValueError:
            # The pattern also accepts things like "1.234,5" that float() rejects.
            warning(f"Cannot parse distance '{km}' in line '{line.rstrip()}'.")
            continue
        distances.append({
            'km': km_value,
            'route': (origin.strip() + ' ' + via.strip()).strip(),
        })
    return distances


def _parse_gastronomy(value: str) -> List[dict]:
    """Extract the list entries below the line ``* '''Hütten''':``.

    Only the uninterrupted run of lines starting with ``'** '`` directly after
    that line is read. Each entry may get the keys ``wr_page`` (first wikilink),
    ``weblink`` (first external link) and ``note`` (remaining text, with an
    enclosing pair of parentheses removed).
    """
    gastronomy = []
    lines = io.StringIO(value)
    for line in lines:
        if line.rstrip() == "* '''Hütten''':":
            break
    else:
        return gastronomy

    for line in lines:
        if not line.startswith('** '):
            break
        entry = {}
        wiki = mwparserfromhell.parse(line)

        wiki_link = next(wiki.ifilter_wikilinks(), None)
        if isinstance(wiki_link, Wikilink):
            wr_page = {'title': str(wiki_link.title)}
            text = str_or_none(wiki_link.text)
            if text is not None:
                wr_page['text'] = text
            entry['wr_page'] = wr_page

        ext_link = next(wiki.ifilter_external_links(), None)
        if isinstance(ext_link, ExternalLink):
            weblink = {'url': str(ext_link.url)}
            # An untitled link has title None; do not store the string "None".
            if ext_link.title is not None:
                weblink['text'] = str(ext_link.title)
            entry['weblink'] = weblink

        # Everything that is neither a link nor a list marker is the note.
        remaining = str(Wikicode(
            [n for n in wiki.nodes if isinstance(n, (Text, Tag)) and str(n).strip() != '*']
        )).strip()
        match = re.match(r'\((.+)\)', remaining)
        if match:
            remaining = match.group(1)
        if len(remaining) > 0:
            entry['note'] = remaining
        gastronomy.append(entry)
    return gastronomy


def _parse_see_also(nodes) -> List[dict]:
    """Collect the external links that directly follow ``'''Siehe auch'''``.

    Reading stops at the first node that is neither an external link nor
    blank / ``*`` / ``:`` text or markup.
    """
    links = []
    node_iter = iter(nodes)
    for node in node_iter:
        if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
            break
    else:
        return links

    for node in node_iter:
        if isinstance(node, ExternalLink):
            link = {'url': str(node.url)}
            if node.title is not None:
                link['text'] = str(node.title)
            links.append(link)
        elif isinstance(node, (Text, Tag)) and str(node).strip() in ('', '*', ':'):
            continue
        else:
            break
    return links


class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that extracts sledrun data from wikitext pages and rewrites the pages from that data."""

    def _add_rodelbahnbox(self, sledrun_json: dict, rbb_template: Template) -> None:
        """Copy the values of a ``Rodelbahnbox`` template into ``sledrun_json``.

        Values that are ``None`` (or empty position dicts) are left out. A
        warning is logged if the referenced image page does not exist; the
        image name is stored nevertheless.
        """
        rbb = rodelbahnbox_from_template(rbb_template)

        image = rbb['Bild']
        if image is not None:
            image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = image

        length = rbb['Länge']
        if length is not None:
            sledrun_json['length'] = length

        difficulty = rbb['Schwierigkeit']
        if difficulty is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

        avalanches = rbb['Lawinen']
        if avalanches is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

        has_operator, operator = rbb['Betreiber']
        if has_operator is not None:
            sledrun_json['has_operator'] = has_operator
        if operator is not None:
            sledrun_json['operator'] = operator

        walkup_possible = rbb['Aufstieg möglich']
        if walkup_possible is not None:
            sledrun_json['walkup_possible'] = walkup_possible

        walkup_separate, walkup_comment = rbb['Aufstieg getrennt']
        if walkup_separate is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
        if walkup_comment is not None:
            sledrun_json['walkup_comment'] = walkup_comment  # TODO

        walkup_time = rbb['Gehzeit']
        if walkup_time is not None:
            sledrun_json['walkup_time'] = walkup_time

        nightlight_possible, nightlight_description = rbb['Beleuchtungsanlage']
        if nightlight_possible is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight_possible)
        if nightlight_description is not None:
            sledrun_json['nightlight_description'] = nightlight_description

        sled_rental = rbb['Rodelverleih']
        if sled_rental is not None:
            sledrun_json['sled_rental_direct'] = sled_rental != []
            sledrun_json['sled_rental_description'] = opt_str_opt_comment_enum_to_str(sled_rental)

        show_in_overview = rbb['In Übersichtskarte']
        if show_in_overview is not None:
            sledrun_json['show_in_overview'] = show_in_overview

        forum_id = rbb['Forumid']
        if forum_id is not None:
            sledrun_json['forum_id'] = forum_id

        position = rbb['Position']
        if position is not None:
            sledrun_json['position'] = lonlat_to_json(position)

        top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if top != {}:
            sledrun_json['top'] = top

        bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if bottom != {}:
            sledrun_json['bottom'] = bottom

        info_phone = rbb['Telefonauskunft']
        if info_phone is not None:
            sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

        public_transport = rbb['Öffentliche Anreise']
        if public_transport is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

    def treat_page(self) -> None:
        """Extract the sledrun data of the current page and save the page rebuilt from it.

        The page is skipped with a warning if its content model is not
        wikitext, if it lacks the category ``Kategorie:Rodelbahn`` or if one of
        the sub pages ``/Rodelbahn.json`` and ``/Landkarte.json`` already
        exists. Otherwise the wikitext is parsed into a sledrun dict (and an
        optional map from the first ``wrmap`` tag), the text returned by
        ``create_sledrun_wiki`` for that data replaces the page text and the
        page is saved.
        """
        page_title = self.current_page.title()

        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {page_title} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not _has_category(wikilink_list, category_sledrun):
            warning(f'The page {page_title} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, page_title + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {page_title}.")
            return

        map_json_page = Page(self.site, page_title + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {page_title}.")
            return

        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": page_title,
            "aliases": [],
            # The category name is the link target (title); the link text of a
            # plain category link is None.
            "entry_under_construction": _has_category(wikilink_list, 'Kategorie:In Arbeit'),
        }

        general_sections = wikicode.get_sections(levels=[2], matches='Allgemeines')
        if len(general_sections) > 0:
            description = _first_text(general_sections[0])
            if description is not None:
                sledrun_json["description"] = description

        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            self._add_rodelbahnbox(sledrun_json, rbb_list[0])

        # Only the first matching public transport section is evaluated.
        public_transport_sections = wikicode.get_sections(
            levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln', include_headings=False)
        if len(public_transport_sections) > 0:
            description, stops, lines = _parse_public_transport(public_transport_sections[0])
            if description is not None:
                sledrun_json["public_transport_description"] = description
            if len(stops) > 0:
                sledrun_json['public_transport_stops'] = stops
            if len(lines) > 0:
                sledrun_json['public_transport_lines'] = lines

        # All matching car sections are evaluated; a later one overwrites the
        # values of an earlier one.
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
            car_description = _first_text(section)
            if car_description is not None:
                sledrun_json["car_description"] = car_description

            car_parking = []
            for template in section.ifilter_templates(matches='Parkplatz'):
                position = _position_from_template(template, 1, 2)
                if len(position) > 0:
                    car_parking.append({'position': position})
            if len(car_parking) > 0:
                sledrun_json['car_parking'] = car_parking

            car_distances = _parse_car_distances(str(section))
            if len(car_distances) > 0:
                sledrun_json['car_distances'] = car_distances

        # "See also" links are accumulated over all matching sections.
        see_also = []
        for section in general_sections:
            gastronomy = _parse_gastronomy(str(section))
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy
            see_also.extend(_parse_see_also(section.nodes))
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        text = create_sledrun_wiki(sledrun_json, map_json)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)


def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the selected pages.

    If the arguments do not define a page generator, the pywikibot help hint
    for a missing generator is shown instead.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = SledrunWikiTextToJsonBot(generator=gen)
        bot.run()
    else:
        pywikibot.bot.suggest_help(missing_generator=True)


if __name__ == '__main__':
    main()