#!/usr/bin/python
"""
User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
Put it in directory scripts/userscripts.

Update a sledrun JSON page from a detail in a sledrun wikitext page.

The following generators and filters are supported:

&params;
"""
import io
import json
import re
from itertools import takewhile, dropwhile
from typing import Optional

import jsonschema
import mwparserfromhell
from mwparserfromhell.nodes.extras import Parameter

import pywikibot
from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
from mwparserfromhell.wikicode import Wikicode
from pywikibot import pagegenerators, Page
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    SingleSiteBot,
)
from pywikibot.logging import warning
from pywikibot.site._namespace import BuiltinNamespace

from wrpylib.json_tools import order_json_keys
from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
    avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
    opt_uint_from_str
from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
    external_link_to_json

# The key must match the placeholder used in the module docstring above; it is
# substituted with the generator/filter help text when the script help is shown.
docuReplacements = {'&params;': pagegenerators.parameterHelp}

# Bullet line that introduces the list of huts inside the "Allgemeines" section.
_HUTS_HEADER = "* '''Hütten''':"

# Splits "Name (note)" into its name and note parts. Both groups are greedy, so
# the note is the content of the last parenthesised group on the line.
_NAME_NOTE_PATTERN = re.compile(r'(.*)\((.+)\)')


def _parse_gastronomy_entry(line: str) -> dict:
    """Convert one ``** ...`` hut line into a gastronomy JSON entry.

    :param line: A single wikitext line starting with ``'** '``.
    :return: A dict with any of the keys ``wr_page`` (first wiki link),
        ``weblink`` (first external link), ``name`` and ``note``. The dict is
        empty if the line carries no usable information.
    """
    entry = {}
    wiki = mwparserfromhell.parse(line)

    wiki_link = next(wiki.ifilter_wikilinks(), None)
    if isinstance(wiki_link, Wikilink):
        entry['wr_page'] = wikilink_to_json(wiki_link)

    ext_link = next(wiki.ifilter_external_links(), None)
    if isinstance(ext_link, ExternalLink):
        entry['weblink'] = external_link_to_json(ext_link)

    # What is left after dropping the links and the bullet markers is the
    # free-text part of the line: a plain name, optionally followed by a note
    # in parentheses.
    text_nodes = [
        node for node in wiki.nodes
        if isinstance(node, (Text, Tag)) and str(node).strip() != '*'
    ]
    remaining = str(Wikicode(text_nodes)).strip()

    match = _NAME_NOTE_PATTERN.match(remaining)
    if match:
        name, note = (part.strip() for part in match.groups())
        if name:
            entry['name'] = name
        if note:
            entry['note'] = note
    elif remaining and remaining != '...':
        # '...' is treated as a placeholder and not stored as a name.
        entry['name'] = remaining
    return entry


def _parse_gastronomy(section_text: str) -> list:
    """Extract the gastronomy (huts) entries from a wikitext section.

    The function looks for the line ``* '''Hütten''':`` and converts the
    ``** ...`` lines that directly follow it. Parsing stops at the first line
    that does not start with ``'** '``.

    :param section_text: Wikitext of the section to scan.
    :return: List of gastronomy entries (possibly empty). Lines that yield no
        information are skipped.
    """
    lines = iter(io.StringIO(section_text))

    # Advance the iterator just past the huts header line.
    for line in lines:
        if line.rstrip() == _HUTS_HEADER:
            break
    else:
        return []  # no huts header in this section

    entries = []
    for line in takewhile(lambda candidate: candidate.startswith('** '), lines):
        entry = _parse_gastronomy_entry(line)
        if entry:
            entries.append(entry)
    return entries


class UpdateSledrunJsonFromWikiText(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that copies the gastronomy list of a sledrun wikitext page to its JSON sub page."""

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki.

        :raises ValueError: If the schema page does not have the ``json``
            content model.
        """
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        # Explicit check instead of ``assert`` so that it is not stripped
        # when Python runs with -O.
        if schema.content_model != 'json':
            raise ValueError(
                f"The content model of {schema.title()} is {schema.content_model} instead of json.")
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Update the ``gastronomy`` key of the ``/Rodelbahn.json`` sub page of the current page.

        Pages that are not wikitext and pages without an existing JSON sub page
        are skipped. The updated JSON is validated against the schema loaded in
        :meth:`setup` and is only saved if it differs from the stored JSON.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if not sledrun_json_page.exists():
            return

        # Two independent parses: one copy is modified, the other is kept
        # untouched for the change detection and for the "old text" of the diff.
        sledrun_json = json.loads(sledrun_json_page.text)
        sledrun_json_orig = json.loads(sledrun_json_page.text)
        sledrun_json_orig_text = json.dumps(sledrun_json_orig, ensure_ascii=False, indent=4)

        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            gastronomy = _parse_gastronomy(str(section))
            if gastronomy:
                sledrun_json['gastronomy'] = gastronomy

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Sanity check: ordering the keys must not change the content.
        assert sledrun_json_ordered == sledrun_json
        if sledrun_json == sledrun_json_orig:
            return

        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Gastronomie Information im Rodelbahn JSON aktualisiert vom Wikitext.'
        self.userPut(sledrun_json_page, sledrun_json_orig_text, sledrun_json_text,
                     summary=summary, contentmodel='json')


def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    :param args: Command line arguments; handled by pywikibot and by the
        page generator factory.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = UpdateSledrunJsonFromWikiText(generator=gen)
        bot.run()
    else:
        pywikibot.bot.suggest_help(missing_generator=True)


if __name__ == '__main__':
    main()