3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Update a sledrun JSON page from a detail in a sledrun wikitext page.
8 The following generators and filters are supported:
16 from itertools import takewhile, dropwhile
17 from typing import Optional, List, Dict, Iterable
20 import mwparserfromhell
21 from mwparserfromhell.nodes.extras import Parameter
24 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
25 from mwparserfromhell.wikicode import Wikicode
26 from pywikibot import pagegenerators, Page
27 from pywikibot.bot import (
28 AutomaticTWSummaryBot,
34 from pywikibot.logging import warning
35 from pywikibot.site._namespace import BuiltinNamespace
36 from wrpylib.json_tools import order_json_keys
38 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
39 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
40 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
42 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
# Placeholder substitution for the script help text: pywikibot replaces the
# literal '&params;' marker in the module docstring with the shared help for
# page generators and filters. The key must be exactly '&params;' -- the
# previous value '¶ms;' was an HTML-entity decoding artefact ('&para' -> '¶')
# and would never match the marker.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
48 class UpdateSledrunJsonFromWikiText(
53 AutomaticTWSummaryBot,
def setup(self) -> None:
    """Fetch the sledrun JSON schema and keep it on the bot instance.

    Reads the wiki page 'Winterrodeln:Datenschema/Rodelbahn/V1.json' from
    ``self.site``, asserts that its content model is 'json' and stores the
    parsed document in ``self.sledrun_schema``.
    """
    schema_page = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
    content_model = schema_page.content_model
    # The schema page has to be a JSON page, otherwise parsing its text is pointless.
    assert content_model == 'json'
    schema_text = schema_page.text
    self.sledrun_schema = json.loads(schema_text)
60 def treat_page(self) -> None:
61 """Load the given page, do some changes, and save it."""
62 wikitext_content_model = 'wikitext'
63 if self.current_page.content_model != wikitext_content_model:
64 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
65 f"instead of {wikitext_content_model}.")
68 wikicode = mwparserfromhell.parse(self.current_page.text)
70 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
71 if not sledrun_json_page.exists():
73 sledrun_json = json.loads(sledrun_json_page.text)
74 sledrun_json_orig = json.loads(sledrun_json_page.text)
75 sledrun_json_orig_text = json.dumps(sledrun_json_orig, ensure_ascii=False, indent=4)
77 def _strip_brackets(text: str) -> str:
78 """Removes brackets if they are present"""
79 match = re.match(r'\((.+)\)', text)
84 def _parse_weblink(line: str) -> Optional[Dict]:
85 wikicode = mwparserfromhell.parse(line)
86 nodes = dropwhile(lambda node: not isinstance(node, ExternalLink), wikicode.nodes)
87 link = next(nodes, None)
90 remaining = _strip_brackets(str(Wikicode(list(nodes))).strip()).strip()
96 title = str(link.title)
98 title = f'{title} ({remaining})'
99 weblink = {'url': str(link.url)}
100 if title is not None:
101 weblink['text'] = title
104 def _see_also(wikicode: Wikicode) -> Iterable[Dict]:
105 wikicode_common = next(iter(wikicode.get_sections(levels=[2], matches='Allgemeines')), None)
106 wikitext_common = str(wikicode_common)
107 lines = wikitext_common.split('\n')
108 lines = dropwhile(lambda line: "'''Siehe auch'''" not in line, lines)
109 lines = itertools.islice(lines, 1, None) # omit "Siehe auch" line
110 lines = takewhile(lambda line: line.startswith('**'), lines)
112 weblink = _parse_weblink(line)
113 if weblink is not None:
116 see_also_list = list(_see_also(wikicode))
117 if len(see_also_list) > 0:
118 sledrun_json['see_also'] = see_also_list
120 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
121 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
122 assert sledrun_json_ordered == sledrun_json
123 if sledrun_json == sledrun_json_orig:
125 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
126 summary = 'Information zu "Anderen Rodelbahnen" im Rodelbahn JSON aktualisiert vom Wikitext.'
127 self.userPut(sledrun_json_page, sledrun_json_orig_text, sledrun_json_text, summary=summary, contentmodel='json')
130 def main(*args: str) -> None:
131 local_args = pywikibot.handle_args(args)
132 gen_factory = pagegenerators.GeneratorFactory()
133 gen_factory.handle_args(local_args)
134 gen = gen_factory.getCombinedGenerator(preload=True)
136 bot = UpdateSledrunJsonFromWikiText(generator=gen)
139 pywikibot.bot.suggest_help(missing_generator=True)
142 if __name__ == '__main__':