3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
# Placeholder-to-text map for the module docstring: the '&params;' placeholder
# stands for the shared page-generator help text. The key must be spelled
# exactly '&params;' (ampersand, "params", semicolon) to match the placeholder.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
def template_to_json(value: Template) -> dict:
    """Convert a parsed template node into a plain dict.

    :param value: The template node to convert.
    :return: A dict holding the template name as a string under ``'name'``
        and, under ``'parameter'``, a list with one ``{'value': <str>}``
        entry per template parameter, in the order of ``value.params``.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(p)} for p in value.params],
    }
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a parsed wikilink node into a plain dict.

    :param value: The wikilink node to convert.
    :return: A dict with the link target as a string under ``'title'``; the
        key ``'text'`` is only present when the link has a display text.
    """
    wl = {'title': str(value.title)}
    if value.text is not None:
        wl['text'] = str(value.text)
    # Callers store the result (e.g. as 'wr_page'), so it has to be returned.
    return wl
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert a parsed external link node into a plain dict.

    :param value: The external link node to convert.
    :return: A dict with the URL as a string under ``'url'``; the key
        ``'text'`` is only present when the link has a title.
    """
    link = {'url': str(value.url)}
    if value.title is not None:
        link['text'] = str(value.title)
    # Callers append/store the result, so it has to be returned.
    return link
70 class SledrunWikiTextToJsonBot(
75 AutomaticTWSummaryBot,
def setup(self) -> None:
    """Load the sledrun JSON schema from the wiki and keep it on the bot.

    Reads the page ``Winterrodeln:Datenschema/Rodelbahn/V1.json`` from
    ``self.site`` and stores the parsed schema in ``self.sledrun_schema``.

    :raises ValueError: If the schema page does not use the ``json``
        content model, or if its text is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
    # Explicit check instead of ``assert`` so that the validation is not
    # stripped when Python runs with ``-O``.
    if schema.content_model != 'json':
        raise ValueError(
            f"The content model of {schema.title()} is {schema.content_model} instead of json."
        )
    self.sledrun_schema = json.loads(schema.text)
82 def treat_page(self) -> None:
83 """Load the given page, do some changes, and save it."""
# NOTE(review): this listing has no indentation and the embedded original line
# numbers have gaps, so some statements (guards, initialisers, returns) are not
# visible here. The comments below describe only what the visible statements do.
#
# Warn when the current page does not use the wikitext content model.
84 wikitext_content_model = 'wikitext'
85 if self.current_page.content_model != wikitext_content_model:
86 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
87 f"instead of {wikitext_content_model}.")
# Parse the page text and warn when no wikilink is titled 'Kategorie:Rodelbahn'.
90 wikicode = mwparserfromhell.parse(self.current_page.text)
91 wikilink_list = wikicode.filter_wikilinks()
92 category_sledrun = 'Kategorie:Rodelbahn'
93 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
94 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
# Target pages are sub-pages of the current page: '/Rodelbahn.json' and '/Landkarte.json'.
97 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
99 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
# The map JSON is parsed from the first <wrmap> tag found on the page.
102 v = wikicode.filter_tags(matches='wrmap')
104 map_json = parse_wrmap(str(v[0]))
# Start of the sledrun JSON: the name is the page title.
107 "name": self.current_page.title(),
# NOTE(review): the category check above compares c.title, whereas this one
# compares c.text with 'Kategorie:In Arbeit' - confirm which attribute is intended.
109 "entry_under_construction": sum(1 for c in wikilink_list if c.text == 'Kategorie:In Arbeit') > 0,
112 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
# The top-level 'Rodelbahnbox' template is converted when exactly one is present.
114 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
115 if len(rbb_list) == 1:
116 rbb = rodelbahnbox_from_template(rbb_list[0])
# Image: warn when the referenced page in the File namespace does not exist; v is stored under 'image'.
119 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
120 if not image_page.exists():
121 warning(f"{image_page.title()} does not exist.")
122 sledrun_json['image'] = v
# Copy the Rodelbahnbox fields (German keys) to their JSON keys, converting
# German enumeration values with the *_german_to_str helpers.
124 optional_set(sledrun_json, 'length', rbb['Länge'])
126 v = rbb['Schwierigkeit']
128 sledrun_json['difficulty'] = difficulty_german_to_str(v)
132 sledrun_json['avalanches'] = avalanches_german_to_str(v)
# Several fields unpack into a pair (v, w); the second element is stored as operator/comment.
134 v, w = rbb['Betreiber']
135 optional_set(sledrun_json, 'has_operator', v)
136 optional_set(sledrun_json, 'operator', w)
138 optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])
140 v, w = rbb['Aufstieg getrennt']
142 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
143 optional_set(sledrun_json, 'walkup_comment', w)
145 optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])
# Helper: turn rbb['Aufstiegshilfe'] (pairs of type and comment) into a list of
# {'type': ..., optional 'comment': ...} dicts stored under 'walkup_supports'.
147 def _walkup_support():
148 walkup_support_rbb = rbb['Aufstiegshilfe']
149 if walkup_support_rbb is not None:
151 for walkup_support_type, comment in walkup_support_rbb:
152 walkup_support = {'type': walkup_support_type}
153 optional_set(walkup_support, 'comment', comment)
154 walkup_supports.append(walkup_support)
155 sledrun_json['walkup_supports'] = walkup_supports
# Night light: availability (tristate) with comment, and weekday count with comment.
158 v, w = rbb['Beleuchtungsanlage']
160 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
161 optional_set(sledrun_json, 'nightlight_possible_comment', w)
163 v, w = rbb['Beleuchtungstage']
164 optional_set(sledrun_json, 'nightlight_weekdays_count', v)
165 optional_set(sledrun_json, 'nightlight_weekdays_comment', w)
# Sled rental: 'sled_rental_direct' is True unless the list is empty.
168 v = rbb['Rodelverleih']
170 sledrun_json['sled_rental_direct'] = v != []
172 for name, comment in v:
# The name is parsed as wikitext; its first wikilink (if any) becomes 'wr_page'.
174 name_code = mwparserfromhell.parse(name)
175 wiki_link = next(name_code.ifilter_wikilinks(), None)
176 if isinstance(wiki_link, Wikilink):
177 x['wr_page'] = wikilink_to_json(wiki_link)
180 optional_set(x, 'comment', comment)
182 sledrun_json['sled_rental'] = w
# 'cachet' is a boolean: True when the Gütesiegel value is non-empty.
186 v = rbb['Gütesiegel']
188 sledrun_json['cachet'] = len(v) > 0
191 optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
192 optional_set(sledrun_json, 'forum_id', rbb['Forumid'])
# Positions: overall position, plus top and bottom built from a position and an elevation.
196 sledrun_json['position'] = lonlat_to_json(v)
198 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
200 sledrun_json['top'] = v
202 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
204 sledrun_json['bottom'] = v
# Phone info: each (phone, name) pair becomes a {'phone', 'name'} dict.
206 v = rbb['Telefonauskunft']
208 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
# Web info: either a single {'url': w} entry or an empty list (the deciding conditions are not visible here).
210 v, w = rbb['Webauskunft']
213 sledrun_json['info_web'] = [{'url': w}]
215 sledrun_json['info_web'] = []
217 v = rbb['Öffentliche Anreise']
219 sledrun_json['public_transport'] = public_transport_german_to_str(v)
# The first top-level 'Buttonleiste' template may carry a 'video' parameter.
222 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
223 bb = next(bb_iter, None)
225 video = bb.get('video', None)
226 if isinstance(video, Parameter):
# NOTE(review): video.value is stored without str() conversion, unlike the other
# node values in this method - confirm that it validates and serializes as intended.
227 sledrun_json['videos'] = [{'url': video.value}]
# Helper: convert the level-2 section 'Anreise mit öffentlichen Verkehrsmitteln'.
230 def _public_transport():
231 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
232 include_headings=False)
233 if len(pt_sections) < 1:
# The description is the text in front of the first '*' list-item tag of the section.
236 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
238 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
240 sledrun_json["public_transport_description"] = str(description)
# Stops, lines and links are collected separately while walking the section nodes.
242 public_transport_stops = []
243 public_transport_lines = []
244 public_transport_links = []
246 for node in pt.nodes:
247 if isinstance(node, Template):
# 'Haltestelle' describes a stop dict `ya`: parameter 1 is the municipality,
# parameter 2 the local name, parameters 3 and 4 feed the position/elevation.
# `ya` is appended to public_transport_stops at several points; the guarding
# conditions are not visible here (presumably this flushes the previous stop).
248 if node.name == 'Haltestelle':
250 public_transport_stops.append(ya)
252 z = node.get(1, None)
254 ya['municipality'] = str(z)
255 z = node.get(2, None)
257 ya['name_local'] = str(z)
258 za = str(node.get(3, '')).strip()
259 zb = str(node.get(4, '')).strip()
260 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
# The VVT timetable templates are attached to the current stop as template JSON.
263 elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
264 ya['monitor_template'] = template_to_json(node)
265 elif node.name in ["Fahrplan Hinfahrt VVT"]:
266 ya['route_arrival_template'] = template_to_json(node)
267 elif node.name in ["Fahrplan Rückfahrt VVT"]:
268 ya['route_departure_template'] = template_to_json(node)
# 'Fahrplan Linie VVT' yields an entry in public_transport_lines.
269 elif node.name in ["Fahrplan Linie VVT"]:
271 public_transport_stops.append(ya)
274 'timetable_template': template_to_json(node),
276 public_transport_lines.append(y)
# External links in the section are collected as public transport links.
277 elif isinstance(node, ExternalLink):
278 public_transport_links.append(external_link_to_json(node))
280 public_transport_stops.append(ya)
# Only non-empty lists are written to the JSON.
281 if len(public_transport_stops) > 0:
282 sledrun_json['public_transport_stops'] = public_transport_stops
283 if len(public_transport_lines) > 0:
284 sledrun_json['public_transport_lines'] = public_transport_lines
285 if len(public_transport_links) > 0:
286 sledrun_json['public_transport_links'] = public_transport_links
# Car section: the first level-2 section matching 'Anreise mit dem Auto'.
290 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
291 if not car_section_list:
293 v = car_section_list[0]
# Description: skip leading headings, then take nodes up to the first '*' list-item tag;
# it is stored only when the stripped text is non-empty.
295 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
296 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
298 if description := str(Wikicode(list(description_nodes))).strip():
299 sledrun_json["car_description"] = description
# Each 'Parkplatz' template contributes a position built from its parameters 1 and 2.
302 for w in v.ifilter_templates(matches='Parkplatz'):
303 za = str(w.get(1, '')).strip()
304 zb = str(w.get(2, '')).strip()
305 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
307 x.append({'position': z})
309 sledrun_json['car_parking'] = x
# Distances: scan the section line by line for "** von '''<place>'''<rest>: <number> km";
# the number may use a decimal comma, which is replaced by a dot before float().
312 for w in io.StringIO(str(v)):
313 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
315 ya, yb, yc = match.groups()
316 yc = float(yc.replace(',', '.'))
319 'route': (ya.strip() + ' ' + yb.strip()).strip(),
322 sledrun_json['car_distances'] = x
# General section(s): every level-2 section matching 'Allgemeines' is scanned
# for night light, gastronomy and sled rental descriptions.
326 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
# Helper: find the line starting with "* '''Beleuchtung''':" and strip that prefix from it.
327 def _nightlight(value: str) -> Optional[str]:
328 line_iter = io.StringIO(value)
329 line = next(line_iter, None)
330 while line is not None and not line.startswith("* '''Beleuchtung''':"):
331 line = next(line_iter, None)
334 line = line.replace("* '''Beleuchtung''':", "").strip()
338 optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))
# Helper: seek the line "* '''Hütten''':" and convert the following '** ' lines.
340 def _gastronomy(value: str):
342 line_iter = io.StringIO(value)
343 line = next(line_iter, None)
344 while line is not None and line.rstrip() != "* '''Hütten''':":
345 line = next(line_iter, None)
348 while line is not None:
349 line = next(line_iter, None)
351 if line.startswith('** '):
# Per entry: the first wikilink becomes 'wr_page', the first external link 'weblink'.
353 wiki = mwparserfromhell.parse(line)
354 wiki_link = next(wiki.ifilter_wikilinks(), None)
355 if isinstance(wiki_link, Wikilink):
356 g['wr_page'] = wikilink_to_json(wiki_link)
357 ext_link = next(wiki.ifilter_external_links(), None)
358 if isinstance(ext_link, ExternalLink):
359 g['weblink'] = external_link_to_json(ext_link)
# The remaining Text/Tag nodes (minus the bare '*' markers) form the note;
# a leading parenthesised part is reduced to its content.
360 remaining = str(Wikicode(n for n in wiki.nodes
361 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
362 match = re.match(r'\((.+)\)', remaining)
364 remaining = match.group(1)
365 if len(remaining) > 0:
366 g['note'] = remaining
372 w = _gastronomy(str(v))
374 sledrun_json['gastronomy'] = w
# Helper: find the "* '''Rodelverleih''':" line and collect its text until the
# next line starting with "* "; the joined, stripped text is stored.
376 def _sled_rental_description():
377 line_iter = io.StringIO(str(v))
378 line = next(line_iter, None)
380 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
381 line = next(line_iter, None)
384 result = [match.group(1)]
385 line = next(line_iter, None)
386 while line is not None and re.match(r"\* ", line) is None:
388 line = next(line_iter, None)
389 sledrun_json['sled_rental_description'] = ''.join(result).strip()
390 _sled_rental_description()
# See also: starts at the bold '''Siehe auch''' tag; external links are collected,
# while whitespace, '*' and ':' nodes are matched separately (their handling is not visible here).
395 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
400 if isinstance(w, ExternalLink):
401 x.append(external_link_to_json(w))
402 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
408 sledrun_json['see_also'] = x
410 sledrun_json['allow_reports'] = True
# Impressions: the title of the '/Impressionen' sub-page is used when that page exists.
413 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
414 if sledrun_impressions_page.exists():
415 impressions = sledrun_impressions_page.title()
# Render wikitext back from the JSON, print it and show a diff against the current page text.
417 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
418 pywikibot.output(text)
419 pywikibot.output('\03{lightpurple}---\03{default}')
420 pywikibot.showDiff(self.current_page.text, text)
# Validate against the schema loaded in setup(), order the keys by the schema and
# check that the ordered result still compares equal to the original dict.
422 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
423 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
424 assert sledrun_json_ordered == sledrun_json
425 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
426 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
427 pywikibot.output('\03{lightpurple}---\03{default}')
428 pywikibot.output(sledrun_json_text)
429 pywikibot.output('\03{lightpurple}---\03{default}')
# Save the sledrun JSON page via userPut, passing its current text as the old text.
430 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary)
# Save the map JSON page the same way.
432 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
433 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
434 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary)
def main(*args: str) -> None:
    """Process command line arguments and run the bot.

    :param args: Command line arguments; global pywikibot options are
        handled first, the rest is passed to the page generator factory.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = SledrunWikiTextToJsonBot(generator=gen)
        bot.run()
    else:
        # Without a generator there are no pages to work on: show the help hint.
        pywikibot.bot.suggest_help(missing_generator=True)
# Run the bot only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()