3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
# pywikibot substitutes the '&params;' placeholder of the script help text
# with the help for the supported page generators and filters.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
def template_to_json(value: Template) -> dict:
    """Convert a template node into a JSON-serialisable dict.

    :param value: The template node to convert.
    :return: A dict with the template name as a string under ``'name'`` and,
        under ``'parameter'``, a list holding one ``{'value': str(p)}`` entry
        for every parameter of the template, in the order of ``value.params``.
    """
    parameter = [{'value': str(p)} for p in value.params]
    return {
        'name': str(value.name),
        'parameter': parameter,
    }
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wikilink node into a JSON-serialisable dict.

    :param value: The wikilink node to convert.
    :return: A dict with the link target as a string under ``'title'``. The
        key ``'text'`` is only present when the link has a display text.
    """
    wl = {'title': str(value.title)}
    if value.text is not None:
        wl['text'] = str(value.text)
    return wl
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert an external link node into a JSON-serialisable dict.

    :param value: The external link node to convert.
    :return: A dict with the URL as a string under ``'url'``. When the link
        has a title, it is stored as a string under ``'text'``.
    """
    link = {'url': str(value.url)}
    if value.title is not None:
        # The JSON representation calls the link title "text".
        link['text'] = str(value.title)
    return link
70 class SledrunWikiTextToJsonBot(
75 AutomaticTWSummaryBot,
def setup(self) -> None:
    """Read the sledrun JSON schema page and keep the parsed schema on the bot.

    The parsed schema is stored in ``self.sledrun_schema``, which
    ``treat_page`` uses for validating and ordering the generated JSON.
    """
    schema_title = 'Winterrodeln:Datenschema/Rodelbahn/V1.json'
    schema_page = Page(self.site, schema_title)
    # The schema page has to be stored with the JSON content model.
    assert schema_page.content_model == 'json'
    schema_text = schema_page.text
    self.sledrun_schema = json.loads(schema_text)
82 def treat_page(self) -> None:
83 """Load the given page, do some changes, and save it."""
84 wikitext_content_model = 'wikitext'
85 if self.current_page.content_model != wikitext_content_model:
86 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
87 f"instead of {wikitext_content_model}.")
90 wikicode = mwparserfromhell.parse(self.current_page.text)
91 wikilink_list = wikicode.filter_wikilinks()
92 category_sledrun = 'Kategorie:Rodelbahn'
93 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
94 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
97 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
99 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
101 if sledrun_json_page.exists() and map_json_page.exists(): # should be an option
105 v = wikicode.filter_tags(matches='wrmap')
107 map_json = parse_wrmap(str(v[0]))
110 "name": self.current_page.title(),
112 "entry_under_construction": sum(1 for c in wikilink_list if c.title == 'Kategorie:In Arbeit') > 0,
115 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
117 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
118 if len(rbb_list) == 1:
119 rbb = rodelbahnbox_from_template(rbb_list[0])
122 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
123 if not image_page.exists():
124 warning(f"{image_page.title()} does not exist.")
125 sledrun_json['image'] = v
127 optional_set(sledrun_json, 'length', rbb['Länge'])
129 v = rbb['Schwierigkeit']
131 sledrun_json['difficulty'] = difficulty_german_to_str(v)
135 sledrun_json['avalanches'] = avalanches_german_to_str(v)
137 v, w = rbb['Betreiber']
138 optional_set(sledrun_json, 'has_operator', v)
139 optional_set(sledrun_json, 'operator', w)
141 optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])
143 v, w = rbb['Aufstieg getrennt']
145 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
146 optional_set(sledrun_json, 'walkup_note', w)
148 optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])
150 def _walkup_support():
151 walkup_support_rbb = rbb['Aufstiegshilfe']
152 if walkup_support_rbb is not None:
154 for walkup_support_type, note in walkup_support_rbb:
155 walkup_support = {'type': walkup_support_type}
156 optional_set(walkup_support, 'note', note)
157 walkup_supports.append(walkup_support)
158 sledrun_json['walkup_supports'] = walkup_supports
161 v, w = rbb['Beleuchtungsanlage']
163 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
164 optional_set(sledrun_json, 'nightlight_possible_note', w)
166 v, w = rbb['Beleuchtungstage']
167 optional_set(sledrun_json, 'nightlight_weekdays_count', v)
168 optional_set(sledrun_json, 'nightlight_weekdays_note', w)
171 v = rbb['Rodelverleih']
173 sledrun_json['sled_rental_direct'] = v != []
177 name_code = mwparserfromhell.parse(name)
178 wiki_link = next(name_code.ifilter_wikilinks(), None)
179 if isinstance(wiki_link, Wikilink):
180 x['wr_page'] = wikilink_to_json(wiki_link)
183 optional_set(x, 'note', note)
185 sledrun_json['sled_rental'] = w
189 v = rbb['Gütesiegel']
191 sledrun_json['cachet'] = len(v) > 0
194 optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
195 optional_set(sledrun_json, 'forum_id', rbb['Forumid'])
199 sledrun_json['position'] = lonlat_to_json(v)
201 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
203 sledrun_json['top'] = v
205 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
207 sledrun_json['bottom'] = v
209 v = rbb['Telefonauskunft']
211 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
213 v, w = rbb['Webauskunft']
216 sledrun_json['info_web'] = [{'url': w}]
218 sledrun_json['info_web'] = []
220 v = rbb['Öffentliche Anreise']
222 sledrun_json['public_transport'] = public_transport_german_to_str(v)
225 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
226 bb = next(bb_iter, None)
228 video = bb.get('video', None)
229 if isinstance(video, Parameter) and video.value.strip() != "":
230 sledrun_json['videos'] = [{'url': str(video.value.strip())}]
231 correction = bb.get('Korrektur_To', None)
232 if isinstance(correction, Parameter) and correction.value.strip() != "":
233 sledrun_json['correction_email'] = correction.value.strip()
236 def _public_transport():
237 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
238 include_headings=False)
239 if len(pt_sections) < 1:
242 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
244 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
245 if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
246 "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
247 sledrun_json["public_transport_description"] = str(description)
249 public_transport_stops = []
250 public_transport_lines = []
251 public_transport_links = []
253 for node in pt.nodes:
254 if isinstance(node, Template):
255 if node.name == 'Haltestelle':
257 public_transport_stops.append(ya)
258 if len([1 for p in node.params if len(p.strip()) != 0]) == 0:
261 z = node.get(1, None)
263 ya['municipality'] = str(z)
264 z = node.get(2, None)
266 ya['name_local'] = str(z)
267 za = str(node.get(3, '')).strip()
268 zb = str(node.get(4, '')).strip()
269 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
272 elif node.name in ["Fahrplan Abfahrtsmonitor VVT", "Fahrplan Abfahrtsmonitor VVV"]:
273 ya['monitor_template'] = template_to_json(node)
274 elif node.name in ["Fahrplan Hinfahrt VVT", "Fahrplan Hinfahrt VVV"]:
275 ya['route_arrival_template'] = template_to_json(node)
276 elif node.name in ["Fahrplan Rückfahrt VVT", "Fahrplan Rückfahrt VVV"]:
277 ya['route_departure_template'] = template_to_json(node)
278 elif node.name in ["Fahrplan Linie VVT", "Fahrplan Linie VVV"]:
280 public_transport_stops.append(ya)
283 'timetable_template': template_to_json(node),
285 public_transport_lines.append(y)
286 elif isinstance(node, ExternalLink):
287 public_transport_links.append(external_link_to_json(node))
289 public_transport_stops.append(ya)
290 if len(public_transport_stops) > 0:
291 sledrun_json['public_transport_stops'] = public_transport_stops
292 if len(public_transport_lines) > 0:
293 sledrun_json['public_transport_lines'] = public_transport_lines
294 if len(public_transport_links) > 0:
295 sledrun_json['public_transport_links'] = public_transport_links
299 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
300 if not car_section_list:
302 v = car_section_list[0]
304 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
305 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
307 if description := str(Wikicode(list(description_nodes))).strip():
308 if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
309 "wenn man mit dem Auto zur Rodelbahn anreist."):
310 sledrun_json["car_description"] = description
313 for w in v.ifilter_templates(matches='Parkplatz'):
314 za = str(w.get(1, '')).strip()
315 zb = str(w.get(2, '')).strip()
316 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
318 x.append({'position': z})
320 sledrun_json['car_parking'] = x
323 for w in io.StringIO(str(v)):
324 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
326 ya, yb, yc = match.groups()
327 yc = float(yc.replace(',', '.'))
330 'route': (ya.strip() + ' ' + yb.strip()).strip(),
333 sledrun_json['car_distances'] = x
337 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
338 def _nightlight(value: str) -> Optional[str]:
339 line_iter = io.StringIO(value)
340 line = next(line_iter, None)
341 while line is not None and not line.startswith("* '''Beleuchtung''':"):
342 line = next(line_iter, None)
345 line = line.replace("* '''Beleuchtung''':", "").strip()
349 optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))
351 def _gastronomy(value: str):
353 line_iter = io.StringIO(value)
354 line = next(line_iter, None)
355 while line is not None and line.rstrip() != "* '''Hütten''':":
356 line = next(line_iter, None)
359 while line is not None:
360 line = next(line_iter, None)
362 if line.startswith('** '):
364 wiki = mwparserfromhell.parse(line)
365 wiki_link = next(wiki.ifilter_wikilinks(), None)
366 if isinstance(wiki_link, Wikilink):
367 g['wr_page'] = wikilink_to_json(wiki_link)
368 ext_link = next(wiki.ifilter_external_links(), None)
369 if isinstance(ext_link, ExternalLink):
370 g['weblink'] = external_link_to_json(ext_link)
371 remaining = str(Wikicode(n for n in wiki.nodes
372 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
373 match = re.match(r'(.*)\((.+)\)', remaining)
375 name, note = match.groups()
382 elif len(remaining) > 0 and remaining != '...':
383 g['name'] = remaining
384 if len(gastronomy) != 0:
390 w = _gastronomy(str(v))
392 sledrun_json['gastronomy'] = w
394 def _sled_rental_description():
395 line_iter = io.StringIO(str(v))
396 line = next(line_iter, None)
398 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
399 line = next(line_iter, None)
402 result = [match.group(1)]
403 line = next(line_iter, None)
404 while line is not None and re.match(r"\* ", line) is None:
406 line = next(line_iter, None)
407 description = ''.join(result).strip()
408 if len(description) > 0:
409 sledrun_json['sled_rental_description'] = description
410 _sled_rental_description()
415 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
420 if isinstance(w, ExternalLink):
421 x.append(external_link_to_json(w))
422 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
428 sledrun_json['see_also'] = x
430 sledrun_json['allow_reports'] = True
432 def _tiroler_naturrodelbahn_guetesiegel():
433 for gst in wikicode.filter_templates():
434 if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
438 'Anlagename': 'name',
439 'Organisation': 'organization',
440 'Erstverleihung': 'first_issued',
441 'Verlängerung': 'valid_from',
443 'Thread': 'thread_id',
445 numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
446 for key, value in keys.items():
448 v = gst.get(key).value.strip()
454 sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
455 _tiroler_naturrodelbahn_guetesiegel()
458 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
459 if sledrun_impressions_page.exists():
460 impressions = sledrun_impressions_page.title()
462 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
463 pywikibot.output(text)
464 pywikibot.output('\03{lightpurple}---\03{default}')
465 pywikibot.showDiff(self.current_page.text, text)
467 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
468 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
469 assert sledrun_json_ordered == sledrun_json
470 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
471 if not sledrun_json_page.exists():
472 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
473 pywikibot.output('\03{lightpurple}---\03{default}')
474 pywikibot.output(sledrun_json_text)
475 pywikibot.output('\03{lightpurple}---\03{default}')
476 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')
478 if map_json is not None and not map_json_page.exists():
479 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
480 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
481 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    Global pywikibot options are handled first; the remaining arguments are
    passed to the page generator factory. Without a resulting generator the
    bot is not started and a help hint is shown instead.

    :param args: Command line arguments.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = SledrunWikiTextToJsonBot(generator=gen)
        bot.run()
    else:
        # No pages were selected on the command line.
        pywikibot.bot.suggest_help(missing_generator=True)
496 if __name__ == '__main__':