3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
42 template_to_json, external_link_to_json
# Placeholder substitution table for the module docstring: the literal token
# '&params;' is mapped to the generic page-generator help text.
# The key must be spelled '&params;' exactly (it had been garbled to a
# pilcrow sequence by an HTML-entity decode of "&para"), otherwise the
# placeholder would never be matched.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
47 class SledrunWikiTextToJsonBot(
52 AutomaticTWSummaryBot,
def setup(self) -> None:
    """Load the sledrun JSON schema from the wiki and keep it on the bot.

    Reads the page 'Winterrodeln:Datenschema/Rodelbahn/V1.json' from
    ``self.site``, parses its text as JSON and stores the result in
    ``self.sledrun_schema`` (used by ``treat_page`` to validate and order
    the generated sledrun JSON).

    Raises:
        AssertionError: if the schema page's content model is not 'json'.
        json.JSONDecodeError: propagated from ``json.loads`` if the page
            text is not valid JSON.
    """
    expected_content_model = 'json'
    schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
    # Explicit check instead of a bare ``assert``: assert statements are
    # removed when Python runs with -O, which would silently skip this
    # validation of data coming from the wiki. The exception type is kept
    # so existing callers observe the same failure mode.
    if schema.content_model != expected_content_model:
        raise AssertionError(
            f"The content model of {schema.title()} is {schema.content_model} "
            f"instead of {expected_content_model}.")
    self.sledrun_schema = json.loads(schema.text)
59 def treat_page(self) -> None:
60 """Load the given page, do some changes, and save it."""
61 wikitext_content_model = 'wikitext'
62 if self.current_page.content_model != wikitext_content_model:
63 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
64 f"instead of {wikitext_content_model}.")
67 wikicode = mwparserfromhell.parse(self.current_page.text)
68 wikilink_list = wikicode.filter_wikilinks()
69 category_sledrun = 'Kategorie:Rodelbahn'
70 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
71 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
74 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
76 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
78 if sledrun_json_page.exists() and map_json_page.exists(): # should be an option
82 v = wikicode.filter_tags(matches='wrmap')
84 map_json = parse_wrmap(str(v[0]))
87 "name": self.current_page.title(),
89 "entry_under_construction": sum(1 for c in wikilink_list if c.title == 'Kategorie:In Arbeit') > 0,
92 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
94 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
95 if len(rbb_list) == 1:
96 rbb = rodelbahnbox_from_template(rbb_list[0])
99 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
100 if not image_page.exists():
101 warning(f"{image_page.title()} does not exist.")
102 sledrun_json['image'] = v
104 optional_set(sledrun_json, 'length', rbb['Länge'])
106 v = rbb['Schwierigkeit']
108 sledrun_json['difficulty'] = difficulty_german_to_str(v)
112 sledrun_json['avalanches'] = avalanches_german_to_str(v)
114 v, w = rbb['Betreiber']
115 optional_set(sledrun_json, 'has_operator', v)
116 optional_set(sledrun_json, 'operator', w)
118 optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])
120 v, w = rbb['Aufstieg getrennt']
122 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
123 optional_set(sledrun_json, 'walkup_note', w)
125 optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])
127 def _walkup_support():
128 walkup_support_rbb = rbb['Aufstiegshilfe']
129 if walkup_support_rbb is not None:
131 for walkup_support_type, note in walkup_support_rbb:
132 walkup_support = {'type': walkup_support_type}
133 optional_set(walkup_support, 'note', note)
134 walkup_supports.append(walkup_support)
135 sledrun_json['walkup_supports'] = walkup_supports
138 v, w = rbb['Beleuchtungsanlage']
140 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
141 optional_set(sledrun_json, 'nightlight_possible_note', w)
143 v, w = rbb['Beleuchtungstage']
144 optional_set(sledrun_json, 'nightlight_weekdays_count', v)
145 optional_set(sledrun_json, 'nightlight_weekdays_note', w)
148 v = rbb['Rodelverleih']
150 sledrun_json['sled_rental_direct'] = v != []
154 name_code = mwparserfromhell.parse(name)
155 wiki_link = next(name_code.ifilter_wikilinks(), None)
156 if isinstance(wiki_link, Wikilink):
157 x['wr_page'] = wikilink_to_json(wiki_link)
160 optional_set(x, 'note', note)
162 sledrun_json['sled_rental'] = w
166 v = rbb['Gütesiegel']
168 sledrun_json['cachet'] = len(v) > 0
171 optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
172 optional_set(sledrun_json, 'forum_id', rbb['Forumid'])
176 sledrun_json['position'] = lonlat_to_json(v)
178 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
180 sledrun_json['top'] = v
182 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
184 sledrun_json['bottom'] = v
186 v = rbb['Telefonauskunft']
188 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
190 v, w = rbb['Webauskunft']
193 sledrun_json['info_web'] = [{'url': w}]
195 sledrun_json['info_web'] = []
197 v = rbb['Öffentliche Anreise']
199 sledrun_json['public_transport'] = public_transport_german_to_str(v)
202 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
203 bb = next(bb_iter, None)
205 video = bb.get('video', None)
206 if isinstance(video, Parameter) and video.value.strip() != "":
207 sledrun_json['videos'] = [{'url': str(video.value.strip())}]
208 correction = bb.get('Korrektur_To', None)
209 if isinstance(correction, Parameter) and correction.value.strip() != "":
210 sledrun_json['correction_email'] = correction.value.strip()
213 def _public_transport():
214 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
215 include_headings=False)
216 if len(pt_sections) < 1:
219 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
221 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
222 if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
223 "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
224 sledrun_json["public_transport_description"] = str(description)
226 public_transport_stops = []
227 public_transport_lines = []
228 public_transport_links = []
230 for node in pt.nodes:
231 if isinstance(node, Template):
232 if node.name == 'Haltestelle':
234 public_transport_stops.append(ya)
235 if len([1 for p in node.params if len(p.strip()) != 0]) == 0:
238 z = node.get(1, None)
240 ya['municipality'] = str(z)
241 z = node.get(2, None)
243 ya['name_local'] = str(z)
244 za = str(node.get(3, '')).strip()
245 zb = str(node.get(4, '')).strip()
246 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
249 elif node.name in ["Fahrplan Abfahrtsmonitor VVT", "Fahrplan Abfahrtsmonitor VVV"]:
250 ya['monitor_template'] = template_to_json(node)
251 elif node.name in ["Fahrplan Hinfahrt VVT", "Fahrplan Hinfahrt VVV"]:
252 ya['route_arrival_template'] = template_to_json(node)
253 elif node.name in ["Fahrplan Rückfahrt VVT", "Fahrplan Rückfahrt VVV"]:
254 ya['route_departure_template'] = template_to_json(node)
255 elif node.name in ["Fahrplan Linie VVT", "Fahrplan Linie VVV"]:
257 public_transport_stops.append(ya)
260 'timetable_template': template_to_json(node),
262 public_transport_lines.append(y)
263 elif isinstance(node, ExternalLink):
264 public_transport_links.append(external_link_to_json(node))
266 public_transport_stops.append(ya)
267 if len(public_transport_stops) > 0:
268 sledrun_json['public_transport_stops'] = public_transport_stops
269 if len(public_transport_lines) > 0:
270 sledrun_json['public_transport_lines'] = public_transport_lines
271 if len(public_transport_links) > 0:
272 sledrun_json['public_transport_links'] = public_transport_links
276 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
277 if not car_section_list:
279 v = car_section_list[0]
281 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
282 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
284 if description := str(Wikicode(list(description_nodes))).strip():
285 if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
286 "wenn man mit dem Auto zur Rodelbahn anreist."):
287 sledrun_json["car_description"] = description
290 for w in v.ifilter_templates(matches='Parkplatz'):
291 za = str(w.get(1, '')).strip()
292 zb = str(w.get(2, '')).strip()
293 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
295 x.append({'position': z})
297 sledrun_json['car_parking'] = x
300 for w in io.StringIO(str(v)):
301 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
303 ya, yb, yc = match.groups()
304 yc = float(yc.replace(',', '.'))
307 'route': (ya.strip() + ' ' + yb.strip()).strip(),
310 sledrun_json['car_distances'] = x
314 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
315 def _nightlight(value: str) -> Optional[str]:
316 line_iter = io.StringIO(value)
317 line = next(line_iter, None)
318 while line is not None and not line.startswith("* '''Beleuchtung''':"):
319 line = next(line_iter, None)
322 line = line.replace("* '''Beleuchtung''':", "").strip()
326 optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))
328 def _gastronomy(value: str):
330 line_iter = io.StringIO(value)
331 line = next(line_iter, None)
332 while line is not None and line.rstrip() != "* '''Hütten''':":
333 line = next(line_iter, None)
336 while line is not None:
337 line = next(line_iter, None)
339 if line.startswith('** '):
341 wiki = mwparserfromhell.parse(line)
342 wiki_link = next(wiki.ifilter_wikilinks(), None)
343 if isinstance(wiki_link, Wikilink):
344 g['wr_page'] = wikilink_to_json(wiki_link)
345 ext_link = next(wiki.ifilter_external_links(), None)
346 if isinstance(ext_link, ExternalLink):
347 g['weblink'] = external_link_to_json(ext_link)
348 remaining = str(Wikicode(n for n in wiki.nodes
349 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
350 match = re.match(r'(.*)\((.+)\)', remaining)
352 name, note = match.groups()
359 elif len(remaining) > 0 and remaining != '...':
360 g['name'] = remaining
367 w = _gastronomy(str(v))
369 sledrun_json['gastronomy'] = w
371 def _sled_rental_description():
372 line_iter = io.StringIO(str(v))
373 line = next(line_iter, None)
375 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
376 line = next(line_iter, None)
379 result = [match.group(1)]
380 line = next(line_iter, None)
381 while line is not None and re.match(r"\* ", line) is None:
383 line = next(line_iter, None)
384 description = ''.join(result).strip()
385 if len(description) > 0:
386 sledrun_json['sled_rental_description'] = description
387 _sled_rental_description()
392 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
397 if isinstance(w, ExternalLink):
398 x.append(external_link_to_json(w))
399 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
405 sledrun_json['see_also'] = x
407 sledrun_json['allow_reports'] = True
409 def _tiroler_naturrodelbahn_guetesiegel():
410 for gst in wikicode.filter_templates():
411 if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
415 'Anlagename': 'name',
416 'Organisation': 'organization',
417 'Erstverleihung': 'first_issued',
418 'Verlängerung': 'valid_from',
420 'Thread': 'thread_id',
422 numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
423 for key, value in keys.items():
425 v = gst.get(key).value.strip()
431 sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
432 _tiroler_naturrodelbahn_guetesiegel()
435 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
436 if sledrun_impressions_page.exists():
437 impressions = sledrun_impressions_page.title()
439 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
440 pywikibot.output(text)
441 pywikibot.output('\03{lightpurple}---\03{default}')
442 pywikibot.showDiff(self.current_page.text, text)
444 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
445 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
446 assert sledrun_json_ordered == sledrun_json
447 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
448 if not sledrun_json_page.exists():
449 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
450 pywikibot.output('\03{lightpurple}---\03{default}')
451 pywikibot.output(sledrun_json_text)
452 pywikibot.output('\03{lightpurple}---\03{default}')
453 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')
455 if map_json is not None and not map_json_page.exists():
456 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
457 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
458 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
461 def main(*args: str) -> None:
462 local_args = pywikibot.handle_args(args)
463 gen_factory = pagegenerators.GeneratorFactory()
464 gen_factory.handle_args(local_args)
465 gen = gen_factory.getCombinedGenerator(preload=True)
467 bot = SledrunWikiTextToJsonBot(generator=gen)
470 pywikibot.bot.suggest_help(missing_generator=True)
473 if __name__ == '__main__':