3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
42 template_to_json, external_link_to_json
# pywikibot substitutes the literal placeholder '&params;' in the module
# docstring with the shared page-generator help text when showing -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
47 class SledrunWikiTextToJsonBot(
52 AutomaticTWSummaryBot,
def setup(self) -> None:
    """Load the sledrun JSON schema from the wiki.

    Reads the page 'Winterrodeln:Datenschema/Rodelbahn/V1.json' and stores
    the parsed schema in ``self.sledrun_schema``.

    Raises:
        ValueError: If the schema page's content model is not 'json'.
    """
    schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
    # Explicit check instead of ``assert`` so that the validation is not
    # stripped when Python runs with -O.
    if schema.content_model != 'json':
        raise ValueError(
            f"The content model of {schema.title()} is {schema.content_model} instead of json.")
    self.sledrun_schema = json.loads(schema.text)
59 def treat_page(self) -> None:
60 """Load the given page, do some changes, and save it."""
# Converts the current sledrun wikitext page into a sledrun JSON structure
# (plus a map JSON structure parsed from a <wrmap> tag) and saves them to the
# '/Rodelbahn.json' and '/Landkarte.json' subpages.
# NOTE(review): in this view several original lines (guards, returns,
# initialisations) are not visible between the statements below; the comments
# describe only what the visible statements do.

# Warn when the page's content model is not wikitext.
61 wikitext_content_model = 'wikitext'
62 if self.current_page.content_model != wikitext_content_model:
63 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
64 f"instead of {wikitext_content_model}.")
# Parse the wikitext and warn when no wikilink titled 'Kategorie:Rodelbahn'
# is present.
67 wikicode = mwparserfromhell.parse(self.current_page.text)
68 wikilink_list = wikicode.filter_wikilinks()
69 category_sledrun = 'Kategorie:Rodelbahn'
70 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
71 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
# Target subpages for the generated sledrun JSON and map JSON.
74 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
76 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
78 if sledrun_json_page.exists() and map_json_page.exists(): # should be an option
# Collect the 'wrmap' tags and parse the first one into the map JSON.
82 v = wikicode.filter_tags(matches='wrmap')
84 map_json = parse_wrmap(str(v[0]))
# Entries of the sledrun JSON: the page title is the name, and
# 'entry_under_construction' is true when a wikilink to 'Kategorie:In Arbeit'
# exists.
87 "name": self.current_page.title(),
89 "entry_under_construction": sum(1 for c in wikilink_list if c.title == 'Kategorie:In Arbeit') > 0,
# optional_set is a wrpylib helper; presumably it sets the key only when the
# value is present - TODO confirm.
92 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
# Top-level 'Rodelbahnbox' templates; converted with
# rodelbahnbox_from_template when exactly one is found. The rbb[...] keys
# below are the German Rodelbahnbox field names.
94 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
95 if len(rbb_list) == 1:
96 rbb = rodelbahnbox_from_template(rbb_list[0])
# Look up the image as a page in the File namespace and warn when it does
# not exist.
99 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
100 if not image_page.exists():
101 warning(f"{image_page.title()} does not exist.")
102 sledrun_json['image'] = v
104 optional_set(sledrun_json, 'length', rbb['Länge'])
106 v = rbb['Schwierigkeit']
108 sledrun_json['difficulty'] = difficulty_german_to_str(v)
112 sledrun_json['avalanches'] = avalanches_german_to_str(v)
# 'Betreiber' unpacks into a pair stored as has_operator / operator.
114 v, w = rbb['Betreiber']
115 optional_set(sledrun_json, 'has_operator', v)
116 optional_set(sledrun_json, 'operator', w)
118 optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])
120 v, w = rbb['Aufstieg getrennt']
122 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
123 optional_set(sledrun_json, 'walkup_note', w)
125 optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])
# Local helper: turns each (type, note) pair of 'Aufstiegshilfe' into a dict
# and stores the resulting list under 'walkup_supports'.
127 def _walkup_support():
128 walkup_support_rbb = rbb['Aufstiegshilfe']
129 if walkup_support_rbb is not None:
131 for walkup_support_type, note in walkup_support_rbb:
132 walkup_support = {'type': walkup_support_type}
133 optional_set(walkup_support, 'note', note)
134 walkup_supports.append(walkup_support)
135 sledrun_json['walkup_supports'] = walkup_supports
138 v, w = rbb['Beleuchtungsanlage']
140 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
141 optional_set(sledrun_json, 'nightlight_possible_note', w)
143 v, w = rbb['Beleuchtungstage']
144 optional_set(sledrun_json, 'nightlight_weekdays_count', v)
145 optional_set(sledrun_json, 'nightlight_weekdays_note', w)
# 'sled_rental_direct' is true unless the 'Rodelverleih' value equals the
# empty list.
148 v = rbb['Rodelverleih']
150 sledrun_json['sled_rental_direct'] = v != []
# A wikilink found in a rental name is stored as 'wr_page'.
154 name_code = mwparserfromhell.parse(name)
155 wiki_link = next(name_code.ifilter_wikilinks(), None)
156 if isinstance(wiki_link, Wikilink):
157 x['wr_page'] = wikilink_to_json(wiki_link)
160 optional_set(x, 'note', note)
162 sledrun_json['sled_rental'] = w
# 'cachet' is true when 'Gütesiegel' has at least one entry.
166 v = rbb['Gütesiegel']
168 sledrun_json['cachet'] = len(v) > 0
171 optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
172 optional_set(sledrun_json, 'forum_id', rbb['Forumid'])
# Positions: overall position, plus top and bottom built from a position and
# an elevation field each.
176 sledrun_json['position'] = lonlat_to_json(v)
178 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
180 sledrun_json['top'] = v
182 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
184 sledrun_json['bottom'] = v
# Phone info: each (phone, name) pair becomes a dict.
186 v = rbb['Telefonauskunft']
188 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
# Web info: either a single-URL list or an empty list (the deciding
# condition is not visible here).
190 v, w = rbb['Webauskunft']
193 sledrun_json['info_web'] = [{'url': w}]
195 sledrun_json['info_web'] = []
197 v = rbb['Öffentliche Anreise']
199 sledrun_json['public_transport'] = public_transport_german_to_str(v)
# First top-level 'Buttonleiste' template: non-blank 'video', 'webcam' and
# 'Korrektur_To' parameters are copied into the JSON.
202 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
203 bb = next(bb_iter, None)
205 video = bb.get('video', None)
206 if isinstance(video, Parameter) and video.value.strip() != "":
207 sledrun_json['videos'] = [{'url': str(video.value.strip())}]
208 webcam = bb.get('webcam', None)
209 if isinstance(webcam, Parameter) and webcam.value.strip() != "":
210 sledrun_json['webcams'] = [{'url': str(webcam.value.strip())}]
211 correction = bb.get('Korrektur_To', None)
212 if isinstance(correction, Parameter) and correction.value.strip() != "":
213 sledrun_json['correction_email'] = correction.value.strip()
# Local helper: extracts description, stops, lines and links from the
# level-2 section 'Anreise mit öffentlichen Verkehrsmitteln'.
216 def _public_transport():
217 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
218 include_headings=False)
219 if len(pt_sections) < 1:
# The text in front of the first '*' list tag is the description; it is only
# stored when non-empty and not the standard placeholder sentence.
222 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
224 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
225 if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
226 "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
227 sledrun_json["public_transport_description"] = str(description)
229 public_transport_stops = []
230 public_transport_lines = []
231 public_transport_links = []
# Walk the section nodes: templates feed the stop dict 'ya' or the line
# list, external links are collected as links.
233 for node in pt.nodes:
234 if isinstance(node, Template):
# 'Haltestelle': parameters 1 and 2 give municipality and local name;
# parameters 3 and 4 are passed through opt_lonlat_from_str and
# opt_uint_from_str.
235 if node.name == 'Haltestelle':
237 public_transport_stops.append(ya)
238 if len([1 for p in node.params if len(p.strip()) != 0]) == 0:
241 z = node.get(1, None)
243 ya['municipality'] = str(z)
244 z = node.get(2, None)
246 ya['name_local'] = str(z)
247 za = str(node.get(3, '')).strip()
248 zb = str(node.get(4, '')).strip()
249 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
# Timetable templates of the VVT / VVV / SVV variants are attached to the
# current stop under the key matching their kind.
252 elif node.name in ["Fahrplan Abfahrtsmonitor VVT", "Fahrplan Abfahrtsmonitor VVV",
253 "Fahrplan Abfahrtsmonitor SVV"]:
254 ya['monitor_template'] = template_to_json(node)
255 elif node.name in ["Fahrplan Hinfahrt VVT", "Fahrplan Hinfahrt VVV", "Fahrplan Hinfahrt SVV"]:
256 ya['route_arrival_template'] = template_to_json(node)
257 elif node.name in ["Fahrplan Rückfahrt VVT", "Fahrplan Rückfahrt VVV", "Fahrplan Rückfahrt SVV"]:
258 ya['route_departure_template'] = template_to_json(node)
259 elif node.name in ["Fahrplan Linie VVT", "Fahrplan Linie VVV", "Fahrplan Linie SVV"]:
261 public_transport_stops.append(ya)
264 'timetable_template': template_to_json(node),
266 public_transport_lines.append(y)
267 elif isinstance(node, ExternalLink):
268 public_transport_links.append(external_link_to_json(node))
270 public_transport_stops.append(ya)
# Only non-empty lists are written to the JSON.
271 if len(public_transport_stops) > 0:
272 sledrun_json['public_transport_stops'] = public_transport_stops
273 if len(public_transport_lines) > 0:
274 sledrun_json['public_transport_lines'] = public_transport_lines
275 if len(public_transport_links) > 0:
276 sledrun_json['public_transport_links'] = public_transport_links
# Level-2 section 'Anreise mit dem Auto': description, parking and distances.
280 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
281 if not car_section_list:
283 v = car_section_list[0]
# Description = nodes after the leading headings up to the first '*' list
# tag; the standard placeholder sentence is not stored.
285 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
286 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
288 if description := str(Wikicode(list(description_nodes))).strip():
289 if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
290 "wenn man mit dem Auto zur Rodelbahn anreist."):
291 sledrun_json["car_description"] = description
# 'Parkplatz' templates: parameters 1 and 2 are converted into a position
# entry of 'car_parking'.
294 for w in v.ifilter_templates(matches='Parkplatz'):
295 za = str(w.get(1, '')).strip()
296 zb = str(w.get(2, '')).strip()
297 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
299 x.append({'position': z})
301 sledrun_json['car_parking'] = x
# Distance lines are matched line by line against
# "** von '''<place>'''<rest>: <number> km"; a decimal comma in the number
# is converted to a dot before float().
304 for w in io.StringIO(str(v)):
305 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
307 ya, yb, yc = match.groups()
308 yc = float(yc.replace(',', '.'))
311 'route': (ya.strip() + ' ' + yb.strip()).strip(),
314 sledrun_json['car_distances'] = x
# Level-2 sections matching 'Allgemeines': night light description,
# gastronomy and sled rental description.
318 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
# Local helper: finds the line starting with "* '''Beleuchtung''':" and
# strips that prefix from it.
319 def _nightlight(value: str) -> Optional[str]:
320 line_iter = io.StringIO(value)
321 line = next(line_iter, None)
322 while line is not None and not line.startswith("* '''Beleuchtung''':"):
323 line = next(line_iter, None)
326 line = line.replace("* '''Beleuchtung''':", "").strip()
330 optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))
# Local helper: skips forward to the line "* '''Hütten''':" and then
# processes the following lines that start with '** '.
332 def _gastronomy(value: str):
334 line_iter = io.StringIO(value)
335 line = next(line_iter, None)
336 while line is not None and line.rstrip() != "* '''Hütten''':":
337 line = next(line_iter, None)
340 while line is not None:
341 line = next(line_iter, None)
343 if line.startswith('** '):
# The first wikilink and the first external link of the line become
# 'wr_page' and 'weblink'.
345 wiki = mwparserfromhell.parse(line)
346 wiki_link = next(wiki.ifilter_wikilinks(), None)
347 if isinstance(wiki_link, Wikilink):
348 g['wr_page'] = wikilink_to_json(wiki_link)
349 ext_link = next(wiki.ifilter_external_links(), None)
350 if isinstance(ext_link, ExternalLink):
351 g['weblink'] = external_link_to_json(ext_link)
# Remaining plain text (Text/Tag nodes except the bare '*') is matched
# against "<name> (<note>)"; otherwise, unless empty or '...', it is used
# as the name.
352 remaining = str(Wikicode(n for n in wiki.nodes
353 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
354 match = re.match(r'(.*)\((.+)\)', remaining)
356 name, note = match.groups()
363 elif len(remaining) > 0 and remaining != '...':
364 g['name'] = remaining
371 w = _gastronomy(str(v))
373 sledrun_json['gastronomy'] = w
# Local helper: takes the text after "* '''Rodelverleih''':" and keeps
# reading lines until one starting with '* ' is reached; a non-empty joined
# result is stored as 'sled_rental_description'.
375 def _sled_rental_description():
376 line_iter = io.StringIO(str(v))
377 line = next(line_iter, None)
379 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
380 line = next(line_iter, None)
383 result = [match.group(1)]
384 line = next(line_iter, None)
385 while line is not None and re.match(r"\* ", line) is None:
387 line = next(line_iter, None)
388 description = ''.join(result).strip()
389 if len(description) > 0:
390 sledrun_json['sled_rental_description'] = description
391 _sled_rental_description()
# 'see_also': looks for the tag rendering as '''Siehe auch''' and collects
# external links; blank, '*' and ':' text/tag nodes get separate handling
# (not visible here).
396 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
401 if isinstance(w, ExternalLink):
402 x.append(external_link_to_json(w))
403 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
409 sledrun_json['see_also'] = x
411 sledrun_json['allow_reports'] = True
# Local helper: maps parameters of a 'Tiroler Naturrodelbahn Gütesiegel'
# template to JSON keys.
413 def _tiroler_naturrodelbahn_guetesiegel():
414 for gst in wikicode.filter_templates():
415 if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
419 'Anlagename': 'name',
420 'Organisation': 'organization',
421 'Erstverleihung': 'first_issued',
422 'Verlängerung': 'valid_from',
424 'Thread': 'thread_id',
# NOTE(review): these keys are presumably converted to numbers; the
# conversion itself is not visible here - confirm.
426 numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
427 for key, value in keys.items():
429 v = gst.get(key).value.strip()
435 sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
436 _tiroler_naturrodelbahn_guetesiegel()
# Title of the '/Impressionen' subpage, taken when that page exists.
439 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
440 if sledrun_impressions_page.exists():
441 impressions = sledrun_impressions_page.title()
# Render wikitext back from the JSON and show it together with a diff
# against the current page text.
443 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
444 pywikibot.output(text)
445 pywikibot.output('\03{lightpurple}---\03{default}')
446 pywikibot.showDiff(self.current_page.text, text)
# Validate against the schema loaded in setup(); the assert requires that
# order_json_keys returns content equal to the input.
448 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
449 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
450 assert sledrun_json_ordered == sledrun_json
451 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
# Show the JSON text and save it to the sledrun JSON subpage with content
# model 'json'; this follows the check that the page does not exist yet.
452 if not sledrun_json_page.exists():
453 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
454 pywikibot.output('\03{lightpurple}---\03{default}')
455 pywikibot.output(sledrun_json_text)
456 pywikibot.output('\03{lightpurple}---\03{default}')
457 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')
# Same for the map JSON, when a map was parsed and its subpage is missing.
459 if map_json is not None and not map_json_page.exists():
460 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
461 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
462 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
465 def main(*args: str) -> None:
# Script entry point: processes the command line arguments, builds a page
# generator from them and creates the bot.
# NOTE(review): the lines between the visible statements (presumably the
# check whether a generator was produced and the call that runs the bot) are
# not visible in this view - confirm against the full file.

# Global pywikibot options are handled first; the remaining arguments go to
# the generator factory.
466 local_args = pywikibot.handle_args(args)
467 gen_factory = pagegenerators.GeneratorFactory()
468 gen_factory.handle_args(local_args)
469 gen = gen_factory.getCombinedGenerator(preload=True)
471 bot = SledrunWikiTextToJsonBot(generator=gen)
# Print the help hint about a missing generator.
474 pywikibot.bot.suggest_help(missing_generator=True)
477 if __name__ == '__main__':