3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
42 template_to_json, external_link_to_json
# Placeholder replacements for the script help: pywikibot substitutes the key found in the module
# docstring with the help text of the page generator options. The key has to be the literal
# '&params;' (it had been garbled into a pilcrow sign by an HTML entity decoding step).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
47 class SledrunWikiTextToJsonBot(
52 AutomaticTWSummaryBot,
def setup(self) -> None:
    """Load the sledrun JSON schema and keep it in ``self.sledrun_schema``.

    The schema is read from the wiki page 'Winterrodeln:Datenschema/Rodelbahn/V1.json' of ``self.site``
    and decoded with ``json.loads``.

    :raises ValueError: if the content model of the schema page is not 'json'
        (``json.loads`` raises a ``ValueError`` subclass as well if the page text is not valid JSON).
    """
    schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
    # Explicit check instead of `assert`: assert statements are removed when Python runs with -O,
    # which would let a page with a wrong content model slip through unnoticed.
    if schema.content_model != 'json':
        raise ValueError(f"The content model of {schema.title()} is {schema.content_model} instead of json.")
    self.sledrun_schema = json.loads(schema.text)
59 def treat_page(self) -> None:
60 """Load the given page, do some changes, and save it."""
# Overview: parses the wikitext of the current page, collects the sledrun data in `sledrun_json`
# (and the map in `map_json`), validates the result against self.sledrun_schema and hands the JSON
# texts for the sub pages '/Rodelbahn.json' and '/Landkarte.json' to userPut().
# NOTE(review): this listing appears to have gaps (e.g. the statement creating `sledrun_json` and
# several `if`/`return` lines are not visible) - confirm against the complete file before changing logic.
#
# Warn if the content model of the page is not wikitext.
61 wikitext_content_model = 'wikitext'
62 if self.current_page.content_model != wikitext_content_model:
63 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
64 f"instead of {wikitext_content_model}.")
# Parse the wikitext and warn if no wikilink to the category 'Kategorie:Rodelbahn' is found.
67 wikicode = mwparserfromhell.parse(self.current_page.text)
68 wikilink_list = wikicode.filter_wikilinks()
69 category_sledrun = 'Kategorie:Rodelbahn'
70 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
71 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
# Target pages: the sledrun data and the map data are sub pages of the current page.
74 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
76 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
# NOTE(review): presumably the page is skipped when both target pages exist - the body of this
# `if` is not visible here, confirm.
78 if sledrun_json_page.exists() and map_json_page.exists(): # should be an option
# The map JSON is parsed from the first element returned for the 'wrmap' tag filter.
82 v = wikicode.filter_tags(matches='wrmap')
84 map_json = parse_wrmap(str(v[0]))
# Basic entries: the page title and whether the page links to the category 'Kategorie:In Arbeit'.
87 "name": self.current_page.title(),
89 "entry_under_construction": sum(1 for c in wikilink_list if c.title == 'Kategorie:In Arbeit') > 0,
92 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
# The top-level 'Rodelbahnbox' template is evaluated if the page has exactly one of them.
94 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
95 if len(rbb_list) == 1:
96 rbb = rodelbahnbox_from_template(rbb_list[0])
# Image: warn if the referenced page in the file namespace does not exist.
99 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
100 if not image_page.exists():
101 warning(f"{image_page.title()} does not exist.")
102 sledrun_json['image'] = v
104 optional_set(sledrun_json, 'length', rbb['Länge'])
106 v = rbb['Schwierigkeit']
108 sledrun_json['difficulty'] = difficulty_german_to_str(v)
112 sledrun_json['avalanches'] = avalanches_german_to_str(v)
# Some Rodelbahnbox values are pairs; they are unpacked into `v` and `w` and stored under two keys.
114 v, w = rbb['Betreiber']
115 optional_set(sledrun_json, 'has_operator', v)
116 optional_set(sledrun_json, 'operator', w)
118 optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])
120 v, w = rbb['Aufstieg getrennt']
122 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
123 optional_set(sledrun_json, 'walkup_note', w)
125 optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])
# Walk-up supports ('Aufstiegshilfe'): one dict with 'type' per (type, note) pair,
# the note is handed to optional_set().
127 def _walkup_support():
128 walkup_support_rbb = rbb['Aufstiegshilfe']
129 if walkup_support_rbb is not None:
131 for walkup_support_type, note in walkup_support_rbb:
132 walkup_support = {'type': walkup_support_type}
133 optional_set(walkup_support, 'note', note)
134 walkup_supports.append(walkup_support)
135 sledrun_json['walkup_supports'] = walkup_supports
138 v, w = rbb['Beleuchtungsanlage']
140 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
141 optional_set(sledrun_json, 'nightlight_possible_note', w)
143 v, w = rbb['Beleuchtungstage']
144 optional_set(sledrun_json, 'nightlight_weekdays_count', v)
145 optional_set(sledrun_json, 'nightlight_weekdays_note', w)
# Sled rental ('Rodelverleih'): 'sled_rental_direct' is True unless the value equals an empty list.
148 v = rbb['Rodelverleih']
150 sledrun_json['sled_rental_direct'] = v != []
# The first wikilink found in the rental name is stored as 'wr_page'.
154 name_code = mwparserfromhell.parse(name)
155 wiki_link = next(name_code.ifilter_wikilinks(), None)
156 if isinstance(wiki_link, Wikilink):
157 x['wr_page'] = wikilink_to_json(wiki_link)
160 optional_set(x, 'note', note)
162 sledrun_json['sled_rental'] = w
# 'cachet' is True if the 'Gütesiegel' value has at least one element.
166 v = rbb['Gütesiegel']
168 sledrun_json['cachet'] = len(v) > 0
171 optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
172 optional_set(sledrun_json, 'forum_id', rbb['Forumid'])
# Positions: 'top' and 'bottom' combine a position value with the matching 'Höhe ...' value.
176 sledrun_json['position'] = lonlat_to_json(v)
178 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
180 sledrun_json['top'] = v
182 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
184 sledrun_json['bottom'] = v
# Information by phone ('Telefonauskunft') and web ('Webauskunft').
186 v = rbb['Telefonauskunft']
188 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
190 v, w = rbb['Webauskunft']
193 sledrun_json['info_web'] = [{'url': w}]
195 sledrun_json['info_web'] = []
197 v = rbb['Öffentliche Anreise']
199 sledrun_json['public_transport'] = public_transport_german_to_str(v)
# Video, webcam and correction e-mail come from the first top-level 'Buttonleiste' template;
# a parameter is only used if its stripped value is not empty.
202 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
203 bb = next(bb_iter, None)
205 video = bb.get('video', None)
206 if isinstance(video, Parameter) and video.value.strip() != "":
207 sledrun_json['videos'] = [{'url': str(video.value.strip())}]
208 webcam = bb.get('webcam', None)
209 if isinstance(webcam, Parameter) and webcam.value.strip() != "":
210 sledrun_json['webcams'] = [{'url': str(webcam.value.strip())}]
211 correction = bb.get('Korrektur_To', None)
212 if isinstance(correction, Parameter) and correction.value.strip() != "":
213 sledrun_json['correction_email'] = correction.value.strip()
# Level 2 section 'Anreise mit öffentlichen Verkehrsmitteln': description, stops, lines and links.
216 def _public_transport():
217 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
218 include_headings=False)
219 if len(pt_sections) < 1:
# The nodes before the first '*' list tag form the description; a description starting with the
# text 'Hier wird beschrieben werden, ...' is not stored.
222 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
224 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
225 if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
226 "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
227 sledrun_json["public_transport_description"] = str(description)
229 public_transport_stops = []
230 public_transport_lines = []
231 public_transport_links = []
# Templates named 'Haltestelle' describe stops, the 'Fahrplan ...' templates add timetable
# information and external links are collected as links.
233 for node in pt.nodes:
234 if isinstance(node, Template):
235 if node.name == 'Haltestelle':
237 public_transport_stops.append(ya)
# Checks whether all parameters of the template are empty after stripping.
238 if len([1 for p in node.params if len(p.strip()) != 0]) == 0:
241 z = node.get(1, None)
243 ya['municipality'] = str(z)
244 z = node.get(2, None)
246 ya['name_local'] = str(z)
247 za = str(node.get(3, '')).strip()
248 zb = str(node.get(4, '')).strip()
249 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
252 elif node.name in ["Fahrplan Abfahrtsmonitor VVT", "Fahrplan Abfahrtsmonitor VVV"]:
253 ya['monitor_template'] = template_to_json(node)
254 elif node.name in ["Fahrplan Hinfahrt VVT", "Fahrplan Hinfahrt VVV"]:
255 ya['route_arrival_template'] = template_to_json(node)
256 elif node.name in ["Fahrplan Rückfahrt VVT", "Fahrplan Rückfahrt VVV"]:
257 ya['route_departure_template'] = template_to_json(node)
258 elif node.name in ["Fahrplan Linie VVT", "Fahrplan Linie VVV"]:
260 public_transport_stops.append(ya)
263 'timetable_template': template_to_json(node),
265 public_transport_lines.append(y)
266 elif isinstance(node, ExternalLink):
267 public_transport_links.append(external_link_to_json(node))
269 public_transport_stops.append(ya)
# Only non-empty lists are stored in the sledrun JSON.
270 if len(public_transport_stops) > 0:
271 sledrun_json['public_transport_stops'] = public_transport_stops
272 if len(public_transport_lines) > 0:
273 sledrun_json['public_transport_lines'] = public_transport_lines
274 if len(public_transport_links) > 0:
275 sledrun_json['public_transport_links'] = public_transport_links
# Level 2 section 'Anreise mit dem Auto': description, parking places and distances.
279 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
280 if not car_section_list:
282 v = car_section_list[0]
# Description: leading Heading nodes are dropped, then nodes are taken up to the first '*' list tag;
# a description starting with the text 'Hier wollen wir Besonderheiten beschreiben, ...' is not stored.
284 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
285 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
287 if description := str(Wikicode(list(description_nodes))).strip():
288 if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
289 "wenn man mit dem Auto zur Rodelbahn anreist."):
290 sledrun_json["car_description"] = description
# Parking places: parameters 1 and 2 of each 'Parkplatz' template are converted to a position.
293 for w in v.ifilter_templates(matches='Parkplatz'):
294 za = str(w.get(1, '')).strip()
295 zb = str(w.get(2, '')).strip()
296 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
298 x.append({'position': z})
300 sledrun_json['car_parking'] = x
# Distances: lines matching "** von '''<text>'''<text>: <number> km" are parsed;
# a decimal comma in the number is replaced by a dot before the conversion to float.
303 for w in io.StringIO(str(v)):
304 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
306 ya, yb, yc = match.groups()
307 yc = float(yc.replace(',', '.'))
310 'route': (ya.strip() + ' ' + yb.strip()).strip(),
313 sledrun_json['car_distances'] = x
# Level 2 section(s) 'Allgemeines': the section text is scanned line by line for the list items
# 'Beleuchtung', 'Hütten' and 'Rodelverleih'.
317 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
# Looks for the first line starting with "* '''Beleuchtung''':" and strips that prefix from it.
318 def _nightlight(value: str) -> Optional[str]:
319 line_iter = io.StringIO(value)
320 line = next(line_iter, None)
321 while line is not None and not line.startswith("* '''Beleuchtung''':"):
322 line = next(line_iter, None)
325 line = line.replace("* '''Beleuchtung''':", "").strip()
329 optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))
# Gastronomy: advances to the line "* '''Hütten''':" and then evaluates lines starting with '** '.
331 def _gastronomy(value: str):
333 line_iter = io.StringIO(value)
334 line = next(line_iter, None)
335 while line is not None and line.rstrip() != "* '''Hütten''':":
336 line = next(line_iter, None)
339 while line is not None:
340 line = next(line_iter, None)
342 if line.startswith('** '):
# Per entry: first wikilink -> 'wr_page', first external link -> 'weblink'.
344 wiki = mwparserfromhell.parse(line)
345 wiki_link = next(wiki.ifilter_wikilinks(), None)
346 if isinstance(wiki_link, Wikilink):
347 g['wr_page'] = wikilink_to_json(wiki_link)
348 ext_link = next(wiki.ifilter_external_links(), None)
349 if isinstance(ext_link, ExternalLink):
350 g['weblink'] = external_link_to_json(ext_link)
# The remaining Text/Tag nodes (without the list markers) are matched against "<name>(<note>)".
351 remaining = str(Wikicode(n for n in wiki.nodes
352 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
353 match = re.match(r'(.*)\((.+)\)', remaining)
355 name, note = match.groups()
362 elif len(remaining) > 0 and remaining != '...':
363 g['name'] = remaining
370 w = _gastronomy(str(v))
372 sledrun_json['gastronomy'] = w
# Sled rental description: starts with the text after "* '''Rodelverleih''':" and scans on until a
# line starting with '* ' is reached; the parts in `result` are joined and stored if not empty.
374 def _sled_rental_description():
375 line_iter = io.StringIO(str(v))
376 line = next(line_iter, None)
378 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
379 line = next(line_iter, None)
382 result = [match.group(1)]
383 line = next(line_iter, None)
384 while line is not None and re.match(r"\* ", line) is None:
386 line = next(line_iter, None)
387 description = ''.join(result).strip()
388 if len(description) > 0:
389 sledrun_json['sled_rental_description'] = description
390 _sled_rental_description()
# 'see_also': external links are collected; the tag "'''Siehe auch'''" is checked for as a marker.
# NOTE(review): the loop around these lines is not visible here - confirm the exact scope.
395 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
400 if isinstance(w, ExternalLink):
401 x.append(external_link_to_json(w))
402 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
408 sledrun_json['see_also'] = x
410 sledrun_json['allow_reports'] = True
# Template 'Tiroler Naturrodelbahn Gütesiegel': template parameter names are mapped to the keys
# of the JSON object stored as 'tiroler_naturrodelbahn_gütesiegel'.
412 def _tiroler_naturrodelbahn_guetesiegel():
413 for gst in wikicode.filter_templates():
414 if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
418 'Anlagename': 'name',
419 'Organisation': 'organization',
420 'Erstverleihung': 'first_issued',
421 'Verlängerung': 'valid_from',
423 'Thread': 'thread_id',
# NOTE(review): presumably the keys listed in `numeric` are converted to numbers - the conversion
# itself is not visible here.
425 numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
426 for key, value in keys.items():
428 v = gst.get(key).value.strip()
434 sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
435 _tiroler_naturrodelbahn_guetesiegel()
# The title of the sub page '/Impressionen' is used as `impressions` if that page exists.
438 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
439 if sledrun_impressions_page.exists():
440 impressions = sledrun_impressions_page.title()
# Create wikitext from the collected data and show it together with a diff to the current page text.
442 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
443 pywikibot.output(text)
444 pywikibot.output('\03{lightpurple}---\03{default}')
445 pywikibot.showDiff(self.current_page.text, text)
# Validate against the schema; the assert checks that the key-ordered result still equals the
# collected data.
447 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
448 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
449 assert sledrun_json_ordered == sledrun_json
450 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
# Output and save the sledrun JSON page (see the check that the page does not exist yet).
451 if not sledrun_json_page.exists():
452 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
453 pywikibot.output('\03{lightpurple}---\03{default}')
454 pywikibot.output(sledrun_json_text)
455 pywikibot.output('\03{lightpurple}---\03{default}')
456 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')
# The map JSON page is handled when a map is available and the map page does not exist yet.
458 if map_json is not None and not map_json_page.exists():
459 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
460 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
461 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The global pywikibot options are handled by ``pywikibot.handle_args``; the remaining arguments
    are given to the page generator factory.

    :param args: command line arguments
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        # The bot has to be run explicitly - creating it does not process any page.
        bot = SledrunWikiTextToJsonBot(generator=gen)
        bot.run()
    else:
        # Without a page generator there is nothing to do: only then point the user to the help.
        pywikibot.bot.suggest_help(missing_generator=True)
# Run the bot only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()