3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
# Pywikibot replaces the '&params;' placeholder in the module docstring with the
# shared help text for page generators and filters. The key must be spelled
# exactly '&params;' (an entity-decoded '¶ms;' is never matched).
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
def template_to_json(value: Template) -> dict:
    """Convert a template node to a JSON-serializable dict.

    :param value: template node; only its ``name`` and ``params`` are read.
    :return: dict with the key ``'name'`` (template name as string) and the
        key ``'parameter'`` (list of ``{'value': str(param)}`` dicts in the
        order of ``value.params``).
    """
    # Each parameter is stored in its string form, exactly as str() renders it.
    parameter = [{'value': str(p)} for p in value.params]
    return {
        'name': str(value.name),
        'parameter': parameter
    }
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wikilink node to a JSON-serializable dict.

    :param value: wikilink node; only its ``title`` and ``text`` are read.
    :return: dict with the key ``'title'`` and, only when the link has a
        display text (``value.text is not None``), the key ``'text'``.
    """
    wl = {'title': str(value.title)}
    if value.text is not None:
        wl['text'] = str(value.text)
    return wl
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert an external link node to a JSON-serializable dict.

    :param value: external link node; only its ``url`` and ``title`` are read.
    :return: dict with the key ``'url'`` and, only when the link has a title
        (``value.title is not None``), the key ``'text'``.
    """
    link = {'url': str(value.url)}
    if value.title is not None:
        link['text'] = str(value.title)
    return link
# NOTE(review): this view of the class is lossy. Every line carries its original
# line number as a prefix, the indentation is gone, and the numbers skip wherever
# a line was dropped (guards, loop headers, returns). The comments below describe
# only what the visible lines show.
70 class SledrunWikiTextToJsonBot(
75 AutomaticTWSummaryBot,
# Load the JSON schema page once and keep the parsed schema on the instance;
# treat_page uses it for validation and key ordering.
77 def setup(self) -> None:
78 schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
79 assert schema.content_model == 'json'
80 self.sledrun_schema = json.loads(schema.text)
# Build the sledrun JSON and the map JSON from the wikitext of the current page
# and store them on the subpages '/Rodelbahn.json' and '/Landkarte.json'.
82 def treat_page(self) -> None:
83 """Load the given page, do some changes, and save it."""
# Warn when the page is not plain wikitext.
# NOTE(review): whatever follows the warning is not visible in this view.
84 wikitext_content_model = 'wikitext'
85 if self.current_page.content_model != wikitext_content_model:
86 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
87 f"instead of {wikitext_content_model}.")
# Parse the wikitext and warn when no link to the sledrun category is present.
90 wikicode = mwparserfromhell.parse(self.current_page.text)
91 wikilink_list = wikicode.filter_wikilinks()
92 category_sledrun = 'Kategorie:Rodelbahn'
93 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
94 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
# Target page for the sledrun JSON.
97 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
99 if sledrun_json_page.exists(): # should be an option
# Target page for the map JSON.
102 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
# The map data is parsed from the first tag matched by 'wrmap'.
105 v = wikicode.filter_tags(matches='wrmap')
107 map_json = parse_wrmap(str(v[0]))
# Base fields: 'entry_under_construction' is true when the page links to
# 'Kategorie:In Arbeit'.
110 "name": self.current_page.title(),
112 "entry_under_construction": sum(1 for c in wikilink_list if c.title == 'Kategorie:In Arbeit') > 0,
115 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
# Only top-level 'Rodelbahnbox' templates are considered (recursive=False), and
# the box is evaluated only when exactly one is found.
117 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
118 if len(rbb_list) == 1:
119 rbb = rodelbahnbox_from_template(rbb_list[0])
# Warn when the file page of the image does not exist.
122 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
123 if not image_page.exists():
124 warning(f"{image_page.title()} does not exist.")
125 sledrun_json['image'] = v
# Copy the box values into the JSON, converting German enum values to the JSON
# strings via the *_german_to_str helpers.
# NOTE(review): optional_set presumably skips None values -- confirm in wrpylib.
127 optional_set(sledrun_json, 'length', rbb['Länge'])
129 v = rbb['Schwierigkeit']
131 sledrun_json['difficulty'] = difficulty_german_to_str(v)
135 sledrun_json['avalanches'] = avalanches_german_to_str(v)
137 v, w = rbb['Betreiber']
138 optional_set(sledrun_json, 'has_operator', v)
139 optional_set(sledrun_json, 'operator', w)
141 optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])
143 v, w = rbb['Aufstieg getrennt']
145 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
146 optional_set(sledrun_json, 'walkup_note', w)
148 optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])
# Nested helper: turns the 'Aufstiegshilfe' entries into 'walkup_supports', a
# list of dicts with 'type' and (via optional_set) 'note'.
150 def _walkup_support():
151 walkup_support_rbb = rbb['Aufstiegshilfe']
152 if walkup_support_rbb is not None:
154 for walkup_support_type, note in walkup_support_rbb:
155 walkup_support = {'type': walkup_support_type}
156 optional_set(walkup_support, 'note', note)
157 walkup_supports.append(walkup_support)
158 sledrun_json['walkup_supports'] = walkup_supports
161 v, w = rbb['Beleuchtungsanlage']
163 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
164 optional_set(sledrun_json, 'nightlight_possible_note', w)
166 v, w = rbb['Beleuchtungstage']
167 optional_set(sledrun_json, 'nightlight_weekdays_count', v)
168 optional_set(sledrun_json, 'nightlight_weekdays_note', w)
# Sled rental: 'sled_rental_direct' is true unless the 'Rodelverleih' value
# equals []. The first wikilink found in an entry name becomes its 'wr_page'.
171 v = rbb['Rodelverleih']
173 sledrun_json['sled_rental_direct'] = v != []
177 name_code = mwparserfromhell.parse(name)
178 wiki_link = next(name_code.ifilter_wikilinks(), None)
179 if isinstance(wiki_link, Wikilink):
180 x['wr_page'] = wikilink_to_json(wiki_link)
183 optional_set(x, 'note', note)
185 sledrun_json['sled_rental'] = w
# 'cachet' is true when the 'Gütesiegel' value has at least one entry.
189 v = rbb['Gütesiegel']
191 sledrun_json['cachet'] = len(v) > 0
194 optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
195 optional_set(sledrun_json, 'forum_id', rbb['Forumid'])
# Positions: 'top' and 'bottom' combine a position with its elevation.
199 sledrun_json['position'] = lonlat_to_json(v)
201 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
203 sledrun_json['top'] = v
205 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
207 sledrun_json['bottom'] = v
# Phone and web information.
209 v = rbb['Telefonauskunft']
211 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
213 v, w = rbb['Webauskunft']
216 sledrun_json['info_web'] = [{'url': w}]
218 sledrun_json['info_web'] = []
220 v = rbb['Öffentliche Anreise']
222 sledrun_json['public_transport'] = public_transport_german_to_str(v)
# The first top-level 'Buttonleiste' template may carry a 'video' parameter; a
# non-empty value becomes the single entry of 'videos'.
225 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
226 bb = next(bb_iter, None)
228 video = bb.get('video', None)
229 if isinstance(video, Parameter) and video.value != "":
230 sledrun_json['videos'] = [{'url': str(video.value)}]
# Nested helper: reads the level-2 section 'Anreise mit öffentlichen
# Verkehrsmitteln' (section heading excluded).
233 def _public_transport():
234 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
235 include_headings=False)
236 if len(pt_sections) < 1:
# The nodes before the first '*' list tag form the free-text description; it is
# stored only when non-empty and not the placeholder text.
239 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
241 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
242 if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
243 "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
244 sledrun_json["public_transport_description"] = str(description)
246 public_transport_stops = []
247 public_transport_lines = []
248 public_transport_links = []
# Walk the section nodes: 'Haltestelle' and the 'Fahrplan ... VVT' templates
# feed the stop and line lists; external links are collected separately.
250 for node in pt.nodes:
251 if isinstance(node, Template):
252 if node.name == 'Haltestelle':
254 public_transport_stops.append(ya)
255 if len([1 for p in node.params if len(p.strip()) != 0]) == 0:
258 z = node.get(1, None)
260 ya['municipality'] = str(z)
261 z = node.get(2, None)
263 ya['name_local'] = str(z)
264 za = str(node.get(3, '')).strip()
265 zb = str(node.get(4, '')).strip()
266 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
269 elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
270 ya['monitor_template'] = template_to_json(node)
271 elif node.name in ["Fahrplan Hinfahrt VVT"]:
272 ya['route_arrival_template'] = template_to_json(node)
273 elif node.name in ["Fahrplan Rückfahrt VVT"]:
274 ya['route_departure_template'] = template_to_json(node)
275 elif node.name in ["Fahrplan Linie VVT"]:
277 public_transport_stops.append(ya)
280 'timetable_template': template_to_json(node),
282 public_transport_lines.append(y)
283 elif isinstance(node, ExternalLink):
284 public_transport_links.append(external_link_to_json(node))
286 public_transport_stops.append(ya)
# Only non-empty lists are written to the result.
287 if len(public_transport_stops) > 0:
288 sledrun_json['public_transport_stops'] = public_transport_stops
289 if len(public_transport_lines) > 0:
290 sledrun_json['public_transport_lines'] = public_transport_lines
291 if len(public_transport_links) > 0:
292 sledrun_json['public_transport_links'] = public_transport_links
# Arrival by car: the first level-2 section matching 'Anreise mit dem Auto'.
296 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
297 if not car_section_list:
299 v = car_section_list[0]
# Description = nodes after the leading heading(s) up to the first '*' list tag;
# the placeholder text is not stored.
301 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
302 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
304 if description := str(Wikicode(list(description_nodes))).strip():
305 if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
306 "wenn man mit dem Auto zur Rodelbahn anreist."):
307 sledrun_json["car_description"] = description
# Each 'Parkplatz' template contributes a position built from its first two
# parameters.
310 for w in v.ifilter_templates(matches='Parkplatz'):
311 za = str(w.get(1, '')).strip()
312 zb = str(w.get(2, '')).strip()
313 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
315 x.append({'position': z})
317 sledrun_json['car_parking'] = x
# Distance lines look like "** von '''<place>'''<rest>: <km> km"; a decimal
# comma is replaced by a dot before the float() conversion.
320 for w in io.StringIO(str(v)):
321 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
323 ya, yb, yc = match.groups()
324 yc = float(yc.replace(',', '.'))
327 'route': (ya.strip() + ' ' + yb.strip()).strip(),
330 sledrun_json['car_distances'] = x
# Level-2 section(s) matching 'Allgemeines': nightlight description, gastronomy
# and the sled rental description are parsed line by line from the section text.
334 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
# Nested helper: finds the line starting with "* '''Beleuchtung''':" and strips
# that prefix from it.
335 def _nightlight(value: str) -> Optional[str]:
336 line_iter = io.StringIO(value)
337 line = next(line_iter, None)
338 while line is not None and not line.startswith("* '''Beleuchtung''':"):
339 line = next(line_iter, None)
342 line = line.replace("* '''Beleuchtung''':", "").strip()
346 optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))
# Nested helper: skips forward to the "* '''Hütten''':" line and then looks at
# the lines starting with '** '.
348 def _gastronomy(value: str):
350 line_iter = io.StringIO(value)
351 line = next(line_iter, None)
352 while line is not None and line.rstrip() != "* '''Hütten''':":
353 line = next(line_iter, None)
356 while line is not None:
357 line = next(line_iter, None)
359 if line.startswith('** '):
361 wiki = mwparserfromhell.parse(line)
362 wiki_link = next(wiki.ifilter_wikilinks(), None)
363 if isinstance(wiki_link, Wikilink):
364 g['wr_page'] = wikilink_to_json(wiki_link)
365 ext_link = next(wiki.ifilter_external_links(), None)
366 if isinstance(ext_link, ExternalLink):
367 g['weblink'] = external_link_to_json(ext_link)
# Text left over from the Text/Tag nodes (list marker '*' excluded); the regex
# splits it into a name and a parenthesised note.
368 remaining = str(Wikicode(n for n in wiki.nodes
369 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
370 match = re.match(r'(.*)\((.+)\)', remaining)
372 name, note = match.groups()
379 elif len(remaining) > 0 and remaining != '...':
380 g['name'] = remaining
381 if len(gastronomy) != 0:
387 w = _gastronomy(str(v))
389 sledrun_json['gastronomy'] = w
# Nested helper: starts at the "* '''Rodelverleih''':" line and continues until
# the next line matching '\* '; the joined, stripped text is stored when
# non-empty.
391 def _sled_rental_description():
392 line_iter = io.StringIO(str(v))
393 line = next(line_iter, None)
395 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
396 line = next(line_iter, None)
399 result = [match.group(1)]
400 line = next(line_iter, None)
401 while line is not None and re.match(r"\* ", line) is None:
403 line = next(line_iter, None)
404 description = ''.join(result).strip()
405 if len(description) > 0:
406 sledrun_json['sled_rental_description'] = description
407 _sled_rental_description()
# "Siehe auch" handling: external links are converted and collected into
# 'see_also'.
# NOTE(review): the enclosing loop headers are not visible in this view.
412 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
417 if isinstance(w, ExternalLink):
418 x.append(external_link_to_json(w))
419 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
425 sledrun_json['see_also'] = x
427 sledrun_json['allow_reports'] = True
# An existing '/Impressionen' subpage is passed on by its title.
430 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
431 if sledrun_impressions_page.exists():
432 impressions = sledrun_impressions_page.title()
# Print the wikitext regenerated from the JSON and its diff against the current
# page text.
434 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
435 pywikibot.output(text)
436 pywikibot.output('\03{lightpurple}---\03{default}')
437 pywikibot.showDiff(self.current_page.text, text)
# Validate against the schema loaded in setup(); the assert checks that ordering
# the keys did not change the data.
439 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
440 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
441 assert sledrun_json_ordered == sledrun_json
442 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
443 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
444 pywikibot.output('\03{lightpurple}---\03{default}')
445 pywikibot.output(sledrun_json_text)
446 pywikibot.output('\03{lightpurple}---\03{default}')
# Save both JSON pages through userPut with content model 'json'.
447 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')
449 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
450 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
451 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    :param args: command line arguments; they are passed to
        ``pywikibot.handle_args`` and the remaining ones to the page
        generator factory.
    """
    # Global pywikibot options are consumed first; the rest selects the pages.
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        bot = SledrunWikiTextToJsonBot(generator=gen)
        bot.run()
    else:
        # Without a page generator there is nothing to work on: show the hint.
        pywikibot.bot.suggest_help(missing_generator=True)
466 if __name__ == '__main__':