3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in the directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
# pywikibot substitutes the '&params;' placeholder in a script's help text with the
# description of the page-generator options.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
def template_to_json(value: Template) -> dict:
    """Convert a template node to a JSON-serialisable dict.

    :param value: Template node; only its ``name`` and ``params`` attributes are read.
    :return: ``{'name': <str>, 'parameter': [{'value': <str>}, ...]}`` with one entry
        per template parameter, in the order given by ``value.params``.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(p)} for p in value.params],
    }
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wikilink node to a JSON-serialisable dict.

    :param value: Wikilink node; only its ``title`` and ``text`` attributes are read.
    :return: Dict with key ``'title'`` and, only when the link has a display text
        (``value.text`` is not None), key ``'text'``.
    """
    wl = {'title': str(value.title)}
    if value.text is not None:
        wl['text'] = str(value.text)
    return wl
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert an external link node to a JSON-serialisable dict.

    :param value: External link node; only its ``url`` and ``title`` attributes are read.
    :return: Dict with key ``'url'`` and, only when the link has a title
        (``value.title`` is not None), key ``'text'``.
    """
    link = {'url': str(value.url)}
    if value.title is not None:
        link['text'] = str(value.title)
    return link
70 class SledrunWikiTextToJsonBot(
75 AutomaticTWSummaryBot,
def setup(self) -> None:
    """Load the sledrun JSON schema page and keep the parsed schema in ``self.sledrun_schema``.

    :raises ValueError: if the schema page does not have the 'json' content model.
    """
    schema_title = 'Winterrodeln:Datenschema/Rodelbahn/V1.json'
    schema = Page(self.site, schema_title)
    # Explicit check instead of ``assert`` so the validation is still enforced under ``python -O``.
    if schema.content_model != 'json':
        raise ValueError(f"The content model of {schema_title} is {schema.content_model} instead of json.")
    self.sledrun_schema = json.loads(schema.text)
82 def treat_page(self) -> None:
83 """Load the given page, do some changes, and save it."""
# Overview: parses the wikitext of self.current_page, fills sledrun_json (and map_json from the wrmap tag),
# prints a preview and writes both JSON documents to sub-pages through self.userPut().
# NOTE(review): this listing has gaps (guards, initialisations and returns are missing between some lines),
# so the comments below only describe statements that are actually visible.

# Warn when the content model of the page is not 'wikitext'.
84 wikitext_content_model = 'wikitext'
85 if self.current_page.content_model != wikitext_content_model:
86 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
87 f"instead of {wikitext_content_model}.")
# Parse the page text and collect its wikilinks; warn when none of them has the title 'Kategorie:Rodelbahn'.
90 wikicode = mwparserfromhell.parse(self.current_page.text)
91 wikilink_list = wikicode.filter_wikilinks()
92 category_sledrun = 'Kategorie:Rodelbahn'
93 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
94 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
# Target pages for the generated documents: <title>/Rodelbahn.json and <title>/Landkarte.json.
97 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
99 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
# Map: the first tag matching 'wrmap' is converted with parse_wrmap().
102 v = wikicode.filter_tags(matches='wrmap')
104 map_json = parse_wrmap(str(v[0]))
# Base fields of the sledrun JSON.
# NOTE(review): 'entry_under_construction' compares the link *text* with 'Kategorie:In Arbeit', while the
# 'Kategorie:Rodelbahn' check above compares the link *title* - confirm this is intended.
107 "name": self.current_page.title(),
109 "entry_under_construction": sum(1 for c in wikilink_list if c.text == 'Kategorie:In Arbeit') > 0,
# The description is set through optional_set(); presumably skipped when there is none - verify in wrpylib.
112 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
# Rodelbahnbox: handled when exactly one top-level (recursive=False) template named 'Rodelbahnbox' exists.
114 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
115 if len(rbb_list) == 1:
116 rbb = rodelbahnbox_from_template(rbb_list[0])
# Image: the value is looked up as a page in the File namespace; a warning is emitted if that page does not exist.
119 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
120 if not image_page.exists():
121 warning(f"{image_page.title()} does not exist.")
122 sledrun_json['image'] = v
# The following assignments copy Rodelbahnbox values into sledrun_json. German enumeration values are translated
# with the *_german_to_str helpers; where a (v, w) pair is unpacked, w is stored in a second, related field.
126 sledrun_json['length'] = v
128 v = rbb['Schwierigkeit']
130 sledrun_json['difficulty'] = difficulty_german_to_str(v)
134 sledrun_json['avalanches'] = avalanches_german_to_str(v)
136 v, w = rbb['Betreiber']
138 sledrun_json['has_operator'] = v
140 sledrun_json['operator'] = w
142 v = rbb['Aufstieg möglich']
144 sledrun_json['walkup_possible'] = v
146 v, w = rbb['Aufstieg getrennt']
148 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
150 sledrun_json['walkup_comment'] = w # TODO
154 sledrun_json['walkup_time'] = v
# Local helper: turns the (type, comment) pairs of 'Aufstiegshilfe' into sledrun_json['walkup_supports'];
# the 'comment' key is only added when the comment is not None.
156 def _walkup_support():
157 walkup_support_rbb = rbb['Aufstiegshilfe']
158 if walkup_support_rbb is not None:
160 for walkup_support_type, comment in walkup_support_rbb:
161 walkup_support = {'type': walkup_support_type}
162 if comment is not None:
163 walkup_support['comment'] = comment
164 walkup_supports.append(walkup_support)
165 sledrun_json['walkup_supports'] = walkup_supports
# Night light: availability (tristate) with comment, and number of weekdays with comment.
168 v, w = rbb['Beleuchtungsanlage']
170 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
172 sledrun_json['nightlight_possible_comment'] = w
174 v, w = rbb['Beleuchtungstage']
176 sledrun_json['nightlight_weekdays_count'] = v
178 sledrun_json['nightlight_weekdays_comment'] = w
# Sled rental: 'sled_rental_direct' records whether the 'Rodelverleih' list is non-empty. For each
# (name, comment) entry the name is parsed as wikitext and its first wikilink, if any, is stored as 'wr_page'.
181 v = rbb['Rodelverleih']
183 sledrun_json['sled_rental_direct'] = v != []
185 for name, comment in v:
187 name_code = mwparserfromhell.parse(name)
188 wiki_link = next(name_code.ifilter_wikilinks(), None)
189 if isinstance(wiki_link, Wikilink):
190 x['wr_page'] = wikilink_to_json(wiki_link)
193 if comment is not None:
194 x['comment'] = comment
196 sledrun_json['sled_rental'] = w
# 'cachet' is a boolean: true when the 'Gütesiegel' value has at least one element.
200 v = rbb['Gütesiegel']
202 sledrun_json['cachet'] = len(v) > 0
205 v = rbb['In Übersichtskarte']
207 sledrun_json['show_in_overview'] = v
211 sledrun_json['forum_id'] = v
# Positions: main position, and top/bottom built from a position value plus the matching 'Höhe ...' value.
215 sledrun_json['position'] = lonlat_to_json(v)
217 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
219 sledrun_json['top'] = v
221 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
223 sledrun_json['bottom'] = v
# Information contacts: phone entries are (phone, name) pairs; web info is either one url entry or an empty list.
225 v = rbb['Telefonauskunft']
227 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
229 v, w = rbb['Webauskunft']
232 sledrun_json['info_web'] = [{'url': w}]
234 sledrun_json['info_web'] = []
236 v = rbb['Öffentliche Anreise']
238 sledrun_json['public_transport'] = public_transport_german_to_str(v)
# Buttonleiste: first top-level template with that name; if it has a 'video' parameter, its value becomes the
# single entry of sledrun_json['videos'].
241 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
242 bb = next(bb_iter, None)
244 video = bb.get('video', None)
245 if isinstance(video, Parameter):
246 sledrun_json['videos'] = [{'url': video.value}]
# Local helper for the level-2 section 'Anreise mit öffentlichen Verkehrsmitteln' (headings excluded).
249 def _public_transport():
250 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
251 include_headings=False)
252 if len(pt_sections) < 1:
# The nodes in front of the first '*' list tag form the free-text description.
255 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
257 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
259 sledrun_json["public_transport_description"] = str(description)
261 public_transport_stops = []
262 public_transport_lines = []
263 public_transport_links = []
# Walk the section nodes. 'Haltestelle' templates supply municipality (parameter 1), local name (parameter 2)
# and a position built from parameters 3 and 4 via lonlat_ele_to_json(); the 'Fahrplan ... VVT' templates are
# stored with template_to_json(); external links are collected into public_transport_links.
# NOTE(review): ya looks like the stop entry under construction and y like a line entry; the lines that
# create or reset them are not visible here.
265 for node in pt.nodes:
266 if isinstance(node, Template):
267 if node.name == 'Haltestelle':
269 public_transport_stops.append(ya)
271 z = node.get(1, None)
273 ya['municipality'] = str(z)
274 z = node.get(2, None)
276 ya['name_local'] = str(z)
277 za = str(node.get(3, '')).strip()
278 zb = str(node.get(4, '')).strip()
279 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
282 elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
283 ya['monitor_template'] = template_to_json(node)
284 elif node.name in ["Fahrplan Hinfahrt VVT"]:
285 ya['route_arrival_template'] = template_to_json(node)
286 elif node.name in ["Fahrplan Rückfahrt VVT"]:
287 ya['route_departure_template'] = template_to_json(node)
288 elif node.name in ["Fahrplan Linie VVT"]:
290 public_transport_stops.append(ya)
293 'timetable_template': template_to_json(node),
295 public_transport_lines.append(y)
296 elif isinstance(node, ExternalLink):
297 public_transport_links.append(external_link_to_json(node))
299 public_transport_stops.append(ya)
# Only non-empty lists are written to sledrun_json.
300 if len(public_transport_stops) > 0:
301 sledrun_json['public_transport_stops'] = public_transport_stops
302 if len(public_transport_lines) > 0:
303 sledrun_json['public_transport_lines'] = public_transport_lines
304 if len(public_transport_links) > 0:
305 sledrun_json['public_transport_links'] = public_transport_links
# Level-2 section 'Anreise mit dem Auto': only its first match is used.
309 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
310 if not car_section_list:
312 v = car_section_list[0]
# Description: skip the leading heading node(s), then take nodes up to (not including) the first '*' list tag;
# stored only when the stripped text is non-empty.
314 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
315 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
317 if description := str(Wikicode(list(description_nodes))).strip():
318 sledrun_json["car_description"] = description
# Parking: each 'Parkplatz' template contributes a position built from its parameters 1 and 2.
321 for w in v.ifilter_templates(matches='Parkplatz'):
322 za = str(w.get(1, '')).strip()
323 zb = str(w.get(2, '')).strip()
324 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
326 x.append({'position': z})
328 sledrun_json['car_parking'] = x
# Distances: the section is scanned line by line for entries of the form
# "** von '''<place>'''<extra>: <number> km"; the number may use ',' as decimal separator.
331 for w in io.StringIO(str(v)):
332 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
334 ya, yb, yc = match.groups()
335 yc = float(yc.replace(',', '.'))
338 'route': (ya.strip() + ' ' + yb.strip()).strip(),
341 sledrun_json['car_distances'] = x
# Level-2 section(s) 'Allgemeines': night light description, gastronomy and sled rental description.
345 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
# Local helper: advances to the first line starting with "* '''Beleuchtung''':" and strips that prefix.
346 def _nightlight(value: str) -> Optional[str]:
347 line_iter = io.StringIO(value)
348 line = next(line_iter, None)
349 while line is not None and not line.startswith("* '''Beleuchtung''':"):
350 line = next(line_iter, None)
353 line = line.replace("* '''Beleuchtung''':", "").strip()
357 w = _nightlight(str(v))
359 sledrun_json['nightlight_description'] = w
# Local helper: advances to the line that is exactly "* '''Hütten''':" (ignoring trailing whitespace) and then
# reads the following lines. For a '** ' line the first wikilink becomes 'wr_page', the first external link
# becomes 'weblink', and the remaining Text/Tag nodes (without the '*' markers) become 'note'; if that remaining
# text starts with a parenthesised group, only the content of the parentheses is kept.
361 def _gastronomy(value: str):
363 line_iter = io.StringIO(value)
364 line = next(line_iter, None)
365 while line is not None and line.rstrip() != "* '''Hütten''':":
366 line = next(line_iter, None)
369 while line is not None:
370 line = next(line_iter, None)
372 if line.startswith('** '):
374 wiki = mwparserfromhell.parse(line)
375 wiki_link = next(wiki.ifilter_wikilinks(), None)
376 if isinstance(wiki_link, Wikilink):
377 g['wr_page'] = wikilink_to_json(wiki_link)
378 ext_link = next(wiki.ifilter_external_links(), None)
379 if isinstance(ext_link, ExternalLink):
380 g['weblink'] = external_link_to_json(ext_link)
381 remaining = str(Wikicode(n for n in wiki.nodes
382 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
383 match = re.match(r'\((.+)\)', remaining)
385 remaining = match.group(1)
386 if len(remaining) > 0:
387 g['note'] = remaining
392 w = _gastronomy(str(v))
394 sledrun_json['gastronomy'] = w
# Local helper: finds the line "* '''Rodelverleih''':<text>" and collects text until the next line that starts
# with "* "; the joined, stripped result is stored as 'sled_rental_description'.
396 def _sled_rental_description():
397 line_iter = io.StringIO(str(v))
398 line = next(line_iter, None)
400 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
401 line = next(line_iter, None)
404 result = [match.group(1)]
405 line = next(line_iter, None)
406 while line is not None and re.match(r"\* ", line) is None:
408 line = next(line_iter, None)
409 sledrun_json['sled_rental_description'] = ''.join(result).strip()
410 _sled_rental_description()
# 'Siehe auch': external links are collected into x and stored as 'see_also'; Text/Tag nodes that are empty,
# '*' or ':' get separate handling.
# NOTE(review): the loop header and the branch bodies are not visible here.
415 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
420 if isinstance(w, ExternalLink):
421 x.append(external_link_to_json(w))
422 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
428 sledrun_json['see_also'] = x
430 sledrun_json['allow_reports'] = True
# Impressions: the title of the sub-page <title>/Impressionen is used when that page exists.
433 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
434 if sledrun_impressions_page.exists():
435 impressions = sledrun_impressions_page.title()
# Preview: render wikitext from the JSON data, print it and show a diff against the current page text.
437 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
438 pywikibot.output(text)
439 pywikibot.output('\03{lightpurple}---\03{default}')
440 pywikibot.showDiff(self.current_page.text, text)
# Validate against the schema loaded in setup(), order the keys by the schema, and check that ordering did not
# change the content (for dicts, == ignores key order).
442 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
443 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
444 assert sledrun_json_ordered == sledrun_json
445 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
446 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
447 pywikibot.output('\03{lightpurple}---\03{default}')
448 pywikibot.output(sledrun_json_text)
449 pywikibot.output('\03{lightpurple}---\03{default}')
# Store the sledrun JSON text on its sub-page.
450 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary)
# Store the map JSON text on its sub-page.
452 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
453 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
454 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary)
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the selected pages.

    :param args: Command line arguments; passed to pywikibot.handle_args().
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        # A page generator was specified: convert every page it yields.
        bot = SledrunWikiTextToJsonBot(generator=gen)
        bot.run()
    else:
        # Without a generator there is nothing to work on; point the user to the help text.
        pywikibot.bot.suggest_help(missing_generator=True)
# Run the bot only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()