3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
8 The following generators and filters are supported:
15 from itertools import takewhile, dropwhile
16 from typing import Optional
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27 AutomaticTWSummaryBot,
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39 avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
# Maps the '&params;' placeholder used in the module docstring to the help
# text for page generators and filters (pagegenerators.parameterHelp).
# The key must be spelled '&params;' exactly: a decoded '&para' entity ('¶')
# would never match the placeholder.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
def template_to_json(value: Template) -> dict:
    """Convert a parsed wikitext template into a JSON-serialisable dict.

    :param value: template node; its ``name`` and ``params`` are read.
    :return: ``{'name': <name>, 'parameter': [{'value': <text>}, ...]}`` with
        one entry per template parameter, in the order of ``value.params``.
        Each parameter is stored as its string form.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(p)} for p in value.params],
    }
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a parsed wikilink into a JSON-serialisable dict.

    :param value: wikilink node; its ``title`` and ``text`` are read.
    :return: dict with the key ``'title'`` and, only when the link has a
        display text (``value.text`` is not ``None``), the key ``'text'``.
    """
    wl = {'title': str(value.title)}
    if value.text is not None:
        wl['text'] = str(value.text)
    return wl
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert a parsed external link into a JSON-serialisable dict.

    :param value: external link node; its ``url`` and ``title`` are read.
    :return: dict with the key ``'url'`` and, only when the link has a title
        (``value.title`` is not ``None``), the key ``'text'``.
    """
    link = {'url': str(value.url)}
    if value.title is not None:
        link['text'] = str(value.title)
    return link
# NOTE(review): this class is shown as a numbered listing with the indentation
# removed and a number of short lines left out (guards, returns, initialisers).
# The comments below describe only the statements that are visible here.
#
# Bot that reads a sledrun wikitext page and writes two JSON sub-pages:
# '<title>/Rodelbahn.json' (sledrun data) and '<title>/Landkarte.json' (map).
70 class SledrunWikiTextToJsonBot(
75 AutomaticTWSummaryBot,
# setup: load the sledrun JSON schema page from the wiki, require its content
# model to be 'json' and keep the parsed schema on self.sledrun_schema, which
# treat_page uses for validation and key ordering.
77 def setup(self) -> None:
78 schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
79 assert schema.content_model == 'json'
80 self.sledrun_schema = json.loads(schema.text)
82 def treat_page(self) -> None:
83 """Load the given page, do some changes, and save it."""
# Warn when the current page does not use the 'wikitext' content model.
84 wikitext_content_model = 'wikitext'
85 if self.current_page.content_model != wikitext_content_model:
86 warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
87 f"instead of {wikitext_content_model}.")
# Parse the page text and warn when no wikilink titled 'Kategorie:Rodelbahn'
# is found.
90 wikicode = mwparserfromhell.parse(self.current_page.text)
91 wikilink_list = wikicode.filter_wikilinks()
92 category_sledrun = 'Kategorie:Rodelbahn'
93 if sum(1 for c in wikilink_list if c.title == category_sledrun) == 0:
94 warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
# Target page for the sledrun JSON; what happens when it already exists is
# not visible in this listing.
97 sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
99 if sledrun_json_page.exists(): # should be an option
# Target page for the map JSON; the map is parsed from the first <wrmap> tag.
102 map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
105 v = wikicode.filter_tags(matches='wrmap')
107 map_json = parse_wrmap(str(v[0]))
# Entries of the sledrun_json document: the page title, and a flag that is
# true when the page has a wikilink titled 'Kategorie:In Arbeit'.
110 "name": self.current_page.title(),
112 "entry_under_construction": sum(1 for c in wikilink_list if c.title == 'Kategorie:In Arbeit') > 0,
# NOTE(review): optional_set comes from wrpylib; presumably it stores the key
# only when a value is available -- confirm.
115 optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))
# Only top-level 'Rodelbahnbox' templates are considered (recursive=False);
# the box is evaluated when exactly one is present.
117 rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
118 if len(rbb_list) == 1:
119 rbb = rodelbahnbox_from_template(rbb_list[0])
# Image: warn when the referenced page in the file namespace does not exist.
122 image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
123 if not image_page.exists():
124 warning(f"{image_page.title()} does not exist.")
125 sledrun_json['image'] = v
127 optional_set(sledrun_json, 'length', rbb['Länge'])
# German box values are translated to the schema's string values by the
# *_german_to_str helpers.
129 v = rbb['Schwierigkeit']
131 sledrun_json['difficulty'] = difficulty_german_to_str(v)
135 sledrun_json['avalanches'] = avalanches_german_to_str(v)
# Several box fields are pairs that are unpacked into (value, note/second value).
137 v, w = rbb['Betreiber']
138 optional_set(sledrun_json, 'has_operator', v)
139 optional_set(sledrun_json, 'operator', w)
141 optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])
143 v, w = rbb['Aufstieg getrennt']
145 sledrun_json['walkup_separate'] = tristate_german_to_str(v)
146 optional_set(sledrun_json, 'walkup_note', w)
148 optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])
# Nested helper: turn the (type, note) pairs of 'Aufstiegshilfe' into a list
# of dicts stored under 'walkup_supports'.
150 def _walkup_support():
151 walkup_support_rbb = rbb['Aufstiegshilfe']
152 if walkup_support_rbb is not None:
154 for walkup_support_type, note in walkup_support_rbb:
155 walkup_support = {'type': walkup_support_type}
156 optional_set(walkup_support, 'note', note)
157 walkup_supports.append(walkup_support)
158 sledrun_json['walkup_supports'] = walkup_supports
161 v, w = rbb['Beleuchtungsanlage']
163 sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
164 optional_set(sledrun_json, 'nightlight_possible_note', w)
166 v, w = rbb['Beleuchtungstage']
167 optional_set(sledrun_json, 'nightlight_weekdays_count', v)
168 optional_set(sledrun_json, 'nightlight_weekdays_note', w)
# Sled rental: 'sled_rental_direct' is true for a non-empty list; a wikilink
# found in a rental name is stored as 'wr_page'.
171 v = rbb['Rodelverleih']
173 sledrun_json['sled_rental_direct'] = v != []
177 name_code = mwparserfromhell.parse(name)
178 wiki_link = next(name_code.ifilter_wikilinks(), None)
179 if isinstance(wiki_link, Wikilink):
180 x['wr_page'] = wikilink_to_json(wiki_link)
183 optional_set(x, 'note', note)
185 sledrun_json['sled_rental'] = w
# 'cachet' is true when the 'Gütesiegel' value is non-empty.
189 v = rbb['Gütesiegel']
191 sledrun_json['cachet'] = len(v) > 0
194 optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
195 optional_set(sledrun_json, 'forum_id', rbb['Forumid'])
# Positions: top and bottom combine a position with the matching elevation.
199 sledrun_json['position'] = lonlat_to_json(v)
201 v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
203 sledrun_json['top'] = v
205 v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
207 sledrun_json['bottom'] = v
# Phone information is a list of (phone, name) pairs.
209 v = rbb['Telefonauskunft']
211 sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]
# Web information: either one entry with the URL or an empty list; the
# condition choosing between them is not visible in this listing.
213 v, w = rbb['Webauskunft']
216 sledrun_json['info_web'] = [{'url': w}]
218 sledrun_json['info_web'] = []
220 v = rbb['Öffentliche Anreise']
222 sledrun_json['public_transport'] = public_transport_german_to_str(v)
# Video: taken from the 'video' parameter of the first top-level
# 'Buttonleiste' template.
225 bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
226 bb = next(bb_iter, None)
228 video = bb.get('video', None)
229 if isinstance(video, Parameter):
230 sledrun_json['videos'] = [{'url': str(video.value)}]
# Nested helper: evaluate the level-2 section
# 'Anreise mit öffentlichen Verkehrsmitteln' (heading excluded).
233 def _public_transport():
234 pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
235 include_headings=False)
236 if len(pt_sections) < 1:
# The description is the text in front of the first '*' list marker.
239 node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
241 description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
243 sledrun_json["public_transport_description"] = str(description)
245 public_transport_stops = []
246 public_transport_lines = []
247 public_transport_links = []
# Walk the section nodes: 'Haltestelle' templates and the 'Fahrplan ... VVT'
# templates fill stop entries (ya), 'Fahrplan Linie VVT' yields a line entry
# and external links are collected separately.
249 for node in pt.nodes:
250 if isinstance(node, Template):
251 if node.name == 'Haltestelle':
253 public_transport_stops.append(ya)
255 z = node.get(1, None)
257 ya['municipality'] = str(z)
258 z = node.get(2, None)
260 ya['name_local'] = str(z)
# Parameters 3 and 4 are passed to the position and elevation parsers.
261 za = str(node.get(3, '')).strip()
262 zb = str(node.get(4, '')).strip()
263 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
266 elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
267 ya['monitor_template'] = template_to_json(node)
268 elif node.name in ["Fahrplan Hinfahrt VVT"]:
269 ya['route_arrival_template'] = template_to_json(node)
270 elif node.name in ["Fahrplan Rückfahrt VVT"]:
271 ya['route_departure_template'] = template_to_json(node)
272 elif node.name in ["Fahrplan Linie VVT"]:
274 public_transport_stops.append(ya)
277 'timetable_template': template_to_json(node),
279 public_transport_lines.append(y)
280 elif isinstance(node, ExternalLink):
281 public_transport_links.append(external_link_to_json(node))
283 public_transport_stops.append(ya)
# Only non-empty lists are written to the document.
284 if len(public_transport_stops) > 0:
285 sledrun_json['public_transport_stops'] = public_transport_stops
286 if len(public_transport_lines) > 0:
287 sledrun_json['public_transport_lines'] = public_transport_lines
288 if len(public_transport_links) > 0:
289 sledrun_json['public_transport_links'] = public_transport_links
# Car section: level-2 section 'Anreise mit dem Auto'.
293 car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
294 if not car_section_list:
296 v = car_section_list[0]
# Description: skip leading headings, then take nodes up to the first '*'
# list marker; stored only when the stripped text is non-empty.
298 description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
299 description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
301 if description := str(Wikicode(list(description_nodes))).strip():
302 sledrun_json["car_description"] = description
# Parking: one entry per 'Parkplatz' template, built from parameters 1 and 2.
305 for w in v.ifilter_templates(matches='Parkplatz'):
306 za = str(w.get(1, '')).strip()
307 zb = str(w.get(2, '')).strip()
308 z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
310 x.append({'position': z})
312 sledrun_json['car_parking'] = x
# Distances: scan the section line by line for "** von '''<place>'''<rest>: <n> km";
# the number accepts a decimal comma, which is replaced by a dot before float().
315 for w in io.StringIO(str(v)):
316 match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
318 ya, yb, yc = match.groups()
319 yc = float(yc.replace(',', '.'))
322 'route': (ya.strip() + ' ' + yb.strip()).strip(),
325 sledrun_json['car_distances'] = x
# Level-2 sections 'Allgemeines': nightlight, gastronomy and sled rental
# descriptions are extracted from bullet lines.
329 for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
# Nested helper: find the line starting with "* '''Beleuchtung''':" and
# return its text without that prefix.
330 def _nightlight(value: str) -> Optional[str]:
331 line_iter = io.StringIO(value)
332 line = next(line_iter, None)
333 while line is not None and not line.startswith("* '''Beleuchtung''':"):
334 line = next(line_iter, None)
337 line = line.replace("* '''Beleuchtung''':", "").strip()
341 optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))
# Nested helper: after the line "* '''Hütten''':", evaluate the following
# '** ' lines; each may contribute a wikilink, an external link and a name.
343 def _gastronomy(value: str):
345 line_iter = io.StringIO(value)
346 line = next(line_iter, None)
347 while line is not None and line.rstrip() != "* '''Hütten''':":
348 line = next(line_iter, None)
351 while line is not None:
352 line = next(line_iter, None)
354 if line.startswith('** '):
356 wiki = mwparserfromhell.parse(line)
357 wiki_link = next(wiki.ifilter_wikilinks(), None)
358 if isinstance(wiki_link, Wikilink):
359 g['wr_page'] = wikilink_to_json(wiki_link)
360 ext_link = next(wiki.ifilter_external_links(), None)
361 if isinstance(ext_link, ExternalLink):
362 g['weblink'] = external_link_to_json(ext_link)
# What is left after dropping links and the '*' markers is matched against
# "<name>(<note>)".
363 remaining = str(Wikicode(n for n in wiki.nodes
364 if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
365 match = re.match(r'(.*)\((.+)\)', remaining)
367 name, note = match.groups()
374 elif len(remaining) > 0:
375 g['name'] = remaining
381 w = _gastronomy(str(v))
383 sledrun_json['gastronomy'] = w
# Nested helper: take the text after "* '''Rodelverleih''':" and continue
# over following lines until the next line starting with "* ".
385 def _sled_rental_description():
386 line_iter = io.StringIO(str(v))
387 line = next(line_iter, None)
389 while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
390 line = next(line_iter, None)
393 result = [match.group(1)]
394 line = next(line_iter, None)
395 while line is not None and re.match(r"\* ", line) is None:
397 line = next(line_iter, None)
398 sledrun_json['sled_rental_description'] = ''.join(result).strip()
399 _sled_rental_description()
# "See also": a bold "Siehe auch" tag is looked for; external links are
# collected into 'see_also', while empty text and '*' / ':' markers get
# their own branch.
404 if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
409 if isinstance(w, ExternalLink):
410 x.append(external_link_to_json(w))
411 elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
417 sledrun_json['see_also'] = x
419 sledrun_json['allow_reports'] = True
# Use the title of the '/Impressionen' sub-page when that page exists.
422 sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
423 if sledrun_impressions_page.exists():
424 impressions = sledrun_impressions_page.title()
# Render wikitext from the JSON data and show it with a diff against the
# current page text.
426 text = create_sledrun_wiki(sledrun_json, map_json, impressions)
427 pywikibot.output(text)
428 pywikibot.output('\03{lightpurple}---\03{default}')
429 pywikibot.showDiff(self.current_page.text, text)
# Validate against the schema loaded in setup, order the keys by the schema
# (the assert checks the ordered document still compares equal), then show
# and save the JSON text with content model 'json'.
431 jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
432 sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
433 assert sledrun_json_ordered == sledrun_json
434 sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
435 summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
436 pywikibot.output('\03{lightpurple}---\03{default}')
437 pywikibot.output(sledrun_json_text)
438 pywikibot.output('\03{lightpurple}---\03{default}')
439 self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')
# Save the map JSON to its own sub-page in the same way.
441 map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
442 summary = 'Landkarte konvertiert von Wikitext nach JSON.'
443 self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
def main(*args: str) -> None:
    """Process command line arguments and run the bot.

    Global pywikibot arguments are consumed by ``pywikibot.handle_args``; the
    remaining ones are offered to the page generator factory.

    :param args: command line arguments
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if not gen:
        # No page generator was selected on the command line: show the
        # usage hint instead of running the bot.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    bot = SledrunWikiTextToJsonBot(generator=gen)
    bot.run()
458 if __name__ == '__main__':