]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
dc5d06c9ec17dcbfbfd7ad98acda485d23a5574c
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder used in the module docstring to the help text of the
# page generators. NOTE(review): presumably pywikibot substitutes it when printing the
# script help - confirm against the pywikibot version in use.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Serialize a template as its name and the list of its parameters.

    Each parameter is stored as a dict with the single key ``'value'`` that
    holds the string form of the parameter.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Serialize a wiki link as ``{'title': ...}`` plus ``'text'`` if the link has a text."""
    title = str(value.title)
    if value.text is None:
        return {'title': title}
    return {'title': title, 'text': str(value.text)}
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Serialize an external link as ``{'url': ...}`` plus ``'text'`` if the link has a title."""
    result = {'url': str(value.url)}
    label = value.title
    if label is None:
        return result
    result['text'] = str(label)
    return result
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that creates '<title>/Rodelbahn.json' (and '<title>/Landkarte.json') from sledrun wikitext pages.

    For each treated page the wikitext is parsed, the information of the
    'Rodelbahnbox' template, the button bar, the travel sections, the general
    section and the 'Tiroler Naturrodelbahn Gütesiegel' template is collected
    in a dict, validated against the sledrun JSON schema and saved as JSON page.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema page and keep the parsed schema in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page to JSON and save the result.

        The page is skipped (with a warning) if it is no wikitext page or if it
        lacks the category 'Kategorie:Rodelbahn'. It is skipped silently if
        the page '<title>/Rodelbahn.json' already exists.

        Raises whatever ``jsonschema.validate`` raises if the created JSON does
        not conform to the schema loaded in :meth:`setup`.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        if sledrun_json_page.exists():  # should be an option
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        # Only the first <wrmap> tag of the page is converted.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # NOTE(review): optional_set presumably sets the key only if the value is not None - see wrpylib.
        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The Rodelbahnbox is only evaluated if the page has exactly one top level box.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])

            image = rbb['Bild']
            if image is not None:
                image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is reported but still written to the JSON.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = image

            optional_set(sledrun_json, 'length', rbb['Länge'])

            difficulty = rbb['Schwierigkeit']
            if difficulty is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

            avalanches = rbb['Lawinen']
            if avalanches is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

            has_operator, operator = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', has_operator)
            optional_set(sledrun_json, 'operator', operator)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            walkup_separate, walkup_note = rbb['Aufstieg getrennt']
            if walkup_separate is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
            optional_set(sledrun_json, 'walkup_note', walkup_note)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            walkup_support_rbb = rbb['Aufstiegshilfe']
            if walkup_support_rbb is not None:
                walkup_supports = []
                for walkup_support_type, note in walkup_support_rbb:
                    walkup_support = {'type': walkup_support_type}
                    optional_set(walkup_support, 'note', note)
                    walkup_supports.append(walkup_support)
                sledrun_json['walkup_supports'] = walkup_supports

            nightlight_possible, nightlight_possible_note = rbb['Beleuchtungsanlage']
            if nightlight_possible is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight_possible)
            optional_set(sledrun_json, 'nightlight_possible_note', nightlight_possible_note)

            nightlight_weekdays_count, nightlight_weekdays_note = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', nightlight_weekdays_count)
            optional_set(sledrun_json, 'nightlight_weekdays_note', nightlight_weekdays_note)

            sled_rental_rbb = rbb['Rodelverleih']
            if sled_rental_rbb is not None:
                sledrun_json['sled_rental_direct'] = sled_rental_rbb != []
                sled_rentals = []
                for name, note in sled_rental_rbb:
                    sled_rental = {}
                    # A rental given as wiki link refers to a wiki page, otherwise the plain name is kept.
                    wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                    if isinstance(wiki_link, Wikilink):
                        sled_rental['wr_page'] = wikilink_to_json(wiki_link)
                    else:
                        sled_rental['name'] = name
                    optional_set(sled_rental, 'note', note)
                    sled_rentals.append(sled_rental)
                sledrun_json['sled_rental'] = sled_rentals

            cachet = rbb['Gütesiegel']
            if cachet is not None:
                sledrun_json['cachet'] = len(cachet) > 0

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            position = rbb['Position']
            if position is not None:
                sledrun_json['position'] = lonlat_to_json(position)

            top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if top != {}:
                sledrun_json['top'] = top

            bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if bottom != {}:
                sledrun_json['bottom'] = bottom

            info_phone = rbb['Telefonauskunft']
            if info_phone is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

            has_info_web, info_web_url = rbb['Webauskunft']
            if has_info_web is not None:
                if has_info_web:
                    sledrun_json['info_web'] = [{'url': info_web_url}]
                else:
                    sledrun_json['info_web'] = []

            public_transport = rbb['Öffentliche Anreise']
            if public_transport is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

        def _button_bar():
            """Take video URL and correction e-mail from the first top level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter) and video.value.strip() != "":
                    sledrun_json['videos'] = [{'url': str(video.value.strip())}]
                correction = bb.get('Korrektur_To', None)
                if isinstance(correction, Parameter) and correction.value.strip() != "":
                    sledrun_json['correction_email'] = correction.value.strip()
        _button_bar()

        def _public_transport():
            """Convert the first section 'Anreise mit öffentlichen Verkehrsmitteln'.

            Sets the description (text before the first list item, unless it is
            the placeholder text), the stops, the lines and the external links.
            """
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
                                                              "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
                    sledrun_json["public_transport_description"] = str(description)

            # Templates that describe the stop introduced by the preceding 'Haltestelle' template,
            # mapped to the JSON key they are stored under.
            stop_template_keys = {
                "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
                "Fahrplan Abfahrtsmonitor VVV": 'monitor_template',
                "Fahrplan Hinfahrt VVT": 'route_arrival_template',
                "Fahrplan Hinfahrt VVV": 'route_arrival_template',
                "Fahrplan Rückfahrt VVT": 'route_departure_template',
                "Fahrplan Rückfahrt VVV": 'route_departure_template',
            }
            line_template_names = ["Fahrplan Linie VVT", "Fahrplan Linie VVV"]

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # the stop that is currently being assembled (None if there is none)
            for node in pt.nodes:
                if isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
                    continue
                if not isinstance(node, Template):
                    continue
                # Strip the name like the other template checks of this bot do,
                # so that e.g. a line break after the template name does not matter.
                name = node.name.strip()
                if name == 'Haltestelle':
                    if stop is not None:
                        public_transport_stops.append(stop)
                        # Reset the stop, otherwise an empty 'Haltestelle' template that follows
                        # would cause the same stop to be appended a second time.
                        stop = None
                    if not any(p.strip() for p in node.params):
                        continue  # a template with only empty parameters is a placeholder
                    stop = {}
                    municipality = node.get(1, None)
                    if municipality is not None:
                        stop['municipality'] = str(municipality)
                    name_local = node.get(2, None)
                    if name_local is not None:
                        stop['name_local'] = str(name_local)
                    lonlat_str = str(node.get(3, '')).strip()
                    ele_str = str(node.get(4, '')).strip()
                    stop_position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                    if len(stop_position) > 0:
                        stop['position'] = stop_position
                elif name in stop_template_keys:
                    if stop is None:
                        # Without a current stop there is nothing the template could be attached to.
                        warning(f'The template {name} on page {self.current_page.title()} does not follow '
                                f'a non-empty Haltestelle template and is ignored.')
                    else:
                        stop[stop_template_keys[name]] = template_to_json(node)
                elif name in line_template_names:
                    # A line template ends the current stop.
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    public_transport_lines.append({
                        'timetable_template': template_to_json(node),
                    })
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Convert the first section 'Anreise mit dem Auto' (description, parking, distances)."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is everything between the heading and the first list item.
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), car_section.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
                                              "wenn man mit dem Auto zur Rodelbahn anreist."):
                    sledrun_json["car_description"] = description

            car_parking = []
            for parking in car_section.ifilter_templates(matches='Parkplatz'):
                lonlat_str = str(parking.get(1, '')).strip()
                ele_str = str(parking.get(2, '')).strip()
                parking_position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                if len(parking_position) > 0:
                    car_parking.append({'position': parking_position})
            if len(car_parking) > 0:
                sledrun_json['car_parking'] = car_parking

            # Distances are list items like: ** von '''Innsbruck''' (über ...): 12,5 km
            car_distances = []
            for line in io.StringIO(str(car_section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    origin, via, km = match.groups()
                    car_distances.append({
                        'km': float(km.replace(',', '.')),
                        'route': (origin.strip() + ' ' + via.strip()).strip(),
                    })
            if len(car_distances) > 0:
                sledrun_json['car_distances'] = car_distances
        _car()

        def _nightlight(value: str) -> Optional[str]:
            """Return the text of the first "* '''Beleuchtung''':" line of value or None if missing/empty."""
            prefix = "* '''Beleuchtung''':"
            for line in io.StringIO(value):
                if line.startswith(prefix):
                    return line.replace(prefix, "").strip() or None
            return None

        def _gastronomy(value: str) -> list:
            """Return the entries of the '** ' list that directly follows the "* '''Hütten''':" line.

            Each entry is a dict with the optional keys 'wr_page', 'weblink',
            'name' and 'note'. List items that give none of them are skipped.
            """
            gastronomy = []
            line_iter = io.StringIO(value)
            for line in line_iter:
                if line.rstrip() == "* '''Hütten''':":
                    break
            else:
                return gastronomy  # no line "* '''Hütten''':" present
            for line in line_iter:
                if not line.startswith('** '):
                    break  # end of the sub list
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = external_link_to_json(ext_link)
                # What remains after removing links and list markup is "name (note)" or just "name".
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'(.*)\((.+)\)', remaining)
                if match:
                    name, note = match.groups()
                    name = name.strip()
                    note = note.strip()
                    if len(name) > 0:
                        g['name'] = name
                    if len(note) > 0:
                        g['note'] = note
                elif len(remaining) > 0 and remaining != '...':
                    g['name'] = remaining
                # Only entries with content are kept. (Checking the still empty result list
                # instead of the entry would never append anything.)
                if len(g) != 0:
                    gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Set 'sled_rental_description' to the text of the "* '''Rodelverleih''':" list item of value.

            The text includes the lines that follow until the next line that starts with '* '.
            """
            line_iter = io.StringIO(value)
            line = next(line_iter, None)
            match = None
            while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                line = next(line_iter, None)
            if match is None:
                return
            result = [match.group(1)]
            line = next(line_iter, None)
            while line is not None and re.match(r"\* ", line) is None:
                result.append(line)
                line = next(line_iter, None)
            description = ''.join(result).strip()
            if len(description) > 0:
                sledrun_json['sled_rental_description'] = description

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)
            optional_set(sledrun_json, 'nightlight_description', _nightlight(section_text))

            gastronomy = _gastronomy(section_text)
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            _sled_rental_description(section_text)

            # Collect the external links that directly follow '''Siehe auch'''.
            node_iter = iter(section.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    continue  # list markup and whitespace between the links
                else:
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        def _tiroler_naturrodelbahn_guetesiegel():
            """Convert the non-empty parameters of the 'Tiroler Naturrodelbahn Gütesiegel' templates.

            If the page has several such templates with content, the last one wins.
            """
            keys = {
                'Anlagename': 'name',
                'Organisation': 'organization',
                'Erstverleihung': 'first_issued',
                'Verlängerung': 'valid_from',
                'Forum': 'forum_id',
                'Thread': 'thread_id',
            }
            numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
            for gst in wikicode.filter_templates():
                if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
                    continue
                gsj = {}
                for key, json_key in keys.items():
                    if gst.has(key):
                        param_value = gst.get(key).value.strip()
                        if param_value != '':
                            if json_key in numeric:
                                param_value = int(param_value)  # raises ValueError for non-numeric text
                            gsj[json_key] = param_value
                if len(gsj) > 0:
                    sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
        _tiroler_naturrodelbahn_guetesiegel()

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext that is created from the JSON and how it differs from the current page.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')

        if map_json is not None:
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
481
482
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The arguments that remain after pywikibot handled the global ones are
    passed to the page generator factory. If they do not result in a page
    generator, the help hint about the missing generator is shown instead of
    running the bot.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
493
494
# Run the bot only if the file is executed as script (not if it is imported).
if __name__ == '__main__':
    main()