]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
9ce3b20ef7a14615ad4a40d8ef088b28ee600aa6
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
42     template_to_json, external_link_to_json
43
# Maps the '&params;' placeholder used in the module docstring to the help text of the
# pywikibot page generators (pywikibot convention for script help output).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
45
46
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that derives JSON sub-pages from sledrun wikitext pages.

    For every treated wikitext page that carries the category ``Kategorie:Rodelbahn`` a sledrun JSON
    document is assembled from the wikitext and proposed as sub-page ``<title>/Rodelbahn.json``.
    If the page contains a ``<wrmap>`` tag, its content is proposed as ``<title>/Landkarte.json``.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki and keep it in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page to JSON and offer the result for saving.

        The page is skipped (with a warning) if its content model is not wikitext or if it lacks the
        category ``Kategorie:Rodelbahn``. It is skipped silently if both JSON sub-pages already exist.
        Otherwise the sledrun JSON is collected from the ``Rodelbahnbox`` and ``Buttonleiste`` templates,
        the level 2 sections about public transport, car arrival and general information and the
        ``Tiroler Naturrodelbahn Gütesiegel`` template. The JSON is validated against the schema loaded
        in :meth:`setup` and each sub-page that does not exist yet is written with ``userPut``.

        :raises jsonschema.ValidationError: if the generated JSON does not match the schema.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        if sledrun_json_page.exists() and map_json_page.exists():  # should be an option
            return

        # Only the first <wrmap> tag of the page is converted.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # NOTE(review): optional_set is a project helper; presumably it only sets the key when the
        # value is not None - confirm in wrpylib.lib_sledrun_wikitext_to_json.
        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The Rodelbahnbox is only evaluated if the page has exactly one top level box.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is reported, but the name is stored nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Copy the (type, note) pairs of 'Aufstiegshilfe' to 'walkup_supports'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Set 'sled_rental_direct' and 'sled_rental' from the (name, note) pairs of 'Rodelverleih'."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, note in v:
                        x = {}
                        name_code = mwparserfromhell.parse(name)
                        # A name containing a wiki link is stored as page reference instead of plain text.
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        optional_set(x, 'note', note)
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether 'Gütesiegel' has at least one entry."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Take video, webcam and correction e-mail from the first top level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter) and video.value.strip() != "":
                    sledrun_json['videos'] = [{'url': str(video.value.strip())}]
                webcam = bb.get('webcam', None)
                if isinstance(webcam, Parameter) and webcam.value.strip() != "":
                    sledrun_json['webcams'] = [{'url': str(webcam.value.strip())}]
                correction = bb.get('Korrektur_To', None)
                if isinstance(correction, Parameter) and correction.value.strip() != "":
                    sledrun_json['correction_email'] = correction.value.strip()
        _button_bar()

        def _public_transport():
            """Fill the public transport keys from the section 'Anreise mit öffentlichen Verkehrsmitteln'.

            The text before the first list item becomes the description (unless it is the placeholder
            text). 'Haltestelle' templates open a stop, the monitor and route templates that follow are
            attached to the open stop, 'Fahrplan Linie' templates form the lines and external links
            are collected as links.
            """
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
                                                              "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
                    sledrun_json["public_transport_description"] = str(description)

            # Templates that describe the currently open stop and the JSON key they are stored under.
            stop_template_keys = {
                "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
                "Fahrplan Abfahrtsmonitor VVV": 'monitor_template',
                "Fahrplan Hinfahrt VVT": 'route_arrival_template',
                "Fahrplan Hinfahrt VVV": 'route_arrival_template',
                "Fahrplan Rückfahrt VVT": 'route_departure_template',
                "Fahrplan Rückfahrt VVV": 'route_departure_template',
            }
            line_templates = ("Fahrplan Linie VVT", "Fahrplan Linie VVV")

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # stop that is currently being assembled (None: no stop is open)
            for node in pt.nodes:
                if isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
                    continue
                if not isinstance(node, Template):
                    continue
                # Strip the name like the other template checks of this bot do, so that
                # "{{Haltestelle\n|..." is recognized as well.
                template_name = node.name.strip()
                if template_name == 'Haltestelle':
                    # Close the previous stop exactly once. Resetting it prevents it from being
                    # appended a second time when the new template turns out to be empty.
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    if all(len(p.strip()) == 0 for p in node.params):
                        continue
                    stop = {}
                    z = node.get(1, None)
                    if z is not None:
                        stop['municipality'] = str(z)
                    z = node.get(2, None)
                    if z is not None:
                        stop['name_local'] = str(z)
                    za = str(node.get(3, '')).strip()
                    zb = str(node.get(4, '')).strip()
                    z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                    if len(z) > 0:
                        stop['position'] = z
                elif template_name in stop_template_keys:
                    if stop is None:
                        # Without an open stop there is nothing the template could be attached to.
                        warning(f"Template {template_name} on page {self.current_page.title()} does not follow "
                                f"a non-empty Haltestelle template and is ignored.")
                        continue
                    stop[stop_template_keys[template_name]] = template_to_json(node)
                elif template_name in line_templates:
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    public_transport_lines.append({'timetable_template': template_to_json(node)})
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Fill 'car_description', 'car_parking' and 'car_distances' from the section 'Anreise mit dem Auto'."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is everything between the heading and the first list item.
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
                                              "wenn man mit dem Auto zur Rodelbahn anreist."):
                    sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distance lines look like "** von '''<place>'''<remark>: <number> km".
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        x = []  # "see also" links collected over all 'Allgemeines' sections
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text after "* '''Beleuchtung''':" of the first such line or None if absent/empty."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))

            def _gastronomy(value: str):
                """Return the entries of the '** ' lines that directly follow the line "* '''Hütten''':"."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                for line in line_iter:
                    if not line.startswith('** '):
                        # The list of huts ends at the first line that is no second level item.
                        break
                    g = {}
                    wiki = mwparserfromhell.parse(line)
                    wiki_link = next(wiki.ifilter_wikilinks(), None)
                    if isinstance(wiki_link, Wikilink):
                        g['wr_page'] = wikilink_to_json(wiki_link)
                    ext_link = next(wiki.ifilter_external_links(), None)
                    if isinstance(ext_link, ExternalLink):
                        g['weblink'] = external_link_to_json(ext_link)
                    # What is left besides links and the list marker is "<name> (<note>)" or a plain name.
                    remaining = str(Wikicode([n for n in wiki.nodes
                                              if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                    match = re.match(r'(.*)\((.+)\)', remaining)
                    if match:
                        name, note = match.groups()
                        name = name.strip()
                        note = note.strip()
                        if len(name) > 0:
                            g['name'] = name
                        if len(note) > 0:
                            g['note'] = note
                    elif len(remaining) > 0 and remaining != '...':
                        g['name'] = remaining
                    if len(g) != 0:
                        gastronomy.append(g)
                return gastronomy

            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Set 'sled_rental_description' to the text of the "* '''Rodelverleih''':" list item.

                The text extends from the rest of that line up to the next line starting with "* ".
                """
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                description = ''.join(result).strip()
                if len(description) > 0:
                    sledrun_json['sled_rental_description'] = description
            _sled_rental_description()

            # Skip everything up to and including the bold text "Siehe auch" ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow, ignoring whitespace and list markup.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        def _tiroler_naturrodelbahn_guetesiegel():
            """Convert the template 'Tiroler Naturrodelbahn Gütesiegel' to 'tiroler_naturrodelbahn_gütesiegel'.

            If the template occurs several times, the last occurrence with any non-empty value wins.
            """
            for gst in wikicode.filter_templates():
                if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
                    continue
                gsj = {}
                keys = {
                    'Anlagename': 'name',
                    'Organisation': 'organization',
                    'Erstverleihung': 'first_issued',
                    'Verlängerung': 'valid_from',
                    'Forum': 'forum_id',
                    'Thread': 'thread_id',
                }
                numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
                for key, value in keys.items():
                    if gst.has(key):
                        v = gst.get(key).value.strip()
                        if v != '':
                            if value in numeric:
                                v = int(v)
                            gsj[value] = v
                if len(gsj) > 0:
                    sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
        _tiroler_naturrodelbahn_guetesiegel()

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext rendered from the JSON and its difference to the current page text.
        # The rendered text is only displayed here, it is not saved.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        if not sledrun_json_page.exists():
            summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
            pywikibot.output('\03{lightpurple}---\03{default}')
            pywikibot.output(sledrun_json_text)
            pywikibot.output('\03{lightpurple}---\03{default}')
            self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary,
                         contentmodel='json')

        if map_json is not None and not map_json_page.exists():
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
462
463
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the selected pages.

    :param args: command line arguments; they are handed to pywikibot first and what remains
        is offered to the page generator factory.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        # Without a page generator there is nothing to work on.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
474
475
# Start the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()