ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Parse public transport SVV.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
42     template_to_json, external_link_to_json
43
# Maps the '&params;' placeholder used in the module docstring to the help text
# of the page generator options (presumably substituted by pywikibot when it
# prints the script help -- confirm against the pywikibot version in use).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
45
46
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Create sledrun JSON pages (and map JSON pages) from sledrun wikitext pages.

    For each treated wikitext page that has the category ``Kategorie:Rodelbahn``
    the bot collects the sledrun information from the page's templates and
    sections into a dictionary, validates it against the JSON schema loaded in
    :meth:`setup` and saves it as sub page ``/Rodelbahn.json``. A ``<wrmap>``
    tag, when present, is saved as sub page ``/Landkarte.json``.
    """

    # Templates that belong to the most recently seen "Haltestelle" template,
    # mapped to the key under which they are stored in the stop dictionary.
    _STOP_TEMPLATE_KEYS = {
        "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
        "Fahrplan Abfahrtsmonitor VVV": 'monitor_template',
        "Fahrplan Abfahrtsmonitor SVV": 'monitor_template',
        "Fahrplan Hinfahrt VVT": 'route_arrival_template',
        "Fahrplan Hinfahrt VVV": 'route_arrival_template',
        "Fahrplan Hinfahrt SVV": 'route_arrival_template',
        "Fahrplan Rückfahrt VVT": 'route_departure_template',
        "Fahrplan Rückfahrt VVV": 'route_departure_template',
        "Fahrplan Rückfahrt SVV": 'route_departure_template',
    }

    # Templates describing a public transport line (not bound to a stop).
    _LINE_TEMPLATE_NAMES = ("Fahrplan Linie VVT", "Fahrplan Linie VVV", "Fahrplan Linie SVV")

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki into ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current wikitext page to JSON and save the JSON sub pages.

        The page is skipped (with a warning) if its content model is not
        wikitext or if it lacks the sledrun category. It is skipped silently if
        both JSON sub pages already exist.

        Raises:
            jsonschema.ValidationError: if the generated sledrun JSON does not
                conform to the schema loaded in :meth:`setup`.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        if sledrun_json_page.exists() and map_json_page.exists():  # should be an option
            return

        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The helpers below add their keys in the same order as the original
        # monolithic implementation did.
        self._convert_rodelbahnbox(wikicode, sledrun_json)
        self._convert_button_bar(wikicode, sledrun_json)
        self._convert_public_transport(wikicode, sledrun_json)
        self._convert_car(wikicode, sledrun_json)
        self._convert_general(wikicode, sledrun_json)
        sledrun_json['allow_reports'] = True
        self._convert_tiroler_naturrodelbahn_guetesiegel(wikicode, sledrun_json)

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext that would represent the JSON data and how it
        # differs from the current page text.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Ordering the keys must not add, drop or change any value.
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        if not sledrun_json_page.exists():
            summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
            pywikibot.output('\03{lightpurple}---\03{default}')
            pywikibot.output(sledrun_json_text)
            pywikibot.output('\03{lightpurple}---\03{default}')
            self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary,
                         contentmodel='json')

        if map_json is not None and not map_json_page.exists():
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')

    def _convert_rodelbahnbox(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Copy the values of the top-level ``Rodelbahnbox`` template to ``sledrun_json``.

        Nothing is done unless the page has exactly one such template.
        """
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) != 1:
            return
        rbb = rodelbahnbox_from_template(rbb_list[0])

        image = rbb['Bild']
        if image is not None:
            image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                # A missing image is only reported; the name is stored anyway.
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = image

        optional_set(sledrun_json, 'length', rbb['Länge'])

        difficulty = rbb['Schwierigkeit']
        if difficulty is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

        avalanches = rbb['Lawinen']
        if avalanches is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

        has_operator, operator = rbb['Betreiber']
        optional_set(sledrun_json, 'has_operator', has_operator)
        optional_set(sledrun_json, 'operator', operator)

        optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

        walkup_separate, walkup_note = rbb['Aufstieg getrennt']
        if walkup_separate is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
        optional_set(sledrun_json, 'walkup_note', walkup_note)

        optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

        walkup_support_rbb = rbb['Aufstiegshilfe']
        if walkup_support_rbb is not None:
            walkup_supports = []
            for walkup_support_type, note in walkup_support_rbb:
                walkup_support = {'type': walkup_support_type}
                optional_set(walkup_support, 'note', note)
                walkup_supports.append(walkup_support)
            sledrun_json['walkup_supports'] = walkup_supports

        nightlight, nightlight_note = rbb['Beleuchtungsanlage']
        if nightlight is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
        optional_set(sledrun_json, 'nightlight_possible_note', nightlight_note)

        weekdays_count, weekdays_note = rbb['Beleuchtungstage']
        optional_set(sledrun_json, 'nightlight_weekdays_count', weekdays_count)
        optional_set(sledrun_json, 'nightlight_weekdays_note', weekdays_note)

        sled_rental_rbb = rbb['Rodelverleih']
        if sled_rental_rbb is not None:
            sledrun_json['sled_rental_direct'] = sled_rental_rbb != []
            sled_rental = []
            for name, note in sled_rental_rbb:
                rental = {}
                # A rental given as wiki link refers to a page of the wiki,
                # otherwise only its plain name is stored.
                wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    rental['wr_page'] = wikilink_to_json(wiki_link)
                else:
                    rental['name'] = name
                optional_set(rental, 'note', note)
                sled_rental.append(rental)
            sledrun_json['sled_rental'] = sled_rental

        cachet = rbb['Gütesiegel']
        if cachet is not None:
            sledrun_json['cachet'] = len(cachet) > 0

        optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
        optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

        position = rbb['Position']
        if position is not None:
            sledrun_json['position'] = lonlat_to_json(position)

        top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if top != {}:
            sledrun_json['top'] = top

        bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if bottom != {}:
            sledrun_json['bottom'] = bottom

        info_phone = rbb['Telefonauskunft']
        if info_phone is not None:
            sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

        has_info_web, info_web_url = rbb['Webauskunft']
        if has_info_web is not None:
            sledrun_json['info_web'] = [{'url': info_web_url}] if has_info_web else []

        public_transport = rbb['Öffentliche Anreise']
        if public_transport is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

    def _convert_button_bar(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Copy video, webcam and correction e-mail of the first ``Buttonleiste`` template."""
        bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
        bb = next(bb_iter, None)
        if bb is None:
            return
        video = bb.get('video', None)
        if isinstance(video, Parameter) and video.value.strip() != "":
            sledrun_json['videos'] = [{'url': str(video.value.strip())}]
        webcam = bb.get('webcam', None)
        if isinstance(webcam, Parameter) and webcam.value.strip() != "":
            sledrun_json['webcams'] = [{'url': str(webcam.value.strip())}]
        correction = bb.get('Korrektur_To', None)
        if isinstance(correction, Parameter) and correction.value.strip() != "":
            sledrun_json['correction_email'] = correction.value.strip()

    def _convert_public_transport(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Convert the first section 'Anreise mit öffentlichen Verkehrsmitteln'.

        Sets the description (text before the first list item) as well as the
        stops, lines and external links found in the section.
        """
        pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                            include_headings=False)
        if len(pt_sections) < 1:
            return
        pt = pt_sections[0]

        first_item = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
        if first_item is not None:
            description = str(Wikicode(pt.nodes[:pt.nodes.index(first_item)])).strip()
            # The placeholder text of not yet edited pages is not a description.
            if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
                                                          "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
                sledrun_json["public_transport_description"] = str(description)

        public_transport_stops = []
        public_transport_lines = []
        public_transport_links = []
        stop = None  # the stop the following timetable templates belong to
        for node in pt.nodes:
            if isinstance(node, Template):
                name = str(node.name).strip()
                if name == 'Haltestelle':
                    if stop is not None:
                        public_transport_stops.append(stop)
                        # Reset so that an empty "Haltestelle" template that
                        # follows neither re-opens nor duplicates this stop.
                        stop = None
                    if not any(len(p.strip()) != 0 for p in node.params):
                        continue
                    stop = {}
                    municipality = node.get(1, None)
                    if municipality is not None:
                        stop['municipality'] = str(municipality)
                    name_local = node.get(2, None)
                    if name_local is not None:
                        stop['name_local'] = str(name_local)
                    lonlat_str = str(node.get(3, '')).strip()
                    ele_str = str(node.get(4, '')).strip()
                    position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                    if len(position) > 0:
                        stop['position'] = position
                elif name in self._STOP_TEMPLATE_KEYS:
                    if stop is None:
                        # There is no stop the template could be attached to.
                        warning(f"Ignoring template {name} on page {self.current_page.title()} "
                                f"because it does not follow a non-empty Haltestelle template.")
                        continue
                    stop[self._STOP_TEMPLATE_KEYS[name]] = template_to_json(node)
                elif name in self._LINE_TEMPLATE_NAMES:
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    public_transport_lines.append({
                        'timetable_template': template_to_json(node),
                    })
            elif isinstance(node, ExternalLink):
                public_transport_links.append(external_link_to_json(node))
        if stop is not None:
            public_transport_stops.append(stop)

        if len(public_transport_stops) > 0:
            sledrun_json['public_transport_stops'] = public_transport_stops
        if len(public_transport_lines) > 0:
            sledrun_json['public_transport_lines'] = public_transport_lines
        if len(public_transport_links) > 0:
            sledrun_json['public_transport_links'] = public_transport_links

    def _convert_car(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Convert the first section 'Anreise mit dem Auto' (description, parking, distances)."""
        car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
        if not car_section_list:
            return
        car_section = car_section_list[0]

        # The description is everything between the heading and the first list item.
        description_nodes = dropwhile(lambda n: isinstance(n, Heading), car_section.nodes)
        description_nodes = takewhile(lambda n: not (isinstance(n, Tag) and n.wiki_markup == '*'),
                                      description_nodes)
        description = str(Wikicode(list(description_nodes))).strip()
        # The placeholder text of not yet edited pages is not a description.
        if description and not description.startswith(
                "Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
                "wenn man mit dem Auto zur Rodelbahn anreist."):
            sledrun_json["car_description"] = description

        car_parking = []
        for parking in car_section.ifilter_templates(matches='Parkplatz'):
            lonlat_str = str(parking.get(1, '')).strip()
            ele_str = str(parking.get(2, '')).strip()
            position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
            if len(position) > 0:
                car_parking.append({'position': position})
        if len(car_parking) > 0:
            sledrun_json['car_parking'] = car_parking

        car_distances = []
        distance_pattern = re.compile(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km")
        for line in io.StringIO(str(car_section)):
            match = distance_pattern.match(line.rstrip())
            if match:
                origin, via, km = match.groups()
                car_distances.append({
                    'km': float(km.replace(',', '.')),  # decimal comma -> decimal point
                    'route': (origin.strip() + ' ' + via.strip()).strip(),
                })
        if len(car_distances) > 0:
            sledrun_json['car_distances'] = car_distances

    def _convert_general(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Convert the sections 'Allgemeines' (nightlight, gastronomy, sled rental, see also)."""
        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)

            optional_set(sledrun_json, 'nightlight_description', self._nightlight_description(section_text))

            gastronomy = self._gastronomy(section_text)
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            sled_rental_description = self._sled_rental_description(section_text)
            if sled_rental_description is not None:
                sledrun_json['sled_rental_description'] = sled_rental_description

            see_also.extend(self._see_also_links(section))
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

    @staticmethod
    def _nightlight_description(section_text: str) -> Optional[str]:
        """Return the text of the first "Beleuchtung" list item or None if missing or empty."""
        prefix = "* '''Beleuchtung''':"
        for line in io.StringIO(section_text):
            if line.startswith(prefix):
                line = line.replace(prefix, "").strip()
                return line if len(line) > 0 else None
        return None

    @staticmethod
    def _gastronomy(section_text: str) -> list:
        """Return the entries of the sub list directly following the "Hütten" list item."""
        gastronomy = []
        line_iter = io.StringIO(section_text)
        for line in line_iter:
            if line.rstrip() == "* '''Hütten''':":
                break
        else:
            return gastronomy
        # Continue with the lines after the "Hütten" line until the sub list ends.
        for line in line_iter:
            if not line.startswith('** '):
                break
            entry = {}
            wiki = mwparserfromhell.parse(line)
            wiki_link = next(wiki.ifilter_wikilinks(), None)
            if isinstance(wiki_link, Wikilink):
                entry['wr_page'] = wikilink_to_json(wiki_link)
            ext_link = next(wiki.ifilter_external_links(), None)
            if isinstance(ext_link, ExternalLink):
                entry['weblink'] = external_link_to_json(ext_link)
            # What is left besides links and list markup is "name (note)" or just "name".
            remaining = str(Wikicode([n for n in wiki.nodes
                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
            match = re.match(r'(.*)\((.+)\)', remaining)
            if match:
                name, note = match.groups()
                name = name.strip()
                note = note.strip()
                if len(name) > 0:
                    entry['name'] = name
                if len(note) > 0:
                    entry['note'] = note
            elif len(remaining) > 0 and remaining != '...':
                entry['name'] = remaining
            if len(entry) != 0:
                gastronomy.append(entry)
        return gastronomy

    @staticmethod
    def _sled_rental_description(section_text: str) -> Optional[str]:
        """Return the text of the "Rodelverleih" list item including its continuation lines.

        The item ends at the next line starting with ``* ``. None is returned
        if there is no such item or if its text is empty.
        """
        line_iter = io.StringIO(section_text)
        match = None
        for line in line_iter:
            match = re.match(r"\* '''Rodelverleih''':(.*)", line)
            if match is not None:
                break
        if match is None:
            return None
        result = [match.group(1)]
        for line in line_iter:
            if re.match(r"\* ", line) is not None:
                break
            result.append(line)
        description = ''.join(result).strip()
        return description if len(description) > 0 else None

    @staticmethod
    def _see_also_links(section: Wikicode) -> list:
        """Return the external links listed directly after the bold text "Siehe auch"."""
        links = []
        node_iter = iter(section.nodes)
        for node in node_iter:
            if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                break
        # If "Siehe auch" was not found the iterator is exhausted here.
        for node in node_iter:
            if isinstance(node, ExternalLink):
                links.append(external_link_to_json(node))
            elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                continue  # list markup and whitespace between the links
            else:
                break
        return links

    def _convert_tiroler_naturrodelbahn_guetesiegel(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Copy the parameters of 'Tiroler Naturrodelbahn Gütesiegel' templates to ``sledrun_json``.

        If there are several such templates with non-empty parameters, the
        last one wins.
        """
        keys = {
            'Anlagename': 'name',
            'Organisation': 'organization',
            'Erstverleihung': 'first_issued',
            'Verlängerung': 'valid_from',
            'Forum': 'forum_id',
            'Thread': 'thread_id',
        }
        numeric = ['first_issued', 'valid_from', 'forum_id', 'thread_id']
        for gst in wikicode.filter_templates():
            if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
                continue
            gsj = {}
            for key, json_key in keys.items():
                if not gst.has(key):
                    continue
                value = gst.get(key).value.strip()
                if value == '':
                    continue
                gsj[json_key] = int(value) if json_key in numeric else value
            if len(gsj) > 0:
                sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
464
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the selected pages.

    If the arguments do not select any pages, a help hint about the missing
    generator is shown instead.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
475
476
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()