]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Move some functions to lib_sledrun_wikitext_to_json.py
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, wikilink_to_json, \
42     template_to_json, external_link_to_json
43
# Maps the '&params;' placeholder that appears in the module docstring to the
# help text of the page generators (presumably substituted by pywikibot when
# the script help is printed - confirm against the pywikibot documentation).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
45
46
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot creating JSON sub-pages from sledrun wikitext pages.

    For each treated page the sledrun data is written to ``<title>/Rodelbahn.json`` and, if the page contains
    a ``<wrmap>`` tag, the map data is written to ``<title>/Landkarte.json``. Existing JSON pages are not
    overwritten.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema page and keep the parsed schema in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page to JSON and save the resulting sub-pages.

        The page is skipped (with a warning where applicable) if its content model is not wikitext, if it is
        not in the category ``Kategorie:Rodelbahn`` or if both JSON sub-pages already exist.
        The generated sledrun JSON is validated against ``self.sledrun_schema`` before it is saved, so
        ``jsonschema.validate`` may raise for pages that cannot be converted cleanly.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        if sledrun_json_page.exists() and map_json_page.exists():  # should be an option
            return

        # Only the first <wrmap> tag of the page is converted.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # --- Rodelbahnbox template: only evaluated if the page has exactly one at top level. ---
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported; the name is stored nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Store the list of walkup supports (type and optional note)."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Store the sled rentals; a rental given as wiki link is stored as 'wr_page', else as 'name'."""
                rentals_rbb = rbb['Rodelverleih']
                if rentals_rbb is not None:
                    sledrun_json['sled_rental_direct'] = rentals_rbb != []
                    rentals = []
                    for name, note in rentals_rbb:
                        rental = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            rental['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            rental['name'] = name
                        optional_set(rental, 'note', note)
                        rentals.append(rental)
                    sledrun_json['sled_rental'] = rentals
            _sled_rental()

            def _cachet():
                """Store whether at least one cachet is listed."""
                cachet_rbb = rbb['Gütesiegel']
                if cachet_rbb is not None:
                    sledrun_json['cachet'] = len(cachet_rbb) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Extract video URL and correction e-mail from the first top level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter) and video.value.strip() != "":
                    sledrun_json['videos'] = [{'url': str(video.value.strip())}]
                correction = bb.get('Korrektur_To', None)
                if isinstance(correction, Parameter) and correction.value.strip() != "":
                    sledrun_json['correction_email'] = correction.value.strip()
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links from the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # The description is the text in front of the first list item.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
                                                              "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
                    sledrun_json["public_transport_description"] = str(description)

            # Templates that belong to the stop defined by the preceding 'Haltestelle' template,
            # mapped to the JSON key they are stored under.
            stop_template_keys = {
                "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
                "Fahrplan Abfahrtsmonitor VVV": 'monitor_template',
                "Fahrplan Hinfahrt VVT": 'route_arrival_template',
                "Fahrplan Hinfahrt VVV": 'route_arrival_template',
                "Fahrplan Rückfahrt VVT": 'route_departure_template',
                "Fahrplan Rückfahrt VVV": 'route_departure_template',
            }
            line_template_names = ("Fahrplan Linie VVT", "Fahrplan Linie VVV")

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # stop that is currently being assembled, None if there is none
            for node in pt.nodes:
                if isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
                    continue
                if not isinstance(node, Template):
                    continue
                # Strip the name like the other template checks of this bot do,
                # so that e.g. a trailing newline in the template name does not prevent a match.
                template_name = node.name.strip()
                if template_name == 'Haltestelle':
                    if stop is not None:
                        public_transport_stops.append(stop)
                        # Reset, otherwise an empty placeholder template below would leave the
                        # finished stop pending and it would be appended a second time.
                        stop = None
                    if not any(p.strip() for p in node.params):
                        continue  # placeholder without any values
                    stop = {}
                    z = node.get(1, None)
                    if z is not None:
                        stop['municipality'] = str(z)
                    z = node.get(2, None)
                    if z is not None:
                        stop['name_local'] = str(z)
                    za = str(node.get(3, '')).strip()
                    zb = str(node.get(4, '')).strip()
                    z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                    if len(z) > 0:
                        stop['position'] = z
                elif template_name in stop_template_keys:
                    if stop is None:
                        # No stop to attach the template to: report it instead of failing with a TypeError.
                        warning(f"{self.current_page.title()}: template {template_name} is not preceded by a "
                                f"non-empty Haltestelle template and is ignored.")
                        continue
                    stop[stop_template_keys[template_name]] = template_to_json(node)
                elif template_name in line_template_names:
                    # A line template ends the stop that is currently assembled.
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    public_transport_lines.append({
                        'timetable_template': template_to_json(node),
                    })
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking places and distances from the car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is everything between the heading and the first list item.
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), car_section.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
                                              "wenn man mit dem Auto zur Rodelbahn anreist."):
                    sledrun_json["car_description"] = description

            parking = []
            for template in car_section.ifilter_templates(matches='Parkplatz'):
                za = str(template.get(1, '')).strip()
                zb = str(template.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    parking.append({'position': z})
            if len(parking) > 0:
                sledrun_json['car_parking'] = parking

            distances = []
            for line in io.StringIO(str(car_section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    distances.append({
                        'km': float(yc.replace(',', '.')),
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(distances) > 0:
                sledrun_json['car_distances'] = distances
        _car()

        def _nightlight(value: str) -> Optional[str]:
            """Return the text after the first "* '''Beleuchtung''':" line start or None if missing/empty."""
            for line in io.StringIO(value):
                if line.startswith("* '''Beleuchtung''':"):
                    text = line.replace("* '''Beleuchtung''':", "").strip()
                    return text or None
            return None

        def _gastronomy(value: str) -> list:
            """Return the entries of the '** ' list that directly follows the "* '''Hütten''':" line."""
            gastronomy = []
            line_iter = io.StringIO(value)
            for line in line_iter:
                if line.rstrip() == "* '''Hütten''':":
                    break
            else:
                return gastronomy  # no gastronomy list in this section
            for line in line_iter:
                if not line.startswith('** '):
                    break  # end of the second level list
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = external_link_to_json(ext_link)
                # What is left besides links and the list markup is "name (note)" or just "name".
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'(.*)\((.+)\)', remaining)
                if match:
                    name, note = match.groups()
                    name = name.strip()
                    note = note.strip()
                    if len(name) > 0:
                        g['name'] = name
                    if len(note) > 0:
                        g['note'] = note
                elif len(remaining) > 0 and remaining != '...':
                    g['name'] = remaining
                # Skip entries without any information (e.g. the placeholder '** ...').
                # Testing the result list here instead of the entry would never append anything.
                if g:
                    gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Store the text of the "* '''Rodelverleih''':" list item including its continuation lines."""
            line_iter = io.StringIO(value)
            match = None
            first_line = ''
            for line in line_iter:
                match = re.match(r"\* '''Rodelverleih''':(.*)", line)
                if match is not None:
                    first_line = line
                    break
            if match is None:
                return
            # Keep the rest of the line *including* its line break, so that text on the first line
            # is not glued to the following lines.
            result = [first_line[match.start(1):]]
            for line in line_iter:
                if re.match(r"\* ", line) is not None:
                    break  # next first level list item
                result.append(line)
            description = ''.join(result).strip()
            if len(description) > 0:
                sledrun_json['sled_rental_description'] = description

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)
            optional_set(sledrun_json, 'nightlight_description', _nightlight(section_text))

            gastronomy_list = _gastronomy(section_text)
            if len(gastronomy_list) > 0:
                sledrun_json['gastronomy'] = gastronomy_list

            _sled_rental_description(section_text)

            # Collect the external links that directly follow the bold text 'Siehe auch'.
            node_iter = iter(section.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    continue  # list markup and whitespace between the links
                else:
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        def _tiroler_naturrodelbahn_guetesiegel():
            """Convert 'Tiroler Naturrodelbahn Gütesiegel' templates; the last non-empty one wins."""
            keys = {
                'Anlagename': 'name',
                'Organisation': 'organization',
                'Erstverleihung': 'first_issued',
                'Verlängerung': 'valid_from',
                'Forum': 'forum_id',
                'Thread': 'thread_id',
            }
            numeric = ('first_issued', 'valid_from', 'forum_id', 'thread_id')
            for gst in wikicode.filter_templates():
                if gst.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
                    continue
                gsj = {}
                for key, json_key in keys.items():
                    if gst.has(key):
                        value = gst.get(key).value.strip()
                        if value != '':
                            if json_key in numeric:
                                value = int(value)
                            gsj[json_key] = value
                if len(gsj) > 0:
                    sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = gsj
        _tiroler_naturrodelbahn_guetesiegel()

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext generated from the JSON and its difference to the current page text.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        if not sledrun_json_page.exists():
            summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
            pywikibot.output('\03{lightpurple}---\03{default}')
            pywikibot.output(sledrun_json_text)
            pywikibot.output('\03{lightpurple}---\03{default}')
            self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary,
                         contentmodel='json')

        if map_json is not None and not map_json_page.exists():
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
459
460
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    Arguments that are not consumed by pywikibot itself are passed to the
    page generator factory. If they do not yield a page generator, the help
    hint for a missing generator is shown instead of running the bot.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
471
472
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()