]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
196c0ce5bb2704750b37c74ae1fce874edac3499
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder used in this module's docstring to the help text of the
# page generator options. NOTE(review): presumably pywikibot's help output performs the
# substitution - confirm against the pywikibot version in use.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Return a JSON-serializable description of a wikitext template.

    The result has the key ``'name'`` (the template name as a string) and the key
    ``'parameter'`` (a list with one ``{'value': <parameter as string>}`` dict per template
    parameter, in the order of ``value.params``).
    """
    parameters = [{'value': str(param)} for param in value.params]
    return {'name': str(value.name), 'parameter': parameters}
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a JSON-serializable description of a wiki link.

    The result always contains ``'title'``; ``'text'`` is only present if the link has a
    text (i.e. ``value.text`` is not ``None``).
    """
    result = {'title': str(value.title)}
    link_text = value.text
    if link_text is None:
        return result
    result['text'] = str(link_text)
    return result
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a JSON-serializable description of an external link.

    The result always contains ``'url'``; ``'text'`` is only present if the link has a
    title (i.e. ``value.title`` is not ``None``).
    """
    result = {'url': str(value.url)}
    link_title = value.title
    if link_title is None:
        return result
    result['text'] = str(link_title)
    return result
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts sledrun wikitext pages to JSON sub-pages.

    For a treated page ``<title>`` the bot proposes the page ``<title>/Rodelbahn.json`` with
    the sledrun data and, if the wikitext contains a ``<wrmap>`` tag, the page
    ``<title>/Landkarte.json`` with the map data. Pages that already have a
    ``/Rodelbahn.json`` sub-page are skipped.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki and store it in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page to JSON and propose saving the result.

        The page is skipped (with a warning where noted) if

        * its content model is not ``wikitext`` (warning),
        * it does not link to the category ``Kategorie:Rodelbahn`` (warning),
        * the sub-page ``/Rodelbahn.json`` already exists (silently).

        The generated sledrun JSON is passed to ``jsonschema.validate`` together with the
        schema loaded in :meth:`setup` before it is handed to ``userPut``.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        if sledrun_json_page.exists():  # should be an option
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        # Only the first <wrmap> tag of the page is converted.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # --- Template "Rodelbahnbox" (only evaluated if it occurs exactly once at top level) ---
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the value is taken over anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Convert the (type, note) pairs of 'Aufstiegshilfe' to 'walkup_supports'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Convert the (name, note) pairs of 'Rodelverleih' to 'sled_rental'."""
                rentals_rbb = rbb['Rodelverleih']
                if rentals_rbb is not None:
                    sledrun_json['sled_rental_direct'] = rentals_rbb != []
                    rentals = []
                    for name, note in rentals_rbb:
                        rental = {}
                        name_code = mwparserfromhell.parse(name)
                        # A name containing a wiki link refers to a wiki page instead of a plain name.
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            rental['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            rental['name'] = name
                        optional_set(rental, 'note', note)
                        rentals.append(rental)
                    sledrun_json['sled_rental'] = rentals
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether 'Gütesiegel' contains at least one entry."""
                cachet_rbb = rbb['Gütesiegel']
                if cachet_rbb is not None:
                    sledrun_json['cachet'] = len(cachet_rbb) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Take over a non-empty 'video' parameter of the first 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter) and video.value != "":
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Convert the section 'Anreise mit öffentlichen Verkehrsmitteln'."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]

            # The free text before the first bullet list item is the description (unless it is
            # still the placeholder text).
            first_bullet = next((n for n in pt.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
            if first_bullet is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(first_bullet)])).strip()
                if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
                                                              "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
                    sledrun_json["public_transport_description"] = description

            # Timetable templates that belong to the stop given by the preceding 'Haltestelle'.
            stop_template_keys = {
                'Fahrplan Abfahrtsmonitor VVT': 'monitor_template',
                'Fahrplan Hinfahrt VVT': 'route_arrival_template',
                'Fahrplan Rückfahrt VVT': 'route_departure_template',
            }

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            current_stop = None  # stop that is currently being assembled, if any
            for node in pt.nodes:
                if isinstance(node, Template):
                    template_name = str(node.name)
                    if template_name == 'Haltestelle':
                        if current_stop is not None:
                            public_transport_stops.append(current_stop)
                            # Reset, otherwise an empty 'Haltestelle' template would cause the
                            # previous stop to be appended a second time.
                            current_stop = None
                        if not any(len(p.strip()) != 0 for p in node.params):
                            continue
                        current_stop = {}
                        z = node.get(1, None)
                        if z is not None:
                            current_stop['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            current_stop['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            current_stop['position'] = z
                    elif template_name in stop_template_keys:
                        if current_stop is None:
                            # Nothing to attach the template to (no or only an empty 'Haltestelle'
                            # before it); skip it instead of failing with a TypeError.
                            warning(f"Template {template_name} on page {self.current_page.title()} "
                                    f"does not follow a public transport stop and is ignored.")
                        else:
                            current_stop[stop_template_keys[template_name]] = template_to_json(node)
                    elif template_name == "Fahrplan Linie VVT":
                        # A line template ends the stop that is currently being assembled.
                        if current_stop is not None:
                            public_transport_stops.append(current_stop)
                            current_stop = None
                        public_transport_lines.append({
                            'timetable_template': template_to_json(node),
                        })
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if current_stop is not None:
                public_transport_stops.append(current_stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Convert the section 'Anreise mit dem Auto'."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is the text between the heading and the first bullet list item
            # (unless it is still the placeholder text).
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), car_section.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                if not description.startswith("Hier wollen wir Besonderheiten beschreiben, die es zu beachten gibt, "
                                              "wenn man mit dem Auto zur Rodelbahn anreist."):
                    sledrun_json["car_description"] = description

            parking = []
            for w in car_section.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    parking.append({'position': z})
            if len(parking) > 0:
                sledrun_json['car_parking'] = parking

            # Distance lines look like: ** von '''<place>'''<remark>: <km> km
            distances = []
            for w in io.StringIO(str(car_section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    distances.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(distances) > 0:
                sledrun_json['car_distances'] = distances
        _car()

        def _nightlight(value: str) -> Optional[str]:
            """Return the text after the bullet "* '''Beleuchtung''':" or None if missing/empty."""
            for line in io.StringIO(value):
                if line.startswith("* '''Beleuchtung''':"):
                    line = line.replace("* '''Beleuchtung''':", "").strip()
                    if len(line) > 0:
                        return line
                    return None
            return None

        def _gastronomy(value: str) -> list:
            """Return the entries of the sub-list directly following the bullet "* '''Hütten''':"."""
            gastronomy = []
            line_iter = io.StringIO(value)
            # Skip everything up to and including the "Hütten" bullet.
            for line in line_iter:
                if line.rstrip() == "* '''Hütten''':":
                    break
            else:
                return gastronomy
            for line in line_iter:
                if not line.startswith('** '):
                    # The sub-list ends with the first line that is not a second level bullet.
                    break
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = external_link_to_json(ext_link)
                # What is left besides the links and the bullet markup is "<name> (<note>)" or "<name>".
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'(.*)\((.+)\)', remaining)
                if match:
                    name, note = match.groups()
                    name = name.strip()
                    note = note.strip()
                    if len(name) > 0:
                        g['name'] = name
                    if len(note) > 0:
                        g['note'] = note
                elif len(remaining) > 0 and remaining != '...':
                    g['name'] = remaining
                # Only keep entries with content. (Testing the result list here instead of the
                # entry would never add anything, as the list starts out empty.)
                if len(g) != 0:
                    gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Set 'sled_rental_description' from the bullet "* '''Rodelverleih''':".

            The description consists of the rest of that line and all following lines up to
            (excluding) the next line that starts with "* ".
            """
            line_iter = io.StringIO(value)
            line = next(line_iter, None)
            match = None
            while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                line = next(line_iter, None)
            if match is None:
                return
            result = [match.group(1)]
            line = next(line_iter, None)
            while line is not None and re.match(r"\* ", line) is None:
                result.append(line)
                line = next(line_iter, None)
            description = ''.join(result).strip()
            if len(description) > 0:
                sledrun_json['sled_rental_description'] = description

        # --- Section(s) "Allgemeines" ---
        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)
            optional_set(sledrun_json, 'nightlight_description', _nightlight(section_text))

            gastronomy_list = _gastronomy(section_text)
            if len(gastronomy_list) > 0:
                sledrun_json['gastronomy'] = gastronomy_list

            _sled_rental_description(section_text)

            # Collect the external links that directly follow the bold text "Siehe auch".
            node_iter = iter(section.nodes)
            node = next(node_iter, None)
            while node is not None:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    node = next(node_iter, None)
                    break
                node = next(node_iter, None)
            while node is not None:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    pass  # list markup and whitespace between the links
                else:
                    break
                node = next(node_iter, None)
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext generated from the JSON data compared to the current wikitext.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')

        # Without a <wrmap> tag there is no map to convert; do not propose a map page that
        # would only contain the JSON literal "null".
        if map_json is not None:
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
452
453
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The global pywikibot options are handled first; the remaining arguments are passed to
    the page generator factory. If no page generator results from the arguments, the help
    hint about the missing generator is shown instead of running the bot.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    bot = SledrunWikiTextToJsonBot(generator=page_generator)
    bot.run()
464
465
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()