]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
3bc293e9f4c965ea1475fd2fbd181b62ed5cdca1
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder used in the module docstring to pagegenerators.parameterHelp.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Convert a template node to a JSON compatible dict.

    The result has the key 'name' (string form of the template name) and the key 'parameter',
    a list with one ``{'value': ...}`` dict per template parameter holding the parameter's string form.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wiki link node to a JSON compatible dict.

    The key 'title' is always present; the key 'text' is only added if the link has a text.
    """
    result = {'title': str(value.title)}
    label = value.text
    if label is None:
        return result
    result['text'] = str(label)
    return result
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert an external link node to a JSON compatible dict.

    The key 'url' is always present; the key 'text' is only added if the link has a title.
    """
    result = {'url': str(value.url)}
    label = value.title
    if label is None:
        return result
    result['text'] = str(label)
    return result
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts sledrun wikitext pages to JSON sub pages.

    For every treated page the sub page ``<title>/Rodelbahn.json`` is written and, if the page
    contains a ``wrmap`` tag, also the sub page ``<title>/Landkarte.json``.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki and keep it in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Create the sledrun JSON page (and the map JSON page) from the current wikitext page.

        The page is skipped with a warning if its content model is not wikitext or if it does not
        have the category ``Kategorie:Rodelbahn``. It is skipped silently if
        ``<title>/Rodelbahn.json`` already exists.

        The generated JSON is validated against the schema loaded in :meth:`setup`;
        ``jsonschema.validate`` raises if the JSON does not conform.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        if sledrun_json_page.exists():  # should be an option
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        # map_json stays None if the page has no wrmap tag; only the first wrmap tag is used.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The Rodelbahnbox template is only evaluated if there is exactly one on the top level of the page.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image only causes a warning, the image name is stored nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Set 'walkup_supports' from the 'Aufstiegshilfe' entries (type and optional note)."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Set 'sled_rental_direct' and 'sled_rental' from the 'Rodelverleih' entries."""
                rentals_rbb = rbb['Rodelverleih']
                if rentals_rbb is not None:
                    sledrun_json['sled_rental_direct'] = rentals_rbb != []
                    rentals = []
                    for name, note in rentals_rbb:
                        rental = {}
                        name_code = mwparserfromhell.parse(name)
                        # A name containing a wiki link is stored as a page reference, otherwise as plain name.
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            rental['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            rental['name'] = name
                        optional_set(rental, 'note', note)
                        rentals.append(rental)
                    sledrun_json['sled_rental'] = rentals
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether the 'Gütesiegel' value is non-empty."""
                cachet_rbb = rbb['Gütesiegel']
                if cachet_rbb is not None:
                    sledrun_json['cachet'] = len(cachet_rbb) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Set 'videos' from the 'video' parameter of the first top level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter) and video.value != "":
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links from the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]

            # Everything before the first list item ('*') is taken as free text description.
            first_item = next((n for n in pt.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
            if first_item is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(first_item)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = str(description)

            # Templates that belong to the stop opened by the preceding 'Haltestelle' template.
            stop_template_keys = {
                'Fahrplan Abfahrtsmonitor VVT': 'monitor_template',
                'Fahrplan Hinfahrt VVT': 'route_arrival_template',
                'Fahrplan Rückfahrt VVT': 'route_departure_template',
            }

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # stop currently being assembled, not yet appended to public_transport_stops
            for node in pt.nodes:
                if isinstance(node, Template):
                    # Strip the name like the other template lookups do, so '{{Haltestelle\n|...}}' matches too.
                    template_name = node.name.strip()
                    if template_name == 'Haltestelle':
                        if stop is not None:
                            public_transport_stops.append(stop)
                            # Reset, otherwise an empty 'Haltestelle' below would leave the finished stop
                            # open and it would be appended a second time.
                            stop = None
                        if not any(p.strip() for p in node.params):
                            continue
                        stop = {}
                        z = node.get(1, None)
                        if z is not None:
                            stop['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            stop['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            stop['position'] = z
                    elif template_name in stop_template_keys:
                        if stop is None:
                            # No open stop to attach the template to: skip instead of failing on None.
                            warning(f'The page {self.current_page.title()} has a template {template_name} '
                                    f'without a preceding non-empty Haltestelle template.')
                            continue
                        stop[stop_template_keys[template_name]] = template_to_json(node)
                    elif template_name == 'Fahrplan Linie VVT':
                        # A line template ends the current stop.
                        if stop is not None:
                            public_transport_stops.append(stop)
                            stop = None
                        public_transport_lines.append({
                            'timetable_template': template_to_json(node),
                        })
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking positions and distances from the car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is the text between the heading and the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), car_section.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            parking = []
            for w in car_section.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    parking.append({'position': z})
            if len(parking) > 0:
                sledrun_json['car_parking'] = parking

            distances = []
            for line in io.StringIO(str(car_section)):
                # Matches lines like "** von '''<place>'''<via>: <number> km"; decimal comma is accepted.
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    distances.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(distances) > 0:
                sledrun_json['car_distances'] = distances
        _car()

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text after "* '''Beleuchtung''':" on its line, or None if missing or empty."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            optional_set(sledrun_json, 'nightlight_description', _nightlight(str(section)))

            def _gastronomy(value: str):
                """Return the list of entries ('** ' lines) directly following the "* '''Hütten''':" line."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # What is left besides links and the list markers is "name (note)" or just "name".
                            # Wikicode is given a real list (not a one-shot generator) of nodes.
                            remaining = str(Wikicode([n for n in wiki.nodes
                                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                            match = re.match(r'(.*)\((.+)\)', remaining)
                            if match:
                                name, note = match.groups()
                                name = name.strip()
                                note = note.strip()
                                if len(name) > 0:
                                    g['name'] = name
                                if len(note) > 0:
                                    g['note'] = note
                            elif len(remaining) > 0:
                                g['name'] = remaining
                            gastronomy.append(g)
                        else:
                            # The first line that is no '** ' entry ends the list.
                            break
                return gastronomy

            w = _gastronomy(str(section))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Set 'sled_rental_description' from the "* '''Rodelverleih''':" item of the section."""
                line_iter = io.StringIO(str(section))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                # The description continues until the next top level list item ('* ').
                result = [match.group(1)]
                line = next(line_iter, None)
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Collect the external links that directly follow the bold text 'Siehe auch'.
            node_iter = iter(section.nodes)
            w = next(node_iter, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(node_iter, None)
                    break
                w = next(node_iter, None)
            while w is not None:
                if isinstance(w, ExternalLink):
                    see_also.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(node_iter, None)
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext generated back from the JSON and its diff to the current page for manual review.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Sanity check: ordering the keys must not change the content.
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')

        # Without a wrmap tag there is no map; do not create a map page containing just 'null'.
        if map_json is not None:
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
446
447
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The arguments are first given to pywikibot.handle_args; what it returns is handed to the
    page generator factory. If no generator results, the pywikibot help hint is shown instead.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
458
459
# Run the bot only if the file is executed as a script.
if __name__ == '__main__':
    main()