ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
e06d48a0fa07ad3867b044f1c85d40363e8abb93
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder used in the module docstring to the help text of the
# pywikibot page generators.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Return a JSON-serializable dict describing the template *value*.

    The result has the key ``'name'`` (the template name as string) and the key ``'parameter'``,
    a list with one ``{'value': <str>}`` entry per template parameter, in the original order.
    """
    parameter_list = [{'value': str(param)} for param in value.params]
    return {'name': str(value.name), 'parameter': parameter_list}
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a JSON-serializable dict describing the wiki link *value*.

    The key ``'title'`` is always present; the key ``'text'`` is only added when the link has a
    text (i.e. ``value.text`` is not ``None``).
    """
    result = {'title': str(value.title)}
    link_text = value.text
    if link_text is None:
        return result
    result['text'] = str(link_text)
    return result
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a JSON-serializable dict describing the external link *value*.

    The key ``'url'`` is always present; the key ``'text'`` is only added when the link has a
    title (i.e. ``value.title`` is not ``None``).
    """
    result = {'url': str(value.url)}
    link_title = value.title
    if link_title is None:
        return result
    result['text'] = str(link_title)
    return result
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that derives JSON pages from a sledrun wikitext page.

    For every treated wikitext page that links to the category ``Kategorie:Rodelbahn`` the bot builds a
    sledrun JSON document from the wikitext, validates it against the schema loaded in :meth:`setup` and
    offers to save it to the sub page ``/Rodelbahn.json``. The result of parsing the first ``<wrmap>`` tag
    is offered for saving to the sub page ``/Landkarte.json``.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki and keep it in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page to JSON and offer the result for saving.

        The page is skipped (with a warning) if its content model is not ``wikitext`` or if it does not
        link to ``Kategorie:Rodelbahn``. It is skipped silently if the ``/Rodelbahn.json`` sub page
        already exists.

        :raises jsonschema.ValidationError: via ``jsonschema.validate`` if the created JSON does not
            match the schema.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        if sledrun_json_page.exists():  # should be an option
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        # Only the first <wrmap> tag of the page is used.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The "Rodelbahnbox" template is only evaluated if it occurs exactly once at the top level.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported; the value is stored nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Set 'walkup_supports' from the (type, note) pairs of 'Aufstiegshilfe'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Set 'sled_rental_direct' and 'sled_rental' from the (name, note) pairs of 'Rodelverleih'."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, note in v:
                        x = {}
                        name_code = mwparserfromhell.parse(name)
                        # A name containing a wiki link is stored as link, otherwise as plain name.
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        optional_set(x, 'note', note)
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether 'Gütesiegel' is non-empty (if it is given at all)."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Set 'videos' from the non-empty 'video' parameter of the first top level 'Buttonleiste'."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter) and video.value != "":
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links from the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # The description is everything before the first list item ('*'); the default placeholder
            # text is not taken over.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description and not description.startswith("Hier wird beschrieben werden, wie und wie gut man die "
                                                              "Rodelbahn mit öffentlichen Verkehrsmitteln erreicht."):
                    sledrun_json["public_transport_description"] = str(description)

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            ya = None  # the stop currently being assembled (or None)
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        if ya is not None:
                            public_transport_stops.append(ya)
                        if len([1 for p in node.params if len(p.strip()) != 0]) == 0:
                            # NOTE(review): ya is not reset here, so a stop preceding an empty
                            # 'Haltestelle' template looks like it is appended a second time later on.
                            continue
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    # NOTE(review): the following three branches fail with a TypeError if no stop is
                    # being assembled (ya is None) - confirm whether that is intended.
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line template finishes the stop currently being assembled.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking positions and distances from the car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is the text between the heading and the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            x = []
            for w in io.StringIO(str(v)):
                # Matches lines of the form "** von '''<place>'''<suffix>: <number> km".
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))  # accept a decimal comma
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        x = []  # collected "see also" links of all 'Allgemeines' sections
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the non-empty text after "* '''Beleuchtung''':" in *value*, otherwise None."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))

            def _gastronomy(value: str):
                """Return the list of gastronomy entries given as '** ' lines below "* '''Hütten''':"."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # What is left after removing links and the list markers is
                            # interpreted as "name (note)" or as plain name.
                            remaining = str(Wikicode(n for n in wiki.nodes
                                                     if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
                            match = re.match(r'(.*)\((.+)\)', remaining)
                            if match:
                                name, note = match.groups()
                                name = name.strip()
                                note = note.strip()
                                if len(name) > 0:
                                    g['name'] = name
                                if len(note) > 0:
                                    g['note'] = note
                            elif len(remaining) > 0 and remaining != '...':
                                g['name'] = remaining
                            # Only keep entries that carry information. (The check has to look at the
                            # entry, not at the result list, which is empty at the beginning.)
                            if g:
                                gastronomy.append(g)
                        else:
                            # The first line that is not a '** ' item ends the list.
                            break
                return gastronomy

            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Set 'sled_rental_description' from the "* '''Rodelverleih''':" item of the section."""
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                # Lines up to the next top level list item ('* ') belong to the description.
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Skip to the node after "'''Siehe auch'''" ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow, ignoring whitespace and list markup.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext created from the JSON and its difference to the current page text.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')

        # NOTE(review): if the page has no <wrmap> tag, map_json is None and "null" is offered as
        # content of the map page - confirm whether that is intended.
        map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
        summary = 'Landkarte konvertiert von Wikitext nach JSON.'
        self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
448
449
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the selected pages.

    If the arguments do not define a page generator, the pywikibot help hint is shown instead.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
460
461
# Run the bot only when the file is executed as a script.
if __name__ == '__main__':
    main()