]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
83590dad94a4646d89476f026f30d3a11c244e56
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder that appears in the module docstring to the parameter help text
# of the page generators (presumably substituted by pywikibot when the script help is shown - TODO confirm).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Convert a template node to a plain, JSON serializable dict.

    :param value: Template whose ``name`` and ``params`` are read.
    :return: Dict with the template name as string under 'name' and, under 'parameter', a list holding
        one ``{'value': ...}`` dict per template parameter (the string form of the parameter).
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params]
    }
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wiki link node to a plain, JSON serializable dict.

    :param value: Wiki link whose ``title`` and ``text`` are read.
    :return: Dict with the link target as string under 'title'; the key 'text' is only present
        when the link has a text (``value.text`` is not None).
    """
    result = {'title': str(value.title)}
    label = value.text
    if label is None:
        return result
    result['text'] = str(label)
    return result
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert an external link node to a plain, JSON serializable dict.

    :param value: External link whose ``url`` and ``title`` are read.
    :return: Dict with the URL as string under 'url'; the key 'text' is only present
        when the link has a title (``value.title`` is not None).
    """
    caption = value.title
    result = {'url': str(value.url)}
    result.update({} if caption is None else {'text': str(caption)})
    return result
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts sledrun wikitext pages to JSON sub pages.

    For each treated wikitext page that has the category 'Kategorie:Rodelbahn', a sledrun JSON document is
    built from the wikitext and offered for saving as sub page '/Rodelbahn.json'. If the page contains a
    <wrmap> tag, the parsed map is offered for saving as sub page '/Landkarte.json'.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema page; it is used to validate and order the generated JSON."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page to JSON and offer the result for saving.

        The page is skipped (with a warning) if its content model is not wikitext or if it does not have the
        sledrun category. It is skipped silently if the '/Rodelbahn.json' sub page already exists.

        :raises jsonschema.ValidationError: if the generated JSON does not conform to the sledrun schema.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        if sledrun_json_page.exists():  # should be an option
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        # The map is optional: map_json stays None if the page has no <wrmap> tag.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # --- values of the Rodelbahnbox template ---
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) != 1:
            # Make it visible that none of the Rodelbahnbox values end up in the JSON.
            warning(f'The page {self.current_page.title()} has {len(rbb_list)} Rodelbahnbox templates '
                    f'instead of exactly one, therefore no Rodelbahnbox values are converted.')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # Only a hint for the operator, the image name is taken over anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Set 'walkup_supports' from the (type, note) pairs of 'Aufstiegshilfe'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Set 'sled_rental_direct' and 'sled_rental' from the (name, note) pairs of 'Rodelverleih'."""
                rentals_rbb = rbb['Rodelverleih']
                if rentals_rbb is not None:
                    sledrun_json['sled_rental_direct'] = rentals_rbb != []
                    rentals = []
                    for name, note in rentals_rbb:
                        rental = {}
                        name_code = mwparserfromhell.parse(name)
                        # A name containing a wiki link refers to a wiki page, otherwise it is used as plain name.
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            rental['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            rental['name'] = name
                        optional_set(rental, 'note', note)
                        rentals.append(rental)
                    sledrun_json['sled_rental'] = rentals
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether 'Gütesiegel' has at least one entry."""
                cachet_rbb = rbb['Gütesiegel']
                if cachet_rbb is not None:
                    sledrun_json['cachet'] = len(cachet_rbb) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Set 'videos' from the 'video' parameter of the first top level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Convert the section 'Anreise mit öffentlichen Verkehrsmitteln'.

            Sets the description (text before the first list item), the stops ('Haltestelle' templates
            together with the timetable templates following them), the lines ('Fahrplan Linie VVT') and the
            external links of the section.
            """
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = description

            # Templates that add information to the stop they follow, mapped to the JSON key they fill.
            stop_template_keys = {
                "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
                "Fahrplan Hinfahrt VVT": 'route_arrival_template',
                "Fahrplan Rückfahrt VVT": 'route_departure_template',
            }

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # stop that is currently assembled, None if there is no open stop
            for node in pt.nodes:
                if isinstance(node, Template):
                    # Strip the name like it is done for the other templates: '{{Haltestelle\n|...}}' has to match.
                    template_name = node.name.strip()
                    if template_name == 'Haltestelle':
                        if stop is not None:
                            public_transport_stops.append(stop)
                        stop = {}
                        z = node.get(1, None)
                        if z is not None:
                            stop['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            stop['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            stop['position'] = z
                    elif template_name in stop_template_keys:
                        if stop is None:
                            # Without an open stop there is nothing the template could be attached to.
                            warning(f'Template {template_name} on page {self.current_page.title()} '
                                    f'does not follow a Haltestelle template and is ignored.')
                        else:
                            stop[stop_template_keys[template_name]] = template_to_json(node)
                    elif template_name == "Fahrplan Linie VVT":
                        # A line template ends the stop that is currently assembled.
                        if stop is not None:
                            public_transport_stops.append(stop)
                            stop = None
                        public_transport_lines.append({
                            'timetable_template': template_to_json(node),
                        })
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Convert the section 'Anreise mit dem Auto' (description, parking positions and distances)."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is everything between the heading and the first list item.
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), car_section.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            parking = []
            for w in car_section.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    parking.append({'position': z})
            if len(parking) > 0:
                sledrun_json['car_parking'] = parking

            distances = []
            for line in io.StringIO(str(car_section)):
                # Matches lines like "** von '''<route>'''<suffix>: 12,5 km".
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    distances.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(distances) > 0:
                sledrun_json['car_distances'] = distances
        _car()

        # --- section 'Allgemeines' ---
        x = []  # collects the "see also" links of all matching sections
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text after "* '''Beleuchtung''':" or None if the line is missing or empty."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))

            def _gastronomy(value: str):
                """Return one dict per '** ' line that directly follows the line "* '''Hütten''':"."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # What is left besides links and list markers is "name (note)" or just "name".
                            remaining = str(Wikicode([n for n in wiki.nodes
                                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                            match = re.match(r'(.*)\((.+)\)', remaining)
                            if match:
                                name, note = match.groups()
                                name = name.strip()
                                note = note.strip()
                                if len(name) > 0:
                                    g['name'] = name
                                if len(note) > 0:
                                    g['note'] = note
                            elif len(remaining) > 0:
                                g['name'] = remaining
                            gastronomy.append(g)
                        else:
                            # The first line that is no sub list item ends the list.
                            break
                return gastronomy

            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Set 'sled_rental_description' from the "* '''Rodelverleih''':" list item of the section.

                The description is the rest of that line plus all following lines up to the next '* ' line.
                """
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Skip everything up to and including the bold text 'Siehe auch' ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow, stopping at the first other content.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext generated from the JSON and its difference to the current page for review.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Ordering the keys must not change the content.
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')

        # Without a <wrmap> tag there is no map; do not create a map page with the content "null".
        if map_json is not None:
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
441         map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
442         summary = 'Landkarte konvertiert von Wikitext nach JSON.'
443         self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')
444
445
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    :param args: Command line arguments; they are handed to pywikibot and to the page generator factory.
    """
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(pywikibot.handle_args(args))
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        # No page generator was specified: show the help hint instead of running.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
456
457
# Run the bot only if this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()