]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
eec3329f468836d9e82042697d8e11e19d03b37a
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder used in the module docstring to the help text of the
# page generators (presumably substituted by pywikibot when it prints the script help - TODO confirm).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Turn a parsed template into a JSON compatible dict.

    The result has the string form of the template name under 'name' and, under 'parameter',
    one ``{'value': ...}`` entry per template parameter holding the string form of that parameter.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Turn a parsed wiki link into a JSON compatible dict.

    The dict always has the key 'title'; the key 'text' is only present if the link has a display text.
    """
    title = str(value.title)
    if value.text is None:
        return {'title': title}
    return {'title': title, 'text': str(value.text)}
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Turn a parsed external link into a JSON compatible dict.

    The dict always has the key 'url'; the key 'text' is only present if the link has a title.
    """
    url = str(value.url)
    if value.title is None:
        return {'url': url}
    return {'url': url, 'text': str(value.title)}
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that creates the sledrun JSON page and the map JSON page from a sledrun wikitext page."""

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki and keep it in ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page to JSON and offer the result for saving.

        The page is skipped with a warning if its content model is not wikitext or if it does not
        link to the category 'Kategorie:Rodelbahn'. Otherwise the Rodelbahnbox template, the button bar,
        the two arrival sections and the section 'Allgemeines' are collected into a dict. The dict is
        validated against the schema loaded in :meth:`setup` and put to '<title>/Rodelbahn.json'.
        The parsed ``<wrmap>`` tag is put to '<title>/Landkarte.json'.

        :raises jsonschema.ValidationError: if the collected data does not conform to the sledrun schema.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        # A category link like [[Kategorie:In Arbeit]] carries the category in its title,
        # its text is None, so the title has to be compared (same as for category_sledrun above).
        category_under_construction = 'Kategorie:In Arbeit'
        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == category_under_construction for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # --- Rodelbahnbox template (only evaluated if there is exactly one) ---
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # Only a warning: the image name is taken over even if the file is missing.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Copy the walk-up supports (type and optional comment) of the Rodelbahnbox."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_possible_comment'] = w

            v, w = rbb['Beleuchtungstage']
            if v is not None:
                sledrun_json['nightlight_weekdays_count'] = v
            if w is not None:
                sledrun_json['nightlight_weekdays_comment'] = w

            def _sled_rental():
                """Copy the sled rentals; a rental is referenced by wiki page if its name contains a wiki link."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether the Rodelbahnbox lists at least one cachet."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Take over the video URL of the first top level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # The parameter value is a Wikicode object; it has to be converted to a plain string
                    # to be valid for the JSON schema and serializable by json.dumps.
                    sledrun_json['videos'] = [{'url': str(video.value).strip()}]
        _button_bar()

        def _public_transport():
            """Collect description, stops, lines and links of the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # Everything before the first list item ('*') is the free text description.
            # The position is looked up by index (not by node equality) to hit exactly that list item.
            first_item = next((i for i, node in enumerate(pt.nodes)
                               if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if first_item is not None:
                description = str(Wikicode(pt.nodes[:first_item])).strip()
                if description:
                    sledrun_json["public_transport_description"] = description

            # Templates that add a detail to the stop defined by the preceding 'Haltestelle' template.
            stop_detail_keys = {
                'Fahrplan Abfahrtsmonitor VVT': 'monitor_template',
                'Fahrplan Hinfahrt VVT': 'route_arrival_template',
                'Fahrplan Rückfahrt VVT': 'route_departure_template',
            }
            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            ya = None  # the stop that is currently assembled (None if there is no open stop)
            for node in pt.nodes:
                if isinstance(node, Template):
                    # Strip the name as it is done for the other templates, e.g. for '{{Haltestelle |...}}'.
                    template_name = str(node.name).strip()
                    if template_name == 'Haltestelle':
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    elif template_name in stop_detail_keys:
                        if ya is None:
                            # Without an open stop there is nothing the template could be attached to.
                            warning(f"Template {template_name} on page {self.current_page.title()} "
                                    f"does not follow a template Haltestelle and is ignored.")
                        else:
                            ya[stop_detail_keys[template_name]] = template_to_json(node)
                    elif template_name == 'Fahrplan Linie VVT':
                        # A line template ends the stop that is currently assembled.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Collect description, parking places and distances of the car arrival section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is the text between the heading and the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            x = []
            for w in io.StringIO(str(v)):
                # Distance lines look like: ** von '''<place>'''<route details>: <number> km
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        def _nightlight(value: str) -> Optional[str]:
            """Return the text following "* '''Beleuchtung''':" in the first such line, None if missing or empty."""
            prefix = "* '''Beleuchtung''':"
            for line in io.StringIO(value):
                if line.startswith(prefix):
                    line = line.replace(prefix, "").strip()
                    return line if len(line) > 0 else None
            return None

        def _gastronomy(value: str) -> list:
            """Return one dict for each '** ' line directly following the line "* '''Hütten''':"."""
            gastronomy = []
            line_iter = io.StringIO(value)
            # any() consumes the lines up to and including the heading line of the list.
            if not any(line.rstrip() == "* '''Hütten''':" for line in line_iter):
                return gastronomy
            for line in line_iter:
                if not line.startswith('** '):
                    break
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = external_link_to_json(ext_link)
                # What is left beside the links and the list markers is taken as note.
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'\((.+)\)', remaining)
                if match:
                    remaining = match.group(1)
                if len(remaining) > 0:
                    g['note'] = remaining
                gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Set 'sled_rental_description' to the text after "* '''Rodelverleih''':" up to the next '* ' line."""
            line_iter = io.StringIO(value)
            line = next(line_iter, None)
            match = None
            while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                line = next(line_iter, None)
            if match is None:
                return
            result = [match.group(1)]
            line = next(line_iter, None)
            while line is not None and re.match(r"\* ", line) is None:
                result.append(line)
                line = next(line_iter, None)
            sledrun_json['sled_rental_description'] = ''.join(result).strip()

        # --- section(s) 'Allgemeines' ---
        x = []
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            w = _nightlight(str(v))
            if w is not None:
                sledrun_json['nightlight_description'] = w

            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            _sled_rental_description(str(v))

            # Skip everything up to and including the bold text 'Siehe auch' ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow until something else than a link shows up.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext generated from the JSON and its difference to the current page for review.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary)

        # NOTE(review): map_json is None if the page has no <wrmap> tag, in this case 'null' is saved
        # as content of the map page - confirm that this is intended.
        map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
        summary = 'Landkarte konvertiert von Wikitext nach JSON.'
        self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary)
455
456
def main(*args: str) -> None:
    """Process the command line arguments and run the bot on the selected pages.

    If the arguments do not define a page generator, the pywikibot help hint is shown instead.

    :param args: command line arguments
    """
    # Global options have to be handled first, the generator factory gets the remaining ones.
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
467
468
# Start the bot only if the file is executed as script (not if it is imported as module).
if __name__ == '__main__':
    main()