]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
428316fdf983f4e69fdd61ab58c0eeb0ccd67895
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Any, Optional
17
18 import mwparserfromhell
19 from mwparserfromhell.nodes.extras import Parameter
20
21 import pywikibot
22 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
23 from mwparserfromhell.wikicode import Wikicode
24 from pywikibot import pagegenerators, Page
25 from pywikibot.bot import (
26     AutomaticTWSummaryBot,
27     ConfigParserBot,
28     ExistingPageBot,
29     NoRedirectPageBot,
30     SingleSiteBot,
31 )
32 from pywikibot.logging import warning
33 from pywikibot.site._namespace import BuiltinNamespace
34
35 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
36 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
37     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
38     opt_uint_from_str
39
40 from pywikibot.site import Namespace
41
# Maps the '&params;' placeholder that appears in the module docstring to
# pagegenerators.parameterHelp.
# NOTE(review): presumably pywikibot substitutes this when printing the script help - confirm.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
43
44
def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, passing ``None`` through unchanged."""
    return None if value is None else str(value)
49
50
def template_to_json(value: Template) -> dict:
    """Convert a template node to a dict with its name and parameters.

    :param value: Template whose ``name`` and ``params`` are read.
    :return: ``{'name': <str>, 'parameter': [{'value': <str>}, ...]}`` with one
        entry per parameter, each being the string form of that parameter.
    """
    parameter_list = [{'value': str(param)} for param in value.params]
    return {'name': str(value.name), 'parameter': parameter_list}
59
60
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wikilink node to a dict.

    :param value: Wikilink whose ``title`` and ``text`` are read.
    :return: Dict with key ``'title'`` and, only when the link has a text,
        the additional key ``'text'``.
    """
    link_text = value.text
    result = {'title': str(value.title)}
    if link_text is None:
        return result
    result['text'] = str(link_text)
    return result
67
68
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert an external link node to a dict.

    :param value: External link whose ``url`` and ``title`` are read.
    :return: Dict with key ``'url'`` and, only when the link has a title,
        the additional key ``'text'``.
    """
    result = {'url': str(value.url)}
    link_title = value.title
    if link_title is None:
        return result
    result['text'] = str(link_title)
    return result
74
75
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that regenerates a sledrun wikitext page from data extracted out of it.

    For each page the wikitext is parsed into a ``sledrun_json`` dict (plus an
    optional map parsed from a ``<wrmap>`` tag). Both are handed to
    ``create_sledrun_wiki`` and the returned text is saved as the new page text.
    """

    def treat_page(self) -> None:
        """Extract the sledrun data of the current page and save the regenerated text.

        The page is skipped with a warning when

        * its content model is not ``wikitext``,
        * it does not link to the category ``Kategorie:Rodelbahn``, or
        * one of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag of the page is used.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category link like [[Kategorie:In Arbeit]] carries the category in its title
            # (its text is None), so the title has to be compared - same as for category_sledrun above.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # Description: first non-blank top-level text node of the first 'Allgemeines' section.
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for w in v.ifilter_text(recursive=False):
                x = w.strip()
                if x:
                    sledrun_json["description"] = str(x)
                    break
            break

        # The Rodelbahnbox is only evaluated if the page contains exactly one such template.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the image name is still taken over.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Copy the (type, comment) pairs of 'Aufstiegshilfe' to 'walkup_supports'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            # Must be an assignment; "walkup_support['comment']: comment" is a bare
                            # annotation statement that stores nothing.
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _sled_rental():
                """Convert the 'Rodelverleih' entries to 'sled_rental_direct' and 'sled_rental'."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        # An entry is either a link to a wiki page or a plain name.
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether 'Gütesiegel' has at least one entry."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Take the 'video' parameter of the first 'Buttonleiste' template as the only video."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Store a plain string like everywhere else instead of the Wikicode object.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links of the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # Everything before the first list item ('*') is taken as the description.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = str(description)

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            # ya is the stop that is currently being assembled (None if there is none).
            ya = None
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        # A new stop starts: flush the previous one.
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str_or_none(node.get(3, None))
                        zb = str_or_none(node.get(4, None))
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    # NOTE(review): the following three branches attach the template to the current
                    # stop and fail with a TypeError if no 'Haltestelle' template precedes them
                    # (ya is None). Left unchanged on purpose: dropping the template silently would
                    # lose content of the page that is rewritten afterwards.
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line template ends the current stop.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking positions and distances of the car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # Description: nodes after the leading heading(s) up to the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str_or_none(w.get(1, None))
                zb = str_or_none(w.get(2, None))
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distances are read line by line from lines like "** von '''<place>'''<rest>: <number> km".
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    # Accept a decimal comma.
                    yc = float(yc.replace(',', '.'))
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        # x collects the "see also" links of all 'Allgemeines' sections.
        x = []
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _gastronomy(value: str):
                """Return the '** ' entries that directly follow the line "* '''Hütten''':"."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # The note is what is left of the text and tag nodes (without list markers),
                            # with an enclosing pair of parentheses removed.
                            remaining = str(Wikicode([n for n in wiki.nodes
                                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                            match = re.match(r'\((.+)\)', remaining)
                            if match:
                                remaining = match.group(1)
                            if len(remaining) > 0:
                                g['note'] = remaining
                            gastronomy.append(g)
                        else:
                            # The first line that is no '** ' entry ends the list.
                            break
                return gastronomy
            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text of the "* '''Rodelverleih''':" item up to the next '* ' line."""
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Skip forward to the node after the bold "Siehe auch" tag ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow, ignoring blank text, '*' and ':' nodes.
            # Any other node ends the collection.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        # Pass the title of the impressions sub page only if that page exists.
        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
425
426
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The arguments are first handled by pywikibot itself, the remaining ones
    are passed to the page generator factory. If no page generator results
    from the arguments, a help hint is shown instead of running the bot.

    :param args: Command line arguments.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
437
438
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()