]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
d52abd741ac7c1113b74ccdcb31bbe060954f8a5
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Any, Optional
17
18 import mwparserfromhell
19 from mwparserfromhell.nodes.extras import Parameter
20
21 import pywikibot
22 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
23 from mwparserfromhell.wikicode import Wikicode
24 from pywikibot import pagegenerators, Page
25 from pywikibot.bot import (
26     AutomaticTWSummaryBot,
27     ConfigParserBot,
28     ExistingPageBot,
29     NoRedirectPageBot,
30     SingleSiteBot,
31 )
32 from pywikibot.logging import warning
33 from pywikibot.site._namespace import BuiltinNamespace
34
35 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
36 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
37     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
38     opt_uint_from_str
39
40 from pywikibot.site import Namespace
41
# Maps the '&params;' placeholder that appears in the module docstring to the help text of the
# page generator options (presumably substituted by pywikibot when it prints the script help).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
43
44
def template_to_json(value: Template) -> dict:
    """Describe a template node as a plain dict.

    The result carries the stringified template name under ``'name'`` and, under
    ``'parameter'``, a list with one ``{'value': ...}`` entry per template parameter
    (each parameter converted with ``str()``), in their original order.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
53
54
def wikilink_to_json(value: Wikilink) -> dict:
    """Describe a wiki link as a plain dict.

    The stringified link target is always stored under ``'title'``; the key ``'text'``
    is only present when the link has a text (i.e. ``value.text`` is not ``None``).
    """
    result = {'title': str(value.title)}
    if value.text is None:
        return result
    return {**result, 'text': str(value.text)}
60
61
def external_link_to_json(value: ExternalLink) -> dict:
    """Describe an external link as a plain dict.

    The stringified URL is always stored under ``'url'``; the link title is stored
    under ``'text'`` only when the link has one (i.e. ``value.title`` is not ``None``).
    """
    link_title = value.title
    result = {'url': str(value.url)}
    if link_title is None:
        return result
    result['text'] = str(link_title)
    return result
67
68
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that re-creates a sledrun wikitext page from data parsed out of that page.

    For every treated page the wikitext is parsed into a sledrun JSON structure (plus an
    optional map JSON taken from the ``<wrmap>`` tag), the structure is rendered back to
    wikitext with ``create_sledrun_wiki`` and the result is saved to the page.
    """

    def treat_page(self) -> None:
        """Parse the current sledrun page, convert it to JSON and save the re-rendered wikitext.

        The page is skipped (with a warning) when
        * its content model is not ``wikitext``,
        * it is not in the category ``Kategorie:Rodelbahn`` or
        * one of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag of the page (if any) is converted.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A plain category link like [[Kategorie:In Arbeit]] has no link text, so the
            # category has to be matched on the link title (same as 'Kategorie:Rodelbahn' above).
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # The description is the first non-empty top level text of the first section 'Allgemeines'.
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for w in v.ifilter_text(recursive=False):
                x = w.strip()
                if x:
                    sledrun_json["description"] = str(x)
                    break
            break

        # The 'Rodelbahnbox' template is only evaluated if it occurs exactly once.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image only causes a warning, the image name is kept anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Store the list of walk-up supports (type and optional comment)."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            # Has to be an assignment: with ':' instead of '=' this line would be
                            # a mere annotation and the comment would be dropped silently.
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_possible_comment'] = w

            v, w = rbb['Beleuchtungstage']
            if v is not None:
                sledrun_json['nightlight_weekdays_count'] = v
            if w is not None:
                sledrun_json['nightlight_weekdays_comment'] = w

            def _sled_rental():
                """Store whether sleds can be rented and the list of sled rentals."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        # A rental given as wiki link refers to a wiki page, otherwise the plain name is used.
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Store whether the sledrun has at least one cachet entry."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Store the video URL of the first top level 'Buttonleiste' template (if any)."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Convert the parsed value to a plain string like all other values of
                    # sledrun_json; surrounding whitespace is not part of the URL.
                    sledrun_json['videos'] = [{'url': str(video.value).strip()}]
        _button_bar()

        def _public_transport():
            """Parse the section 'Anreise mit öffentlichen Verkehrsmitteln'.

            Stores the description (text before the first list item), the public transport
            stops with their timetable templates, the line templates and the external links.
            """
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # Everything before the first list item ('*') is the description.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = str(description)

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            ya = None  # the stop that is currently being assembled
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        # A new stop begins: finish the previous one first.
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    # NOTE(review): the following three branches raise a TypeError if the template
                    # occurs before any 'Haltestelle' template (ya is None) - confirm this is intended.
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line template ends the stop that is currently being assembled.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Parse the section 'Anreise mit dem Auto' (description, parking places and distances)."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is everything between the heading and the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distances are second level list items of the form "** von '''<place>'''<via>: <km> km".
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))  # the wiki uses a decimal comma
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        x = []  # collects the 'Siehe auch' links of all 'Allgemeines' sections
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text following "* '''Beleuchtung''':" on its line or None if missing/empty."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            w = _nightlight(str(v))
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _gastronomy(value: str):
                """Return the list of huts given as '** ' items directly below "* '''Hütten''':"."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # Whatever text is left besides the links and the list markers is the
                            # note; a list (not a one-shot generator) is handed to Wikicode.
                            remaining = str(Wikicode([n for n in wiki.nodes
                                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                            match = re.match(r'\((.+)\)', remaining)
                            if match:
                                remaining = match.group(1)  # drop the surrounding parentheses
                            if len(remaining) > 0:
                                g['note'] = remaining
                            gastronomy.append(g)
                        else:
                            # The first line that is not a '** ' item ends the list of huts.
                            break
                return gastronomy
            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text of the "* '''Rodelverleih''':" item up to the next '* ' item."""
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Advance to the node following the bold text 'Siehe auch' ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow it; whitespace, list markers and
            # colons are skipped, any other node ends the collection.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        # The impressions sub page is only referenced if it exists.
        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
439
440
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The arguments are first handled by pywikibot itself, the remaining ones by the page
    generator factory. If they do not yield a page generator, the pywikibot help hint
    for a missing generator is shown instead of running the bot.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
451
452
# Run the bot only when this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    main()