]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
3201097206a72cfda4422faa077cbef57cf1611b
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Any, Optional
17
18 import mwparserfromhell
19 from mwparserfromhell.nodes.extras import Parameter
20
21 import pywikibot
22 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
23 from mwparserfromhell.wikicode import Wikicode
24 from pywikibot import pagegenerators, Page
25 from pywikibot.bot import (
26     AutomaticTWSummaryBot,
27     ConfigParserBot,
28     ExistingPageBot,
29     NoRedirectPageBot,
30     SingleSiteBot,
31 )
32 from pywikibot.logging import warning
33 from pywikibot.site._namespace import BuiltinNamespace
34
35 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
36 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
37     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
38     opt_uint_from_str
39
40 from pywikibot.site import Namespace
41
# Maps the '&params;' placeholder that appears in the module docstring to the help text of the
# page generators. NOTE(review): presumably pywikibot substitutes it when printing the script help - confirm.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
43
44
def template_to_json(value: Template) -> dict:
    """Return a JSON-compatible dict describing the template `value`.

    The result has the key 'name' (string form of the template name) and the key
    'parameter' (a list with one ``{'value': ...}`` dict per template parameter, each holding
    the string form of that parameter, in the original order).
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
53
54
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a JSON-compatible dict describing the wiki link `value`.

    The key 'title' is always present; the key 'text' is only added when the link
    has a text (i.e. `value.text` is not None).
    """
    title = str(value.title)
    label = value.text
    return {'title': title} if label is None else {'title': title, 'text': str(label)}
60
61
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a JSON-compatible dict describing the external link `value`.

    The key 'url' is always present; the key 'text' is only added when the link
    has a title (i.e. `value.title` is not None).
    """
    url = str(value.url)
    label = value.title
    return {'url': url} if label is None else {'url': url, 'text': str(label)}
67
68
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that re-creates a sledrun wikitext page from a JSON description extracted from it."""

    def treat_page(self) -> None:
        """Extract the sledrun data of the current page to JSON, render it back to wikitext and save it.

        The page is skipped with a warning if

        * its content model is not 'wikitext',
        * it does not link to the category 'Kategorie:Rodelbahn',
        * the sub page '/Rodelbahn.json' or '/Landkarte.json' already exists.

        Otherwise a dict `sledrun_json` is collected from the 'Rodelbahnbox' template, the 'Buttonleiste'
        template and the level 2 sections 'Allgemeines', 'Anreise mit öffentlichen Verkehrsmitteln' and
        'Anreise mit dem Auto'. Together with the parsed <wrmap> tag (if any) and the title of the
        '/Impressionen' sub page (if it exists) it is passed to `create_sledrun_wiki` and the resulting text
        is saved as the new content of the current page.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag of the page is used.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        category_under_construction = 'Kategorie:In Arbeit'
        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category link carries the category in its title (like the 'Kategorie:Rodelbahn' check above);
            # its text is None for a plain [[Kategorie:...]] link, so the title has to be compared.
            "entry_under_construction": any(c.title == category_under_construction for c in wikilink_list),
        }

        # The description is the first non-empty top level text node of the first section 'Allgemeines'.
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for w in v.ifilter_text(recursive=False):
                x = w.strip()
                if x:
                    sledrun_json["description"] = str(x)
                    break
            break

        # The 'Rodelbahnbox' template is only evaluated if it occurs exactly once at the top level.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the image name is used anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Store the (type, comment) pairs of 'Aufstiegshilfe' as a list of dicts."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            # This has to be an assignment: `d[key]: value` would only be an annotation
                            # and the comment would be dropped silently.
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_description'] = w

            v, w = rbb['Beleuchtungstage']
            if v is not None:
                sledrun_json['nightlight_weekdays_count'] = v
            if w is not None:
                sledrun_json['nightlight_weekdays_comment'] = w

            def _sled_rental():
                """Store the (name, comment) pairs of 'Rodelverleih'.

                An entry gets the key 'wr_page' if its name contains a wiki link, otherwise the key 'name'.
                """
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Store whether 'Gütesiegel' has at least one entry."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Store the 'video' parameter of the first top level 'Buttonleiste' template (if present)."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Convert to str like all other values so that the data only contains plain JSON types.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links of the first public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # The description is everything before the first list item; without a list item there is none.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = str(description)

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            ya = None  # the stop that is currently being collected
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        # A new stop starts: finish the previous one.
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    # NOTE(review): the following three templates are attached to the current stop. If one of
                    # them occurs before any 'Haltestelle' (or after a 'Fahrplan Linie VVT'), `ya` is None and
                    # the assignment raises a TypeError. This is kept on purpose: skipping the template
                    # silently would lose data because the page is overwritten afterwards.
                    elif node.name == "Fahrplan Abfahrtsmonitor VVT":
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name == "Fahrplan Hinfahrt VVT":
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name == "Fahrplan Rückfahrt VVT":
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name == "Fahrplan Linie VVT":
                        # A line template ends the collection of the current stop.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking positions and distances of the first car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is everything between the heading and the first list item.
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distances are read line by line from list items like "** von '''Innsbruck''' (...): 12,5 km".
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        see_also = []
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _gastronomy(value: str):
                """Return the entries listed directly below the line "* '''Hütten''':" in `value`.

                Each following line starting with '** ' yields one dict with the optional keys 'wr_page'
                (first wiki link), 'weblink' (first external link) and 'note' (remaining text, with enclosing
                parentheses removed). The first other line ends the list.
                """
                gastronomy = []
                line_iter = io.StringIO(value)
                for line in line_iter:
                    if line.rstrip() == "* '''Hütten''':":
                        break
                else:
                    # The heading line was not found.
                    return gastronomy
                for line in line_iter:
                    if not line.startswith('** '):
                        break
                    g = {}
                    wiki = mwparserfromhell.parse(line)
                    wiki_link = next(wiki.ifilter_wikilinks(), None)
                    if isinstance(wiki_link, Wikilink):
                        g['wr_page'] = wikilink_to_json(wiki_link)
                    ext_link = next(wiki.ifilter_external_links(), None)
                    if isinstance(ext_link, ExternalLink):
                        g['weblink'] = external_link_to_json(ext_link)
                    remaining = str(Wikicode([n for n in wiki.nodes
                                              if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                    match = re.match(r'\((.+)\)', remaining)
                    if match:
                        remaining = match.group(1)
                    if len(remaining) > 0:
                        g['note'] = remaining
                    gastronomy.append(g)
                return gastronomy
            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text of the list item "* '''Rodelverleih''':" up to the next '* ' list item."""
                line_iter = io.StringIO(str(v))
                match = None
                for line in line_iter:
                    match = re.match(r"\* '''Rodelverleih''':(.*)", line)
                    if match is not None:
                        break
                if match is None:
                    return
                result = [match.group(1)]
                for line in line_iter:
                    if re.match(r"\* ", line) is not None:
                        break
                    result.append(line)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Collect the external links that directly follow the bold text 'Siehe auch'. Only list markup
            # and whitespace may be in between, any other node ends the list.
            node_iter = iter(v.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif not (isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']):
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
424
425
def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the selected pages.

    :param args: command line arguments, passed to `pywikibot.handle_args`.

    If the arguments do not result in a page generator, a help hint is shown instead of running the bot.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
436
437
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()