]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
27d3f688434f1dbcadeae20d22e83fb04234b374
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import re
14 from itertools import takewhile, dropwhile
15 from typing import Optional
16
17 import mwparserfromhell
18 from mwparserfromhell.nodes.extras import Parameter
19
20 import pywikibot
21 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
22 from mwparserfromhell.wikicode import Wikicode
23 from pywikibot import pagegenerators, Page
24 from pywikibot.bot import (
25     AutomaticTWSummaryBot,
26     ConfigParserBot,
27     ExistingPageBot,
28     NoRedirectPageBot,
29     SingleSiteBot,
30 )
31 from pywikibot.logging import warning
32 from pywikibot.site._namespace import BuiltinNamespace
33
34 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
35 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
36     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
37     opt_uint_from_str
38
# Maps the '&params;' placeholder used in the module docstring to the help text of the
# page generators (presumably substituted by pywikibot when showing the script help -- TODO confirm).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
40
41
def template_to_json(value: Template) -> dict:
    """Return a dict describing a template node.

    The result has the key ``'name'`` (the template name as string) and the key
    ``'parameter'`` (a list with one ``{'value': <parameter as string>}`` dict
    per template parameter, in the order of ``value.params``).
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
50
51
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a dict describing a wiki link.

    The key ``'title'`` is always present; the key ``'text'`` is only added
    if the link has a text (``value.text`` is not ``None``).
    """
    link_text = value.text
    result = {'title': str(value.title)}
    if link_text is None:
        return result
    result.update(text=str(link_text))
    return result
57
58
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a dict describing an external link.

    The key ``'url'`` is always present; the key ``'text'`` is only added
    if the link has a title (``value.title`` is not ``None``).
    """
    link_title = value.title
    result = {'url': str(value.url)}
    if link_title is None:
        return result
    result['text'] = str(link_title)
    return result
64
65
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that rewrites a sledrun wikitext page via an intermediate JSON-like dict.

    For each treated page the wikitext is parsed into a ``sledrun_json`` dict
    (plus an optional map dict taken from a ``wrmap`` tag). The dicts are then
    rendered back to wikitext with ``create_sledrun_wiki`` and saved.
    """

    def treat_page(self) -> None:
        """Extract the sledrun data of the current page and save the regenerated wikitext.

        The page is skipped with a warning if

        * its content model is not ``wikitext``,
        * it does not have the category ``Kategorie:Rodelbahn``,
        * the sub page ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag (if any) is converted.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category link like [[Kategorie:In Arbeit]] carries the category in its title
            # (its text is None), so the title is compared - same as for category_sledrun above.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # The description is the first non-blank top level text node of the first section "Allgemeines".
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for w in v.ifilter_text(recursive=False):
                x = w.strip()
                if x:
                    sledrun_json["description"] = str(x)
                    break
            break

        # The Rodelbahnbox is only evaluated if there is exactly one on the page.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the value is taken over anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Copy the walk-up supports (type and optional comment) to sledrun_json."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            # Was "walkup_support['comment']: comment", which is an annotation
                            # and not an assignment - so the comment got lost.
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_possible_comment'] = w

            v, w = rbb['Beleuchtungstage']
            if v is not None:
                sledrun_json['nightlight_weekdays_count'] = v
            if w is not None:
                sledrun_json['nightlight_weekdays_comment'] = w

            def _sled_rental():
                """Copy the sled rental entries to sledrun_json.

                An entry whose name contains a wiki link is stored as 'wr_page', otherwise as 'name'.
                """
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Store whether the Rodelbahnbox lists at least one cachet."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Take the 'video' parameter of the first top level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Store a plain string (not a Wikicode object), like all other extracted values.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links of the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # The description is everything before the first list item ('*').
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = description

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            ya = None  # the stop currently being assembled (started by a 'Haltestelle' template)
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    # NOTE(review): the next three branches expect a preceding 'Haltestelle' template;
                    # if ya is None here, the assignment raises a TypeError.
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line template ends the stop that is currently assembled.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking places and distances of the car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is everything between the leading heading(s) and the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distances are parsed line by line from the wikitext of the section.
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        x = []  # collects the "see also" links of all sections "Allgemeines"
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text after "* '''Beleuchtung''':" of the first such line or None."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            w = _nightlight(str(v))
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _gastronomy(value: str):
                """Return the list of entries ('** ' lines) directly following the line "* '''Hütten''':"."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # What is left besides the links and the list markers is the note;
                            # surrounding parentheses are removed.
                            remaining = str(Wikicode([n for n in wiki.nodes
                                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                            match = re.match(r'\((.+)\)', remaining)
                            if match:
                                remaining = match.group(1)
                            if len(remaining) > 0:
                                g['note'] = remaining
                            gastronomy.append(g)
                        else:
                            # The first line that is no sub list item ends the list.
                            break
                return gastronomy
            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text of the "* '''Rodelverleih''':" list item including its continuation lines."""
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                # Lines up to the next first level list item belong to the description.
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Skip to the node after the bold text "Siehe auch" ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow, ignoring list markup and whitespace.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
436
437
def main(*args: str) -> None:
    """Run the bot on the pages selected by the given arguments.

    The arguments are first passed to ``pywikibot.handle_args``; what it returns
    is handed to a ``GeneratorFactory``. If that yields no page generator,
    a help hint is shown instead of running the bot.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
448
449
# Run the bot only when the file is executed as a script (not when it is imported).
if __name__ == '__main__':
    main()