ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Use functions get_sledrun_description and optional_set.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import re
14 from itertools import takewhile, dropwhile
15 from typing import Optional
16
17 import mwparserfromhell
18 from mwparserfromhell.nodes.extras import Parameter
19
20 import pywikibot
21 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
22 from mwparserfromhell.wikicode import Wikicode
23 from pywikibot import pagegenerators, Page
24 from pywikibot.bot import (
25     AutomaticTWSummaryBot,
26     ConfigParserBot,
27     ExistingPageBot,
28     NoRedirectPageBot,
29     SingleSiteBot,
30 )
31 from pywikibot.logging import warning
32 from pywikibot.site._namespace import BuiltinNamespace
33
34 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
35 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
36     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
37     opt_uint_from_str
38 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
39
# Maps the '&params;' placeholder used in the module docstring to the help text
# of the page generator options (presumably substituted by pywikibot when it
# prints this script's help -- confirm against the pywikibot version in use).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
41
42
def template_to_json(value: Template) -> dict:
    """Return a dict with the template's name and its parameters rendered as strings.

    Each parameter becomes ``{'value': str(parameter)}``; the order of
    ``value.params`` is preserved.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
51
52
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a dict with the link's ``title`` and, if the link has one, its ``text``."""
    title = str(value.title)
    if value.text is None:
        return {'title': title}
    return {'title': title, 'text': str(value.text)}
58
59
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a dict with the link's ``url`` and, if the link has a title, that title as ``text``."""
    result = {'url': str(value.url)}
    label = value.title
    if label is None:
        return result
    result['text'] = str(label)
    return result
65
66
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that parses a sledrun wikitext page into a JSON-like dict and saves the re-rendered page.

    For every page the wikitext is parsed into a ``sledrun_json`` dict (plus an
    optional map parsed from a ``<wrmap>`` tag). The dict is handed to
    ``create_sledrun_wiki`` and the text it returns is saved to the current page.
    """

    def treat_page(self) -> None:
        """Parse the current sledrun page, convert it via ``sledrun_json`` and save the result.

        The page is skipped with a warning when

        * its content model is not ``wikitext``,
        * it has no wikilink whose title is ``Kategorie:Rodelbahn``,
        * the sub page ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.

        Otherwise the 'Rodelbahnbox' template, the 'Buttonleiste' template and the
        sections 'Anreise mit öffentlichen Verkehrsmitteln', 'Anreise mit dem Auto'
        and 'Allgemeines' are evaluated and the text returned by
        ``create_sledrun_wiki`` is written with ``put_current``.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag of the page is used for the map.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category link carries the category in its title (its text is None unless a
            # label is given), so the title is compared - same as for category_sledrun above.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The Rodelbahnbox is only evaluated when the page has exactly one top-level instance.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    warning(f"{image_page.title()} does not exist.")
                # The image name is kept even if the file page is missing; only a warning is issued.
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Copy the (type, comment) pairs of 'Aufstiegshilfe' to 'walkup_supports'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_possible_comment'] = w

            v, w = rbb['Beleuchtungstage']
            if v is not None:
                sledrun_json['nightlight_weekdays_count'] = v
            if w is not None:
                sledrun_json['nightlight_weekdays_comment'] = w

            def _sled_rental():
                """Convert the (name, comment) pairs of 'Rodelverleih' to 'sled_rental' entries."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        # A name containing a wikilink is stored as a link, otherwise as plain name.
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether 'Gütesiegel' has at least one entry."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Take the 'video' parameter of the first top-level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Store a plain str like everywhere else instead of the Wikicode object.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Evaluate the first level 2 section 'Anreise mit öffentlichen Verkehrsmitteln'."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # Everything in front of the first list item ('*') is the free text description.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = description

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            ya = None  # stop that is currently assembled, None if no stop is "open"
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        # A new stop begins: finish the previous one first.
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        # NOTE(review): this and the next two branches assume a preceding
                        # 'Haltestelle' template; with ya still None they raise a TypeError -
                        # confirm whether such pages should be skipped or reported instead.
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line template ends the stop that is currently assembled.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Evaluate the first level 2 section 'Anreise mit dem Auto'."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is the text between the heading and the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distances are read line by line from entries like "** von '''Town''' (via X): 12,5 km".
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))  # decimal comma -> decimal point
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        # "See also" links are collected over all 'Allgemeines' sections.
        x = []
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text after "* '''Beleuchtung''':" of the first such line, or None."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            w = _nightlight(str(v))
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _gastronomy(value: str):
                """Return the list of entries given as '** ' lines below the "* '''Hütten''':" line."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # What remains besides the links and the list markers is the note;
                            # an enclosing pair of parentheses is removed.
                            remaining = str(Wikicode(n for n in wiki.nodes
                                                     if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
                            match = re.match(r'\((.+)\)', remaining)
                            if match:
                                remaining = match.group(1)
                            if len(remaining) > 0:
                                g['note'] = remaining
                            gastronomy.append(g)
                        else:
                            # The first line that is not a '** ' item ends the list.
                            break
                return gastronomy
            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text of the "* '''Rodelverleih''':" item, up to the next '* ' line."""
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Advance to the node following the bold "Siehe auch" marker ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links until something else than a link,
            # whitespace, '*' or ':' shows up.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        # Pass the title of the '/Impressionen' sub page only if that page exists.
        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
438
439
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the selected pages.

    If the arguments do not yield a page generator, pywikibot's help hint for
    a missing generator is shown instead.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
450
451
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()