ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Support parsing info_web.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import re
14 from itertools import takewhile, dropwhile
15 from typing import Optional
16
17 import mwparserfromhell
18 from mwparserfromhell.nodes.extras import Parameter
19
20 import pywikibot
21 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
22 from mwparserfromhell.wikicode import Wikicode
23 from pywikibot import pagegenerators, Page
24 from pywikibot.bot import (
25     AutomaticTWSummaryBot,
26     ConfigParserBot,
27     ExistingPageBot,
28     NoRedirectPageBot,
29     SingleSiteBot,
30 )
31 from pywikibot.logging import warning
32 from pywikibot.site._namespace import BuiltinNamespace
33
34 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
35 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
36     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
37     opt_uint_from_str
38
# Maps the '&params;' placeholder that appears in the module docstring to the
# help text of the page generators.
# NOTE(review): presumably read by pywikibot when it prints the script help - confirm.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
40
41
def template_to_json(value: Template) -> dict:
    """Return a JSON-compatible dict describing the template *value*.

    The dict has the key ``'name'`` (string form of the template name) and the
    key ``'parameter'`` (one ``{'value': ...}`` entry per template parameter,
    each holding the string form of that parameter, in original order).
    """
    parameter_entries = [{'value': str(param)} for param in value.params]
    return {'name': str(value.name), 'parameter': parameter_entries}
50
51
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a JSON-compatible dict describing the wiki link *value*.

    ``'title'`` is always present; ``'text'`` is only added when the link has
    a text that is not ``None``.
    """
    title = str(value.title)
    if value.text is None:
        return {'title': title}
    return {'title': title, 'text': str(value.text)}
57
58
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a JSON-compatible dict describing the external link *value*.

    ``'url'`` is always present; the link title is stored under ``'text'``
    only when it is not ``None``.
    """
    url = str(value.url)
    if value.title is None:
        return {'url': url}
    return {'url': url, 'text': str(value.title)}
64
65
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that parses a sledrun wikitext page into a JSON structure and re-renders the page from it."""

    def treat_page(self) -> None:
        """Parse the current sledrun wikitext page into JSON and save the re-rendered wikitext.

        The page is skipped with a warning when its content model is not
        ``wikitext``, when it has no link to ``Kategorie:Rodelbahn`` or when one
        of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json`` already
        exists.  Otherwise the ``<wrmap>`` tag, the ``Rodelbahnbox`` and
        ``Buttonleiste`` templates and several level 2 sections are parsed into
        the dict ``sledrun_json``.  That dict, the parsed map and the title of an
        existing ``/Impressionen`` sub page are passed to ``create_sledrun_wiki``
        and the returned text is written to the current page.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag of the page is used.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # The target of a category link is its title; the text (the part after the pipe)
            # is None for a plain [[Kategorie:In Arbeit]] link, so comparing the text never matched.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # The description is the first non-blank top level text node of the first 'Allgemeines' section.
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for w in v.ifilter_text(recursive=False):
                x = w.strip()
                if x:
                    sledrun_json["description"] = str(x)
                    break
            break

        # The Rodelbahnbox is only evaluated when exactly one such top level template exists.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the image name is stored nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Store the (type, comment) pairs of 'Aufstiegshilfe' as 'walkup_supports'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_possible_comment'] = w

            v, w = rbb['Beleuchtungstage']
            if v is not None:
                sledrun_json['nightlight_weekdays_count'] = v
            if w is not None:
                sledrun_json['nightlight_weekdays_comment'] = w

            def _sled_rental():
                """Store the (name, comment) pairs of 'Rodelverleih' as 'sled_rental'.

                A name containing a wiki link is stored as 'wr_page', any other name as 'name'.
                """
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Store whether 'Gütesiegel' has at least one entry as 'cachet'."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Store the 'video' parameter of the first top level 'Buttonleiste' template as 'videos'."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Convert to a plain string like all other values stored in sledrun_json.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Parse the first section 'Anreise mit öffentlichen Verkehrsmitteln'.

            Fills 'public_transport_description', 'public_transport_stops',
            'public_transport_lines' and 'public_transport_links'.
            """
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # The description is everything in front of the first '*' list item.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = str(description)

            # Templates that are stored in the stop opened by the preceding 'Haltestelle' template,
            # mapped to the key they are stored under.
            stop_template_keys = {
                "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
                "Fahrplan Hinfahrt VVT": 'route_arrival_template',
                "Fahrplan Rückfahrt VVT": 'route_departure_template',
            }

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # stop that is currently being assembled
            for node in pt.nodes:
                if isinstance(node, Template):
                    template_name = str(node.name)
                    if template_name == 'Haltestelle':
                        if stop is not None:
                            public_transport_stops.append(stop)
                        stop = {}
                        z = node.get(1, None)
                        if z is not None:
                            stop['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            stop['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            stop['position'] = z
                    elif template_name in stop_template_keys:
                        if stop is None:
                            # Without an open stop the assignment below used to raise a TypeError.
                            # Keep the template in a new stop entry instead of failing or dropping it.
                            warning(f"Template {template_name} on {self.current_page.title()} "
                                    f"is not preceded by a Haltestelle template.")
                            stop = {}
                        stop[stop_template_keys[template_name]] = template_to_json(node)
                    elif template_name == "Fahrplan Linie VVT":
                        # A line template ends the stop that is currently being assembled.
                        if stop is not None:
                            public_transport_stops.append(stop)
                            stop = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Parse the first section 'Anreise mit dem Auto'.

            Fills 'car_description', 'car_parking' and 'car_distances'.
            """
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is everything between the heading and the first '*' list item.
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distances are lines of the form "** von '''<place>'''<rest>: <number> km".
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))  # accept a decimal comma
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        x = []  # collects the 'Siehe auch' links of all 'Allgemeines' sections
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text after "* '''Beleuchtung''':" on its line or None if absent or blank."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            w = _nightlight(str(v))
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _gastronomy(value: str):
                """Return one dict per '** ' line that directly follows the line "* '''Hütten''':"."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # What remains besides the links and the list markers is the note;
                            # a surrounding pair of parentheses is removed.
                            remaining = str(Wikicode(n for n in wiki.nodes
                                                     if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
                            match = re.match(r'\((.+)\)', remaining)
                            if match:
                                remaining = match.group(1)
                            if len(remaining) > 0:
                                g['note'] = remaining
                            gastronomy.append(g)
                        else:
                            # The first line that is not a '** ' item ends the list.
                            break
                return gastronomy
            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text after "* '''Rodelverleih''':" up to the next '* ' line."""
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Advance to the node after the bold "Siehe auch" tag ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow; blank text, '*' and ':' are skipped,
            # any other node ends the list.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
443
444
def main(*args: str) -> None:
    """Run the bot on the pages selected by the command line arguments.

    :param args: command line arguments, handed to ``pywikibot.handle_args``;
        the arguments it returns are handed to the page generator factory.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        # Without a page generator there is nothing to work on.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()


if __name__ == '__main__':
    main()