]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
90cdc49e24c70ec108d7bef1ddc486123c250521
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Any, Optional
17
18 import mwparserfromhell
19 from mwparserfromhell.nodes.extras import Parameter
20
21 import pywikibot
22 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
23 from mwparserfromhell.wikicode import Wikicode
24 from pywikibot import pagegenerators, Page
25 from pywikibot.bot import (
26     AutomaticTWSummaryBot,
27     ConfigParserBot,
28     ExistingPageBot,
29     NoRedirectPageBot,
30     SingleSiteBot,
31 )
32 from pywikibot.logging import warning
33 from pywikibot.site._namespace import BuiltinNamespace
34
35 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
36 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
37     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
38     opt_uint_from_str
39
40 from pywikibot.site import Namespace
41
# Maps the '&params;' placeholder used in the module docstring to the help text
# of the page generators (presumably substituted by pywikibot when showing the
# script help - confirm against the pywikibot version in use).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
43
44
def template_to_json(value: Template) -> dict:
    """Return a JSON-compatible dict describing the template *value*.

    The result has the key ``'name'`` (the template name as string) and the key
    ``'parameter'`` (a list with one ``{'value': <str>}`` entry per template
    parameter, in the order given by ``value.params``).
    """
    parameter_list = [{'value': str(param)} for param in value.params]
    return {'name': str(value.name), 'parameter': parameter_list}
53
54
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a JSON-compatible dict describing the wiki link *value*.

    The key ``'title'`` is always present, the key ``'text'`` is only added
    if the link has a text (``value.text`` is not ``None``).
    """
    link_text = value.text
    result = {'title': str(value.title)}
    return result if link_text is None else {**result, 'text': str(link_text)}
60
61
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a JSON-compatible dict describing the external link *value*.

    The key ``'url'`` is always present, the key ``'text'`` is only added
    if the link has a title (``value.title`` is not ``None``).
    """
    link_title = value.title
    result = {'url': str(value.url)}
    return result if link_title is None else {**result, 'text': str(link_title)}
67
68
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that parses sledrun wikitext pages into a JSON-like structure and re-renders them."""

    def treat_page(self) -> None:
        """Parse the current sledrun page, build its JSON-like description and save the re-rendered page.

        The page is skipped with a warning if

        * its content model is not ``wikitext``,
        * it does not link to the category ``Kategorie:Rodelbahn``,
        * the sub page ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.

        Otherwise the information of the page (``Rodelbahnbox`` template, ``Buttonleiste`` template,
        ``wrmap`` tag and the level 2 sections about general information, public transport and car)
        is collected in a dict, which is rendered with ``create_sledrun_wiki`` and saved as new
        text of the current page.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag of the page is used.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category link like [[Kategorie:In Arbeit]] carries the category in its title (its text is
            # None), so the title has to be compared - the same way as for category_sledrun above.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # The description is the first non-empty top level text node of the first section "Allgemeines".
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["description"] = str(stripped)
                    break
            break

        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the image name is kept anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Add the list 'walkup_supports' with one entry (type and optional comment) per walk-up aid."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            # This has to be an assignment. Written with ':' instead of '=' it is just
                            # an annotation and the comment is silently dropped.
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _sled_rental():
                """Add 'sled_rental_direct' and the list 'sled_rental' from the 'Rodelverleih' entry."""
                rental_rbb = rbb['Rodelverleih']
                if rental_rbb is not None:
                    sledrun_json['sled_rental_direct'] = rental_rbb != []
                    rentals = []
                    for name, comment in rental_rbb:
                        rental = {}
                        # A rental given as wiki link refers to a page, otherwise the plain name is used.
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            rental['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            rental['name'] = name
                        if comment is not None:
                            rental['comment'] = comment
                        rentals.append(rental)
                    sledrun_json['sled_rental'] = rentals
            _sled_rental()

            def _cachet():
                """Add the boolean 'cachet' that tells whether the 'Gütesiegel' entry is non-empty."""
                cachet_rbb = rbb['Gütesiegel']
                if cachet_rbb is not None:
                    sledrun_json['cachet'] = len(cachet_rbb) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Add the list 'videos' if the first 'Buttonleiste' template has a 'video' parameter."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # The parameter value is a Wikicode object. Store it as string like all the other
                    # values of the JSON structure.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Add description, stops, lines and links of the first public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]

            # The description is everything before the first list item ('*') of the section.
            # If the section has no list item, no description is stored.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = str(description)

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            # ya is the stop that is currently being collected (None if there is no open stop).
            ya = None
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        # A new stop starts, so a previously collected stop is finished.
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    # NOTE(review): the following three branches expect a preceding {{Haltestelle}}.
                    # Without an open stop ya is None and the item assignment raises a TypeError.
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line timetable does not belong to a stop, so an open stop is finished here.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Add description, parking places and distances of the first section about arriving by car."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is everything between the leading heading(s) and the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), car_section.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            car_parking = []
            for parking_template in car_section.ifilter_templates(matches='Parkplatz'):
                za = str(parking_template.get(1, '')).strip()
                zb = str(parking_template.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    car_parking.append({'position': z})
            if len(car_parking) > 0:
                sledrun_json['car_parking'] = car_parking

            # Distances are given line by line, the decimal separator of the km value may be ',' or '.'.
            car_distances = []
            for line in io.StringIO(str(car_section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    car_distances.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(car_distances) > 0:
                sledrun_json['car_distances'] = car_distances
        _car()

        def _gastronomy(value: str) -> list:
            """Return the entries listed as '** ' lines directly below the line "* '''Hütten''':" in value."""
            gastronomy = []
            line_iter = io.StringIO(value)
            for line in line_iter:
                if line.rstrip() == "* '''Hütten''':":
                    break
            else:
                # There is no list of huts in the text.
                return gastronomy
            for line in line_iter:
                if not line.startswith('** '):
                    # The first line that is no second level list item ends the list of huts.
                    break
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = external_link_to_json(ext_link)
                # Everything except links and the list markup is the note. If the note starts with
                # a part in parentheses, only the content of the parentheses is kept.
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'\((.+)\)', remaining)
                if match:
                    remaining = match.group(1)
                if len(remaining) > 0:
                    g['note'] = remaining
                gastronomy.append(g)
            return gastronomy

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            gastronomy_list = _gastronomy(str(section))
            if len(gastronomy_list) > 0:
                sledrun_json['gastronomy'] = gastronomy_list

            def _sled_rental_description():
                """Add 'sled_rental_description': the text after "* '''Rodelverleih''':" up to the next '* ' line."""
                line_iter = io.StringIO(str(section))
                match = None
                for line in line_iter:
                    match = re.match(r"\* '''Rodelverleih''':(.*)", line)
                    if match is not None:
                        break
                if match is None:
                    return
                result = [match.group(1)]
                for line in line_iter:
                    if re.match(r"\* ", line) is not None:
                        break
                    result.append(line)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Skip all nodes up to and including the bold text "Siehe auch" ...
            node_iter = iter(section.nodes)
            node = next(node_iter, None)
            while node is not None:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    node = next(node_iter, None)
                    break
                node = next(node_iter, None)
            # ... and collect the external links that follow. Only white space, '*' and ':' are allowed
            # between the links, any other node ends the "see also" list.
            while node is not None:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                node = next(node_iter, None)
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
418
419
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The global pywikibot arguments are handled first, the remaining arguments are
    passed to the page generator factory. If they do not result in a page
    generator, a help message about the missing generator is shown instead of
    running the bot.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
430
431
# Run the bot only if this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    main()