# ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
# 718b275479edc0fa64ed8dc02249423b0e832932
# [philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
#!/usr/bin/python
"""
User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
Put it in directory scripts/userscripts.

Create a sledrun JSON page from a sledrun wikitext page (including map).

The following generators and filters are supported:

&params;
"""
import io
import json
import re
from itertools import takewhile, dropwhile
from typing import Any, Optional

import mwparserfromhell
import pywikibot
from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
from mwparserfromhell.nodes.extras import Parameter
from mwparserfromhell.wikicode import Wikicode
from pywikibot import pagegenerators, Page
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    SingleSiteBot,
)
from pywikibot.logging import warning
from pywikibot.site import Namespace
from pywikibot.site._namespace import BuiltinNamespace

from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
    avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
    opt_uint_from_str

# Maps the '&params;' placeholder that appears in the module docstring to the
# generic page generator help text.
# NOTE(review): presumably pywikibot substitutes this when printing the script help - confirm.
docuReplacements = {'&params;': pagegenerators.parameterHelp}


def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, or ``None`` when *value* is ``None``."""
    return None if value is None else str(value)


def template_to_json(value: Template) -> dict:
    """Convert a template to a JSON compatible dict.

    The result has the key ``'name'`` (string form of the template name) and the
    key ``'parameter'``, a list with one ``{'value': ...}`` dict per entry of
    ``value.params``, each holding the string form of that parameter.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(p)} for p in value.params],
    }


def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wiki link to a JSON compatible dict.

    The result always has the key ``'title'``; the key ``'text'`` is only
    present when the link has a text (``value.text`` is not ``None``).
    """
    link = {'title': str(value.title)}
    if value.text is not None:
        link['text'] = str(value.text)
    return link


class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that rebuilds a sledrun wikitext page via an intermediate JSON representation."""

    def treat_page(self) -> None:
        """Parse the current sledrun page into a JSON dict, render it back to wikitext and save it.

        The page is skipped with a warning when

        * its content model is not ``wikitext``,
        * it does not link to the category ``Kategorie:Rodelbahn``, or
        * one of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.

        Otherwise the information found in the wikitext (``Rodelbahnbox`` template, ``wrmap`` tag and
        several level 2 sections) is collected in a dict, passed to ``create_sledrun_wiki`` and the
        resulting text is stored with ``put_current``.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag (if any) is converted.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # Compare the link *title* (as done for category_sledrun above): the text of a plain
            # category link is None, so comparing the text never matched.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # Description: first non-blank top-level text node of the first section matching 'Allgemeines'.
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["description"] = str(stripped)
                    break
            break

        # The Rodelbahnbox is only evaluated if there is exactly one such top-level template.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the value is stored nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Store the walk-up supports ('Aufstiegshilfe') as a list of type/comment dicts."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            # Real assignment: the former "walkup_support['comment']: comment" was an
                            # annotation without value and therefore dropped the comment.
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _sled_rental():
                """Store the sled rentals ('Rodelverleih'), referencing wiki pages where a wiki link is given."""
                rentals_rbb = rbb['Rodelverleih']
                if rentals_rbb is not None:
                    sledrun_json['sled_rental_direct'] = rentals_rbb != []
                    rentals = []
                    for name, comment in rentals_rbb:
                        rental = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            rental['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            rental['name'] = name
                        if comment is not None:
                            rental['comment'] = comment
                        rentals.append(rental)
                    sledrun_json['sled_rental'] = rentals
            _sled_rental()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Store the 'video' parameter of the first top-level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Store a plain string (not the Wikicode object) like the other JSON values.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        # Public transport: only the first matching section is evaluated (see the break at the end).
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                             include_headings=False):
            # Everything in front of the first list item is the free text description.
            first_bullet = next((n for n in section.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
            if first_bullet is not None:
                intro = str(Wikicode(section.nodes[:section.nodes.index(first_bullet)])).strip()
                if intro:
                    sledrun_json["public_transport_description"] = str(intro)

            public_transport_stops = []
            public_transport_lines = []
            stop = None  # stop currently being assembled; timetable templates are attached to it
            for node in section.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        if stop is not None:
                            public_transport_stops.append(stop)
                        stop = {}
                        z = node.get(1, None)
                        if z is not None:
                            stop['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            stop['name_local'] = str(z)
                        za = str_or_none(node.get(3, None))
                        zb = str_or_none(node.get(4, None))
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            stop['position'] = z
                    # NOTE(review): the three branches below raise a TypeError if such a template appears
                    # before any 'Haltestelle' template (stop is None). Left unchanged on purpose so that
                    # an unexpected page layout is not converted with silently missing data.
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        stop['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        stop['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        stop['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line template closes the stop currently being assembled.
                        if stop is not None:
                            public_transport_stops.append(stop)
                            stop = None
                        public_transport_lines.append({
                            'timetable_template': template_to_json(node),
                        })
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            break

        def _car():
            """Evaluate the first section matching 'Anreise mit dem Auto' (description, parking, distances)."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # Description: nodes after the leading heading(s) up to (excluding) the first list item.
            description_nodes = dropwhile(lambda n: isinstance(n, Heading), car_section.nodes)
            description_nodes = takewhile(lambda n: not (isinstance(n, Tag) and n.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            parking = []
            for template in car_section.ifilter_templates(matches='Parkplatz'):
                za = str_or_none(template.get(1, None))
                zb = str_or_none(template.get(2, None))
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    parking.append({'position': z})
            if len(parking) > 0:
                sledrun_json['car_parking'] = parking

            # Distances are read line by line from second level list items starting with "** von '''...'''".
            distances = []
            for line in io.StringIO(str(car_section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    distances.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(distances) > 0:
                sledrun_json['car_distances'] = distances
        _car()

        def _gastronomy(value: str) -> list:
            """Return the entries of the "** " list following the line "* '''Hütten''':" in *value*."""
            gastronomy = []
            line_iter = io.StringIO(value)
            for line in line_iter:
                if line.rstrip() == "* '''Hütten''':":
                    break
            else:
                # No such list in this text.
                return gastronomy
            for line in line_iter:
                if not line.startswith('** '):
                    # The list ends with the first line that is not a second level list item.
                    break
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = {
                        'url': str(ext_link.url),
                        'text': str(ext_link.title)
                    }
                # What is left besides links and list markers is the note; surrounding parentheses are removed.
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'\((.+)\)', remaining)
                if match:
                    remaining = match.group(1)
                if len(remaining) > 0:
                    g['note'] = remaining
                gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Store the text of the "* '''Rodelverleih''':" list item of *value*, if there is one.

            The text consists of the rest of that line and all following lines up to
            (excluding) the next line starting with "* ".
            """
            line_iter = io.StringIO(value)
            match = None
            for line in line_iter:
                match = re.match(r"\* '''Rodelverleih''':(.*)", line)
                if match is not None:
                    break
            if match is None:
                return
            result = [match.group(1)]
            for line in line_iter:
                if re.match(r"\* ", line) is not None:
                    break
                result.append(line)
            sledrun_json['sled_rental_description'] = ''.join(result).strip()

        # All sections matching 'Allgemeines' are evaluated; the see also links of all of them are collected.
        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            gastronomy = _gastronomy(str(section))
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            _sled_rental_description(str(section))

            node_iter = iter(section.nodes)
            # Skip everything up to and including the bold "Siehe auch" marker ...
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            # ... and collect the external links that follow, ignoring whitespace, '*' and ':' in between.
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    # Plain strings are stored, consistent with the web links of the gastronomy entries.
                    link = {'url': str(node.url)}
                    if node.title is not None:
                        link['text'] = str(node.title)
                    see_also.append(link)
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    continue
                else:
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        # The impressions sub page is only referenced if it exists.
        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)


def main(*args: str) -> None:
    """Run the bot on the pages selected by the command line arguments.

    Global pywikibot arguments are handled first, the remaining ones are passed
    to the page generator factory. If they do not yield a generator, a help
    hint is shown instead of running the bot.

    :param args: command line arguments
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    gen_factory.handle_args(local_args)
    gen = gen_factory.getCombinedGenerator(preload=True)
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=gen).run()


# Run the bot only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()