]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
f71b19a181b7a0ed9a1f1c54001f9caaecb2f6d9
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Any, Optional
17
18 import mwparserfromhell
19 from mwparserfromhell.nodes.extras import Parameter
20
21 import pywikibot
22 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
23 from mwparserfromhell.wikicode import Wikicode
24 from pywikibot import pagegenerators, Page
25 from pywikibot.bot import (
26     AutomaticTWSummaryBot,
27     ConfigParserBot,
28     ExistingPageBot,
29     NoRedirectPageBot,
30     SingleSiteBot,
31 )
32 from pywikibot.logging import warning
33 from pywikibot.site._namespace import BuiltinNamespace
34
35 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
36 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
37     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
38     opt_uint_from_str
39
40 from pywikibot.site import Namespace
41
# Maps the '&params;' placeholder that appears in the module docstring to the
# generic page generator help text (pagegenerators.parameterHelp).
# NOTE(review): presumably pywikibot performs this substitution when it prints
# the script help -- confirm against the pywikibot documentation.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
43
44
def str_or_none(value: Any) -> Optional[str]:
    """Convert ``value`` with ``str()`` while letting ``None`` pass through unchanged.

    :param value: Any object or ``None``.
    :return: ``None`` if ``value`` is ``None``, otherwise ``str(value)``.
    """
    return None if value is None else str(value)
49
50
def template_to_json(value: Template) -> dict:
    """Build a plain dict describing a parsed template.

    :param value: The template node to describe.
    :return: A dict with the key ``'name'`` (the template name as string) and the key
        ``'parameter'`` (a list with one ``{'value': <str(parameter)>}`` dict per template
        parameter, in template order).
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
59
60
def wikilink_to_json(value: Wikilink) -> dict:
    """Build a plain dict describing a parsed wiki link.

    :param value: The wiki link node to describe.
    :return: A dict with the key ``'title'`` and, only if the link has a text part,
        the additional key ``'text'``. Both values are strings.
    """
    result = {'title': str(value.title)}
    link_text = str_or_none(value.text)
    if link_text is None:
        return result
    result['text'] = link_text
    return result
67
68
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that extracts the structured data of a sledrun wikitext page and rewrites the page from it.

    For every page delivered by the generator, :meth:`treat_page` collects the sledrun data in a
    JSON-like dict, renders wikitext from this dict with ``create_sledrun_wiki`` and saves the result
    to the same page.
    """

    def treat_page(self) -> None:
        """Parse the current page into a sledrun dict, render wikitext from it and save the page.

        The page is skipped (with a warning) if

        * its content model is not ``wikitext``,
        * it does not contain a link to ``Kategorie:Rodelbahn``,
        * the sub page ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.

        Otherwise the following parts of the page are evaluated: the ``wrmap`` tag, the
        ``Rodelbahnbox`` and ``Buttonleiste`` templates and the level 2 sections ``Allgemeines``,
        ``Anreise mit öffentlichen Verkehrsmitteln`` and ``Anreise mit dem Auto``.
        """
        page_title = self.current_page.title()

        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {page_title} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {page_title} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, page_title + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {page_title}.")
            return

        map_json_page = Page(self.site, page_title + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {page_title}.")
            return

        # Only the first <wrmap> tag (if any) is used.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if wrmap_tags:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": page_title,
            "aliases": [],
            # A category link like [[Kategorie:In Arbeit]] carries the category in its *title*;
            # its text part is empty, so the title has to be compared here.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # Description: first non-blank top level text node of the first 'Allgemeines' section.
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["description"] = str(stripped)
                    break
            break

        # The Rodelbahnbox is only evaluated if the page has exactly one of them.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])

            image = rbb['Bild']
            if image is not None:
                image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is reported but still written to the JSON data.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = image

            length = rbb['Länge']
            if length is not None:
                sledrun_json['length'] = length

            difficulty = rbb['Schwierigkeit']
            if difficulty is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

            avalanches = rbb['Lawinen']
            if avalanches is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

            has_operator, operator = rbb['Betreiber']
            if has_operator is not None:
                sledrun_json['has_operator'] = has_operator
            if operator is not None:
                sledrun_json['operator'] = operator

            walkup_possible = rbb['Aufstieg möglich']
            if walkup_possible is not None:
                sledrun_json['walkup_possible'] = walkup_possible

            walkup_separate, walkup_comment = rbb['Aufstieg getrennt']
            if walkup_separate is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
            if walkup_comment is not None:
                sledrun_json['walkup_comment'] = walkup_comment  # TODO

            walkup_time = rbb['Gehzeit']
            if walkup_time is not None:
                sledrun_json['walkup_time'] = walkup_time

            nightlight, nightlight_description = rbb['Beleuchtungsanlage']
            if nightlight is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
            if nightlight_description is not None:
                sledrun_json['nightlight_description'] = nightlight_description

            rental_list = rbb['Rodelverleih']
            if rental_list is not None:
                sledrun_json['sled_rental_direct'] = rental_list != []
                sled_rental = []
                for name, comment in rental_list:
                    rental = {}
                    # A rental given as wiki link refers to a page; otherwise the plain name is kept.
                    wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                    if isinstance(wiki_link, Wikilink):
                        rental['wr_page'] = wikilink_to_json(wiki_link)
                    else:
                        rental['name'] = name
                    if comment is not None:
                        rental['comment'] = comment
                    sled_rental.append(rental)
                sledrun_json['sled_rental'] = sled_rental

            show_in_overview = rbb['In Übersichtskarte']
            if show_in_overview is not None:
                sledrun_json['show_in_overview'] = show_in_overview

            forum_id = rbb['Forumid']
            if forum_id is not None:
                sledrun_json['forum_id'] = forum_id

            position = rbb['Position']
            if position is not None:
                sledrun_json['position'] = lonlat_to_json(position)

            top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if top != {}:
                sledrun_json['top'] = top

            bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if bottom != {}:
                sledrun_json['bottom'] = bottom

            info_phone = rbb['Telefonauskunft']
            if info_phone is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

            public_transport = rbb['Öffentliche Anreise']
            if public_transport is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

        # Video URL from the first 'Buttonleiste' template.
        button_bar = next(
            wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste'), None)
        if button_bar is not None:
            video = button_bar.get('video', None)
            if isinstance(video, Parameter):
                # Store a plain string (not a Wikicode object), like all other values of the dict.
                sledrun_json['videos'] = [{'url': str(video.value)}]

        # Public transport: only the first matching section is evaluated (see the trailing break).
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                             include_headings=False):
            # Everything before the first list item is the free text description.
            first_bullet = next((n for n in section.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
            if first_bullet is not None:
                description = str(Wikicode(section.nodes[:section.nodes.index(first_bullet)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = description

            public_transport_stops = []
            public_transport_lines = []
            stop = None  # the stop that is currently being assembled
            for node in section.nodes:
                if not isinstance(node, Template):
                    continue
                if node.name == 'Haltestelle':
                    # A new stop starts: finish the previous one first.
                    if stop is not None:
                        public_transport_stops.append(stop)
                    stop = {}
                    municipality = node.get(1, None)
                    if municipality is not None:
                        stop['municipality'] = str(municipality)
                    name_local = node.get(2, None)
                    if name_local is not None:
                        stop['name_local'] = str(name_local)
                    lonlat_str = str_or_none(node.get(3, None))
                    ele_str = str_or_none(node.get(4, None))
                    stop_position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                    if len(stop_position) > 0:
                        stop['position'] = stop_position
                # NOTE(review): the following three branches raise a TypeError if no 'Haltestelle'
                # template precedes the timetable template (stop is None then). This is kept
                # deliberately so that such a page is not converted with silently dropped data.
                elif node.name == "Fahrplan Abfahrtsmonitor VVT":
                    stop['monitor_template'] = template_to_json(node)
                elif node.name == "Fahrplan Hinfahrt VVT":
                    stop['route_arrival_template'] = template_to_json(node)
                elif node.name == "Fahrplan Rückfahrt VVT":
                    stop['route_departure_template'] = template_to_json(node)
                elif node.name == "Fahrplan Linie VVT":
                    # A line template ends the stop that is currently open.
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    public_transport_lines.append({
                        'timetable_template': template_to_json(node),
                    })
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            break

        def _car():
            """Evaluate the first 'Anreise mit dem Auto' section (description, parking, distances)."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # Description: everything between the heading and the first list item.
            description_nodes = dropwhile(lambda n: isinstance(n, Heading), car_section.nodes)
            description_nodes = takewhile(lambda n: not (isinstance(n, Tag) and n.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            car_parking = []
            for parking in car_section.ifilter_templates(matches='Parkplatz'):
                lonlat_str = str_or_none(parking.get(1, None))
                ele_str = str_or_none(parking.get(2, None))
                parking_position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                if len(parking_position) > 0:
                    car_parking.append({'position': parking_position})
            if len(car_parking) > 0:
                sledrun_json['car_parking'] = car_parking

            # Distances are second level list items like: ** von '''Innsbruck''' (...): 12,5 km
            car_distances = []
            for line in io.StringIO(str(car_section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    origin, via, km = match.groups()
                    car_distances.append({
                        'km': float(km.replace(',', '.')),  # decimal comma -> decimal point
                        'route': (origin.strip() + ' ' + via.strip()).strip(),
                    })
            if len(car_distances) > 0:
                sledrun_json['car_distances'] = car_distances
        _car()

        def _gastronomy(value: str) -> list:
            """Return the entries of the ``* '''Hütten''':`` list found in the section text ``value``.

            Each ``** `` line directly following the list head yields one dict with the optional
            keys ``wr_page``, ``weblink`` and ``note``. An empty list is returned if there is no
            such list.
            """
            gastronomy = []
            line_iter = io.StringIO(value)
            for line in line_iter:
                if line.rstrip() == "* '''Hütten''':":
                    break
            else:
                return gastronomy  # list head not found
            # line_iter continues after the list head line.
            for line in line_iter:
                if not line.startswith('** '):
                    break
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = {
                        'url': str(ext_link.url),
                        'text': str(ext_link.title)
                    }
                # What remains besides links and list markers is the note; a note that starts
                # with a parenthesised part is reduced to the content of the parentheses.
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'\((.+)\)', remaining)
                if match:
                    remaining = match.group(1)
                if len(remaining) > 0:
                    g['note'] = remaining
                gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Store the text of the ``* '''Rodelverleih''':`` list item of the section text ``value``.

            The text consists of the rest of the item's line and all following lines up to (but
            excluding) the next first level list item. Nothing is stored if there is no such item.
            """
            line_iter = io.StringIO(value)
            match = None
            for line in line_iter:
                match = re.match(r"\* '''Rodelverleih''':(.*)", line)
                if match is not None:
                    break
            if match is None:
                return
            # Keep the line terminator of the first line as well (line[match.end():]), otherwise
            # the text of the first line would be glued to the following line.
            result = [match.group(1), line[match.end():]]
            for line in line_iter:
                if re.match(r"\* ", line) is not None:
                    break
                result.append(line)
            sledrun_json['sled_rental_description'] = ''.join(result).strip()

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            gastronomy = _gastronomy(str(section))
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            _sled_rental_description(str(section))

            # "See also": the external links that directly follow the bold '''Siehe auch''' marker,
            # only separated by whitespace and list markup.
            node_iter = iter(section.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            # node_iter continues after the marker (it is exhausted if there is no marker).
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    link = {'url': str(node.url)}
                    if node.title is not None:
                        link['text'] = str(node.title)
                    see_also.append(link)
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    continue
                else:
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, page_title + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
398
399
def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the selected pages.

    :param args: Command line arguments; they are passed to ``pywikibot.handle_args`` and the
        arguments left over are passed to the page generator factory.
    """
    unhandled_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(unhandled_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        # Without a page generator there is nothing to work on: show the help hint instead.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()


if __name__ == '__main__':
    main()