]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
1e3cbfe56686ce150bfc53205a688eea83561715
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from typing import Any, Optional
16
17 import mwparserfromhell
18 from mwparserfromhell.nodes.extras import Parameter
19
20 import pywikibot
21 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
22 from mwparserfromhell.wikicode import Wikicode
23 from pywikibot import pagegenerators, Page
24 from pywikibot.bot import (
25     AutomaticTWSummaryBot,
26     ConfigParserBot,
27     ExistingPageBot,
28     NoRedirectPageBot,
29     SingleSiteBot,
30 )
31 from pywikibot.logging import warning
32 from pywikibot.site._namespace import BuiltinNamespace
33
34 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
35 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
36     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
37     opt_uint_from_str
38
39 from pywikibot.site import Namespace
40
# Maps the '&params;' placeholder used in the module docstring to the help text of the
# page generators (presumably substituted by pywikibot when the script help is shown).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
42
43
def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, or ``None`` if ``value`` is ``None``."""
    return None if value is None else str(value)
48
49
def template_to_json(value: Template) -> dict:
    """Return a dict describing a template.

    The result has the template name under ``'name'`` and, under
    ``'parameter'``, one ``{'value': ...}`` entry per template parameter
    holding the string form of that parameter.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
58
59
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a dict describing a wikilink.

    ``'title'`` is always present; ``'text'`` is only added when the link has
    a text part.
    """
    link_json = {'title': str(value.title)}
    label = str_or_none(value.text)
    if label is None:
        return link_json
    link_json['text'] = label
    return link_json
66
67
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that extracts the data of a sledrun wikitext page into a dict and re-renders the page from it.

    ``treat_page`` is the entry point called for every page. It collects the
    information found in the wikitext (Rodelbahnbox template, button bar,
    arrival sections, general section) in a dict, passes that dict to
    ``create_sledrun_wiki`` and saves the resulting text to the current page.
    """

    # Distance line of the car section, e.g. "** von '''Innsbruck''' über Axams: 12,5 km".
    _CAR_DISTANCE_RE = re.compile(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km")

    # Templates that describe the most recent 'Haltestelle' template -> key used in the stop dict.
    _STOP_TEMPLATE_KEYS = {
        "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
        "Fahrplan Hinfahrt VVT": 'route_arrival_template',
        "Fahrplan Rückfahrt VVT": 'route_departure_template',
    }

    def treat_page(self) -> None:
        """Load the given page, do some changes, and save it.

        The page is skipped (with a warning) if its content model is not
        wikitext, if it is not in the category 'Kategorie:Rodelbahn', or if
        one of the sub pages '/Rodelbahn.json' or '/Landkarte.json' already
        exists.
        """
        page_title = self.current_page.title()
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {page_title} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {page_title} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, page_title + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {page_title}.")
            return

        map_json_page = Page(self.site, page_title + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {page_title}.")
            return

        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if wrmap_tags:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": page_title,
            "aliases": [],
            # The category is the link *title*; the link text (the part after '|') is
            # normally absent for category links.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # The description is the first non-blank top level text of the first 'Allgemeines' section.
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["description"] = str(stripped)
                    break
            break

        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            self._add_rodelbahnbox(rbb_list[0], sledrun_json)

        self._add_videos(wikicode, sledrun_json)
        self._add_public_transport(wikicode, sledrun_json)
        self._add_car(wikicode, sledrun_json)

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)
            gastronomy = self._parse_gastronomy(section_text)
            if gastronomy:
                sledrun_json['gastronomy'] = gastronomy
            self._add_sled_rental_description(section_text, sledrun_json)
            see_also.extend(self._parse_see_also(section))
        if see_also:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, page_title + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)

    def _add_rodelbahnbox(self, template: Template, sledrun_json: dict) -> None:
        """Copy the values of the Rodelbahnbox template to ``sledrun_json``; ``None`` values are left out."""
        rbb = rodelbahnbox_from_template(template)

        v = rbb['Bild']
        if v is not None:
            image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                # Only a hint for the operator, the image name is stored nevertheless.
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = v

        v = rbb['Länge']
        if v is not None:
            sledrun_json['length'] = v

        v = rbb['Schwierigkeit']
        if v is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(v)

        v = rbb['Lawinen']
        if v is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(v)

        v, w = rbb['Betreiber']
        if v is not None:
            sledrun_json['has_operator'] = v
        if w is not None:
            sledrun_json['operator'] = w

        v = rbb['Aufstieg möglich']
        if v is not None:
            sledrun_json['walkup_possible'] = v

        v, w = rbb['Aufstieg getrennt']
        if v is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(v)
        if w is not None:
            sledrun_json['walkup_comment'] = w  # TODO

        v = rbb['Gehzeit']
        if v is not None:
            sledrun_json['walkup_time'] = v

        v, w = rbb['Beleuchtungsanlage']
        if v is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
        if w is not None:
            sledrun_json['nightlight_description'] = w

        rentals = rbb['Rodelverleih']
        if rentals is not None:
            sledrun_json['sled_rental_direct'] = rentals != []
            rental_list = []
            for name, comment in rentals:
                rental = {}
                # A rental given as wikilink refers to a wiki page, otherwise the plain name is kept.
                wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    rental['wr_page'] = wikilink_to_json(wiki_link)
                else:
                    rental['name'] = name
                if comment is not None:
                    rental['comment'] = comment
                rental_list.append(rental)
            sledrun_json['sled_rental'] = rental_list

        v = rbb['In Übersichtskarte']
        if v is not None:
            sledrun_json['show_in_overview'] = v

        v = rbb['Forumid']
        if v is not None:
            sledrun_json['forum_id'] = v

        v = rbb['Position']
        if v is not None:
            sledrun_json['position'] = lonlat_to_json(v)

        v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if v != {}:
            sledrun_json['top'] = v

        v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if v != {}:
            sledrun_json['bottom'] = v

        v = rbb['Telefonauskunft']
        if v is not None:
            sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

        v = rbb['Öffentliche Anreise']
        if v is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(v)

    @staticmethod
    def _add_videos(wikicode: Wikicode, sledrun_json: dict) -> None:
        """Store the 'video' parameter of the first top level 'Buttonleiste' template as ``videos``."""
        bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
        bb = next(bb_iter, None)
        if bb is None:
            return
        video = bb.get('video', None)
        if isinstance(video, Parameter):
            # Store a plain string like the other URLs collected by this bot.
            sledrun_json['videos'] = [{'url': str(video.value)}]

    def _add_public_transport(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Add description, stops and lines of the first public transport section to ``sledrun_json``."""
        sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                         include_headings=False)
        if not sections:
            return
        nodes = sections[0].nodes

        # Everything before the first list item ('*') is the free text description.
        first_item = next((i for i, n in enumerate(nodes) if isinstance(n, Tag) and n.wiki_markup == '*'), None)
        if first_item is not None:
            description = str(Wikicode(nodes[:first_item])).strip()
            if description:
                sledrun_json["public_transport_description"] = description

        public_transport_stops = []
        public_transport_lines = []
        stop = None  # stop that is currently being assembled
        for node in nodes:
            if not isinstance(node, Template):
                continue
            # Whitespace around a template name is not significant.
            name = node.name.strip()
            if name == 'Haltestelle':
                if stop is not None:
                    public_transport_stops.append(stop)
                stop = {}
                municipality = node.get(1, None)
                if municipality is not None:
                    stop['municipality'] = str(municipality)
                name_local = node.get(2, None)
                if name_local is not None:
                    stop['name_local'] = str(name_local)
                lonlat = str_or_none(node.get(3, None))
                ele = str_or_none(node.get(4, None))
                position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(ele))
                if len(position) > 0:
                    stop['position'] = position
            elif name in self._STOP_TEMPLATE_KEYS:
                if stop is None:
                    # No stop to attach the template to - do not crash on such a page.
                    warning(f"Template {name} on page {self.current_page.title()} is not preceded by a "
                            f"Haltestelle template, ignoring it.")
                else:
                    stop[self._STOP_TEMPLATE_KEYS[name]] = template_to_json(node)
            elif name == "Fahrplan Linie VVT":
                # A line template ends the description of the current stop.
                if stop is not None:
                    public_transport_stops.append(stop)
                    stop = None
                public_transport_lines.append({'timetable_template': template_to_json(node)})
        if stop is not None:
            public_transport_stops.append(stop)
        if public_transport_stops:
            sledrun_json['public_transport_stops'] = public_transport_stops
        if public_transport_lines:
            sledrun_json['public_transport_lines'] = public_transport_lines

    def _add_car(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Add description, parking places and distances of the 'Anreise mit dem Auto' sections."""
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["car_description"] = str(stripped)
                    break

            car_parking = []
            for template in section.ifilter_templates(matches='Parkplatz'):
                lonlat = str_or_none(template.get(1, None))
                ele = str_or_none(template.get(2, None))
                position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(ele))
                if len(position) > 0:
                    car_parking.append({'position': position})
            if car_parking:
                sledrun_json['car_parking'] = car_parking

            car_distances = []
            for line in io.StringIO(str(section)):
                match = self._CAR_DISTANCE_RE.match(line.rstrip())
                if match:
                    origin, via, km = match.groups()
                    car_distances.append({
                        'km': float(km.replace(',', '.')),  # the wikitext uses a decimal comma
                        'route': (origin.strip() + ' ' + via.strip()).strip(),
                    })
            if car_distances:
                sledrun_json['car_distances'] = car_distances

    @staticmethod
    def _parse_gastronomy(section_text: str) -> list:
        """Return the entries of the "* '''Hütten''':" list of a section (an empty list if there is none)."""
        lines = iter(io.StringIO(section_text))
        for line in lines:
            if line.rstrip() == "* '''Hütten''':":
                break
        else:
            return []

        gastronomy = []
        for line in lines:
            if not line.startswith('** '):
                break  # end of the sub list
            entry = {}
            wiki = mwparserfromhell.parse(line)
            wiki_link = next(wiki.ifilter_wikilinks(), None)
            if isinstance(wiki_link, Wikilink):
                entry['wr_page'] = wikilink_to_json(wiki_link)
            ext_link = next(wiki.ifilter_external_links(), None)
            if isinstance(ext_link, ExternalLink):
                weblink = {'url': str(ext_link.url)}
                # A link without title must not end up with the text 'None'.
                if ext_link.title is not None:
                    weblink['text'] = str(ext_link.title)
                entry['weblink'] = weblink
            # What remains beside the links and the list marker is the note.
            remaining = str(Wikicode([n for n in wiki.nodes
                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
            match = re.match(r'\((.+)\)', remaining)
            if match:
                remaining = match.group(1)
            if len(remaining) > 0:
                entry['note'] = remaining
            gastronomy.append(entry)
        return gastronomy

    @staticmethod
    def _add_sled_rental_description(section_text: str, sledrun_json: dict) -> None:
        """Store the text of the "* '''Rodelverleih''':" list item, including its continuation lines."""
        lines = iter(io.StringIO(section_text))
        match = None
        for line in lines:
            match = re.match(r"\* '''Rodelverleih''':(.*)", line)
            if match is not None:
                break
        if match is None:
            return
        # Take the rest of the line including its line break so that it is not fused with the
        # following line.
        result = [match.string[match.start(1):]]
        for line in lines:
            if re.match(r"\* ", line) is not None:
                break  # next item of the top level list
            result.append(line)
        sledrun_json['sled_rental_description'] = ''.join(result).strip()

    @staticmethod
    def _parse_see_also(section: Wikicode) -> list:
        """Return the external links that directly follow the bold text 'Siehe auch' in the section."""
        links = []
        nodes = iter(section.nodes)
        for node in nodes:
            if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                break
        # If 'Siehe auch' was not found, the iterator is exhausted and the loop below does nothing.
        for node in nodes:
            if isinstance(node, ExternalLink):
                link = {'url': str(node.url)}
                if node.title is not None:
                    link['text'] = str(node.title)
                links.append(link)
            elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                continue  # list markers and whitespace between the links
            else:
                break
        return links
392
393
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    Without any page generator argument, only a help hint is shown.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
404
405
# Run the bot only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()