ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Parse video from Buttonleiste.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from typing import Any, Optional
16
17 import mwparserfromhell
18 from mwparserfromhell.nodes.extras import Parameter
19
20 import pywikibot
21 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
22 from mwparserfromhell.wikicode import Wikicode
23 from pywikibot import pagegenerators, Page
24 from pywikibot.bot import (
25     AutomaticTWSummaryBot,
26     ConfigParserBot,
27     ExistingPageBot,
28     NoRedirectPageBot,
29     SingleSiteBot,
30 )
31 from pywikibot.logging import warning
32 from pywikibot.site._namespace import BuiltinNamespace
33
34 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
35 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
36     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
37     opt_uint_from_str
38
39 from pywikibot.site import Namespace
40
# Maps the '&params;' placeholder used in the module docstring to pywikibot's generator/filter
# parameter help text (presumably substituted by pywikibot when it prints the script help -- confirm).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
42
43
def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, or ``None`` when *value* itself is ``None``."""
    return None if value is None else str(value)
48
49
def template_to_json(value: Template) -> dict:
    """Convert a template to a dict with its name and the string form of each parameter.

    :param value: Template whose ``name`` and ``params`` are read.
    :return: ``{'name': <str>, 'parameter': [{'value': <str>}, ...]}`` with the parameters in template order.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
58
59
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wikilink to a dict with its ``title`` and, if the link has one, its ``text``.

    :param value: Wikilink whose ``title`` and ``text`` are read.
    :return: ``{'title': <str>}`` plus a ``'text'`` entry when the link text is not ``None``.
    """
    result = {'title': str(value.title)}
    label = str_or_none(value.text)
    if label is None:
        return result
    result['text'] = label
    return result
66
67
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that re-creates a sledrun wikitext page from data parsed out of that same page.

    For each page the wikitext is parsed into a ``sledrun_json`` dict (plus an optional map dict taken from
    the first tag matching ``wrmap``), the result is rendered with ``create_sledrun_wiki`` and saved to the
    current page.
    """

    def treat_page(self) -> None:
        """Parse the current page into the sledrun JSON structure, render it and save the page.

        The page is skipped with a warning when its content model is not ``wikitext``, when it does not link to
        ``Kategorie:Rodelbahn``, or when one of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json`` already
        exists.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        # Never touch a page that already has one of its JSON sub pages.
        for suffix in ('/Rodelbahn.json', '/Landkarte.json'):
            json_page = Page(self.site, self.current_page.title() + suffix)
            if json_page.exists():
                warning(f"{json_page.title()} already exists, skipping {self.current_page.title()}.")
                return

        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A plain category link has no text, only a title, so the title has to be compared here.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # The key order of sledrun_json follows the order of the calls below.
        self._add_description(wikicode, sledrun_json)
        self._add_rodelbahnbox(wikicode, sledrun_json)
        self._add_button_bar(wikicode, sledrun_json)
        self._add_public_transport(wikicode, sledrun_json)
        self._add_car(wikicode, sledrun_json)
        # Parsed independently of the car section: a page without "Anreise mit dem Auto" must still get
        # its gastronomy, sled rental description and "see also" entries.
        self._add_general(wikicode, sledrun_json)
        sledrun_json['allow_reports'] = True

        text = create_sledrun_wiki(sledrun_json, map_json)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)

    @staticmethod
    def _add_description(wikicode: Wikicode, sledrun_json: dict) -> None:
        """Store the first non-blank top level text of the first 'Allgemeines' section as ``description``."""
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for text in section.ifilter_text(recursive=False):
                stripped = text.strip()
                if stripped:
                    sledrun_json["description"] = str(stripped)
                    break
            break  # only the first matching section is used

    def _add_rodelbahnbox(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Copy the values of the 'Rodelbahnbox' template to *sledrun_json*.

        Nothing is added unless the page has exactly one top level 'Rodelbahnbox' template.
        """
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) != 1:
            return
        rbb = rodelbahnbox_from_template(rbb_list[0])

        image = rbb['Bild']
        if image is not None:
            image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                # A missing image is only reported; the value is still taken over.
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = image

        length = rbb['Länge']
        if length is not None:
            sledrun_json['length'] = length

        difficulty = rbb['Schwierigkeit']
        if difficulty is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

        avalanches = rbb['Lawinen']
        if avalanches is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

        has_operator, operator = rbb['Betreiber']
        if has_operator is not None:
            sledrun_json['has_operator'] = has_operator
        if operator is not None:
            sledrun_json['operator'] = operator

        walkup_possible = rbb['Aufstieg möglich']
        if walkup_possible is not None:
            sledrun_json['walkup_possible'] = walkup_possible

        walkup_separate, walkup_comment = rbb['Aufstieg getrennt']
        if walkup_separate is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
        if walkup_comment is not None:
            sledrun_json['walkup_comment'] = walkup_comment  # TODO

        walkup_time = rbb['Gehzeit']
        if walkup_time is not None:
            sledrun_json['walkup_time'] = walkup_time

        nightlight, nightlight_description = rbb['Beleuchtungsanlage']
        if nightlight is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
        if nightlight_description is not None:
            sledrun_json['nightlight_description'] = nightlight_description

        rentals = rbb['Rodelverleih']
        if rentals is not None:
            sledrun_json['sled_rental_direct'] = rentals != []
            rental_entries = []
            for name, comment in rentals:
                entry = {}
                # A rental given as wikilink refers to a wiki page, otherwise the plain name is kept.
                wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    entry['wr_page'] = wikilink_to_json(wiki_link)
                else:
                    entry['name'] = name
                if comment is not None:
                    entry['comment'] = comment
                rental_entries.append(entry)
            sledrun_json['sled_rental'] = rental_entries

        show_in_overview = rbb['In Übersichtskarte']
        if show_in_overview is not None:
            sledrun_json['show_in_overview'] = show_in_overview

        forum_id = rbb['Forumid']
        if forum_id is not None:
            sledrun_json['forum_id'] = forum_id

        position = rbb['Position']
        if position is not None:
            sledrun_json['position'] = lonlat_to_json(position)

        top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if top != {}:
            sledrun_json['top'] = top

        bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if bottom != {}:
            sledrun_json['bottom'] = bottom

        info_phone = rbb['Telefonauskunft']
        if info_phone is not None:
            sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

        public_transport = rbb['Öffentliche Anreise']
        if public_transport is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

    @staticmethod
    def _add_button_bar(wikicode: Wikicode, sledrun_json: dict) -> None:
        """Store the ``video`` parameter of the first top level 'Buttonleiste' template as ``videos``."""
        button_bar = next(
            wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste'),
            None)
        if button_bar is None:
            return
        video = button_bar.get('video', None)
        if isinstance(video, Parameter):
            # Store a plain string (not the parser's Wikicode object) so that the value is JSON compatible.
            sledrun_json['videos'] = [{'url': str(video.value).strip()}]

    @staticmethod
    def _add_public_transport(wikicode: Wikicode, sledrun_json: dict) -> None:
        """Parse the first 'Anreise mit öffentlichen Verkehrsmitteln' section.

        Sets ``public_transport_description`` (text before the first list item), ``public_transport_stops``
        ('Haltestelle' templates together with the timetable templates following them) and
        ``public_transport_lines`` ('Fahrplan Linie VVT' templates).
        """
        # Templates that belong to the stop ('Haltestelle') preceding them -> key within the stop dict.
        stop_template_keys = {
            "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
            "Fahrplan Hinfahrt VVT": 'route_arrival_template',
            "Fahrplan Rückfahrt VVT": 'route_departure_template',
        }
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                             include_headings=False):
            first_bullet = next((n for n in section.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
            if first_bullet is not None:
                intro = str(Wikicode(section.nodes[:section.nodes.index(first_bullet)])).strip()
                if intro:
                    sledrun_json["public_transport_description"] = intro

            stops = []
            lines = []
            stop = None  # stop currently being filled, None if there is no open stop
            for node in section.nodes:
                if not isinstance(node, Template):
                    continue
                # Strip the name like it is done for the other templates of this bot.
                name = str(node.name).strip()
                if name == 'Haltestelle':
                    if stop is not None:
                        stops.append(stop)
                    stop = {}
                    municipality = node.get(1, None)
                    if municipality is not None:
                        stop['municipality'] = str(municipality)
                    name_local = node.get(2, None)
                    if name_local is not None:
                        stop['name_local'] = str(name_local)
                    lonlat = str_or_none(node.get(3, None))
                    ele = str_or_none(node.get(4, None))
                    position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(ele))
                    if len(position) > 0:
                        stop['position'] = position
                elif name in stop_template_keys:
                    if stop is None:
                        # Without an open stop there is nothing the template could be attached to.
                        warning(f"Ignoring template {name} because it does not follow a Haltestelle template.")
                        continue
                    stop[stop_template_keys[name]] = template_to_json(node)
                elif name == "Fahrplan Linie VVT":
                    # A line template closes the stop that is currently open.
                    if stop is not None:
                        stops.append(stop)
                        stop = None
                    lines.append({'timetable_template': template_to_json(node)})
            if stop is not None:
                stops.append(stop)
            if len(stops) > 0:
                sledrun_json['public_transport_stops'] = stops
            if len(lines) > 0:
                sledrun_json['public_transport_lines'] = lines
            break  # only the first matching section is used

    @staticmethod
    def _add_car(wikicode: Wikicode, sledrun_json: dict) -> None:
        """Parse the 'Anreise mit dem Auto' section(s).

        Sets ``car_description`` (first non-blank top level text), ``car_parking`` ('Parkplatz' templates with a
        position) and ``car_distances`` (lines of the form ``** von '''<place>'''<details>: <number> km``).
        """
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
            for text in section.ifilter_text(recursive=False):
                stripped = text.strip()
                if stripped:
                    sledrun_json["car_description"] = str(stripped)
                    break

            parking = []
            for template in section.ifilter_templates(matches='Parkplatz'):
                lonlat = str_or_none(template.get(1, None))
                ele = str_or_none(template.get(2, None))
                position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(ele))
                if len(position) > 0:
                    parking.append({'position': position})
            if len(parking) > 0:
                sledrun_json['car_parking'] = parking

            distances = []
            for line in io.StringIO(str(section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    place, details, km = match.groups()
                    distances.append({
                        'km': float(km.replace(',', '.')),  # the wiki uses a decimal comma
                        'route': (place.strip() + ' ' + details.strip()).strip(),
                    })
            if len(distances) > 0:
                sledrun_json['car_distances'] = distances

    @classmethod
    def _add_general(cls, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Parse gastronomy, sled rental description and "see also" links of the 'Allgemeines' section(s)."""
        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)

            gastronomy = cls._parse_gastronomy(section_text)
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            rental_description = cls._parse_sled_rental_description(section_text)
            if rental_description is not None:
                sledrun_json['sled_rental_description'] = rental_description

            # Skip everything up to and including the bold "Siehe auch" label ...
            nodes = iter(section.nodes)
            for node in nodes:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            # ... and collect the external links following it until other content shows up.
            for node in nodes:
                if isinstance(node, ExternalLink):
                    link = {'url': str(node.url)}
                    if node.title is not None:
                        link['text'] = str(node.title)
                    see_also.append(link)
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    continue  # list markup and whitespace between the links
                else:
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

    @staticmethod
    def _parse_gastronomy(value: str) -> list:
        """Return the entries of the ``* '''Hütten''':`` list found in the wikitext *value*.

        Every ``** `` line directly following the ``* '''Hütten''':`` line gives one dict with the optional keys
        ``wr_page`` (first wikilink), ``weblink`` (first external link) and ``note`` (remaining text, with
        enclosing parentheses removed).

        :param value: Wikitext of a section.
        :return: List of entries, empty if there is no ``* '''Hütten''':`` line.
        """
        lines = iter(io.StringIO(value))
        for line in lines:
            if line.rstrip() == "* '''Hütten''':":
                break
        else:
            return []

        gastronomy = []
        for line in lines:
            if not line.startswith('** '):
                break  # the list of huts ends at the first line that is not a second level item
            entry = {}
            wiki = mwparserfromhell.parse(line)
            wiki_link = next(wiki.ifilter_wikilinks(), None)
            if isinstance(wiki_link, Wikilink):
                entry['wr_page'] = wikilink_to_json(wiki_link)
            ext_link = next(wiki.ifilter_external_links(), None)
            if isinstance(ext_link, ExternalLink):
                weblink = {'url': str(ext_link.url)}
                # An external link may have no title; do not store the string 'None' in this case.
                if ext_link.title is not None:
                    weblink['text'] = str(ext_link.title)
                entry['weblink'] = weblink
            remaining = str(Wikicode([n for n in wiki.nodes
                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
            match = re.match(r'\((.+)\)', remaining)
            if match:
                remaining = match.group(1)
            if len(remaining) > 0:
                entry['note'] = remaining
            gastronomy.append(entry)
        return gastronomy

    @staticmethod
    def _parse_sled_rental_description(value: str) -> Optional[str]:
        """Return the text of the ``* '''Rodelverleih''':`` list item found in the wikitext *value*.

        The text consists of the rest of that line and all following lines up to the next line starting
        with ``* ``.

        :param value: Wikitext of a section.
        :return: The stripped text or ``None`` if there is no such list item.
        """
        lines = iter(io.StringIO(value))
        for line in lines:
            match = re.match(r"\* '''Rodelverleih''':(.*)", line)
            if match is not None:
                break
        else:
            return None

        parts = [match.group(1)]
        for line in lines:
            if re.match(r"\* ", line) is not None:
                break
            parts.append(line)
        return ''.join(parts).strip()
386
387
def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the resulting page generator.

    :param args: Command line arguments, passed on to ``pywikibot.handle_args``.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        # Without a generator there are no pages to work on.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
398
399
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()