ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Parse cachet.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Any, Optional
17
18 import mwparserfromhell
19 from mwparserfromhell.nodes.extras import Parameter
20
21 import pywikibot
22 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
23 from mwparserfromhell.wikicode import Wikicode
24 from pywikibot import pagegenerators, Page
25 from pywikibot.bot import (
26     AutomaticTWSummaryBot,
27     ConfigParserBot,
28     ExistingPageBot,
29     NoRedirectPageBot,
30     SingleSiteBot,
31 )
32 from pywikibot.logging import warning
33 from pywikibot.site._namespace import BuiltinNamespace
34
35 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
36 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
37     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
38     opt_uint_from_str
39
40 from pywikibot.site import Namespace
41
# Maps the '&params;' placeholder that appears in the module docstring to pagegenerators.parameterHelp.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
43
44
def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, passing ``None`` through unchanged."""
    return None if value is None else str(value)
49
50
def template_to_json(value: Template) -> dict:
    """Describe a template as a dict holding its name and its parameters.

    :param value: The template to convert.
    :return: ``{'name': ..., 'parameter': [...]}`` where every entry of ``parameter`` is
        ``{'value': str(p)}`` for a parameter ``p`` of ``value.params``, in the original order.
    """
    rendered_params = [{'value': str(param)} for param in value.params]
    return {'name': str(value.name), 'parameter': rendered_params}
59
60
def wikilink_to_json(value: Wikilink) -> dict:
    """Describe a wikilink as a dict.

    :param value: The wikilink to convert.
    :return: A dict with the key ``'title'`` and, only when the link has a text, the key ``'text'``.
    """
    link = {'title': str(value.title)}
    if (label := str_or_none(value.text)) is not None:
        link['text'] = label
    return link
67
68
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that collects sledrun data from a sledrun wikitext page and saves the text rendered from that data."""

    def treat_page(self) -> None:
        """Convert the current sledrun wikitext page and save the result.

        The page is skipped with a warning (nothing is saved) when

        * its content model is not ``wikitext``,
        * it has no link to ``Kategorie:Rodelbahn``,
        * the sub page ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists, or
        * a stop related timetable template is not preceded by a ``Haltestelle`` template.

        Otherwise the data found in the ``Rodelbahnbox`` and ``Buttonleiste`` templates, in the ``wrmap`` tag
        and in the sections "Allgemeines", "Anreise mit öffentlichen Verkehrsmitteln" and "Anreise mit dem Auto"
        is collected in a dict, handed to ``create_sledrun_wiki`` and the returned text is saved.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {self.current_page.title()}.")
            return

        # Only the first <wrmap> tag is used for the map data.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category link carries its target in `title` (same check as for Kategorie:Rodelbahn above);
            # `text` is only set for piped links and therefore never matched.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # The description is the first non-blank top-level text node of the first "Allgemeines" section.
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for w in v.ifilter_text(recursive=False):
                x = w.strip()
                if x:
                    sledrun_json["description"] = str(x)
                    break
            break

        # The Rodelbahnbox is only evaluated when the page contains exactly one of them.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the image name is stored nevertheless.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            if v is not None:
                sledrun_json['has_operator'] = v
            if w is not None:
                sledrun_json['operator'] = w

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['walkup_comment'] = w  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            def _walkup_support():
                """Store the walkup supports (type and optional comment) of the Rodelbahnbox."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, comment in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        if comment is not None:
                            # Must be an assignment: `d[key]: value` is a bare annotation that stores nothing.
                            walkup_support['comment'] = comment
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            if w is not None:
                sledrun_json['nightlight_description'] = w

            def _sled_rental():
                """Store the sled rentals; a rental is referenced by wiki page if its name contains a wikilink."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, comment in v:
                        x = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        if comment is not None:
                            x['comment'] = comment
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Store whether the Rodelbahnbox lists at least one cachet."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Store the video parameter of the first top-level Buttonleiste template, if present."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Store plain text like the other links of this method do, not a parser object.
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        # Templates that add information to the stop introduced by the preceding Haltestelle template.
        stop_template_keys = {
            'Fahrplan Abfahrtsmonitor VVT': 'monitor_template',
            'Fahrplan Hinfahrt VVT': 'route_arrival_template',
            'Fahrplan Rückfahrt VVT': 'route_departure_template',
        }
        # Only the first matching section is evaluated (see the `break` at the end of the loop body).
        for v in wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                       include_headings=False):
            # Everything in front of the first list item is the free text description.
            w = next((w for w in v.nodes if isinstance(w, Tag) and w.wiki_markup == '*'), None)
            if w is not None:
                x = str(Wikicode(v.nodes[:v.nodes.index(w)])).strip()
                if x:
                    sledrun_json["public_transport_description"] = x

            public_transport_stops = []
            public_transport_lines = []
            ya = None  # the stop currently being assembled
            for w in v.nodes:
                if not isinstance(w, Template):
                    continue
                template_name = str(w.name)
                if template_name == 'Haltestelle':
                    if ya is not None:
                        public_transport_stops.append(ya)
                    ya = {}
                    z = w.get(1, None)
                    if z is not None:
                        ya['municipality'] = str(z)
                    z = w.get(2, None)
                    if z is not None:
                        ya['name_local'] = str(z)
                    za = str_or_none(w.get(3, None))
                    zb = str_or_none(w.get(4, None))
                    z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                    if len(z) > 0:
                        ya['position'] = z
                elif template_name in stop_template_keys:
                    if ya is None:
                        # Without a stop there is nothing to attach the template to. Skip the page instead
                        # of failing with a TypeError or saving a text that silently lost the template.
                        warning(f"Template {template_name} is not preceded by a Haltestelle template, "
                                f"skipping {self.current_page.title()}.")
                        return
                    ya[stop_template_keys[template_name]] = template_to_json(w)
                elif template_name == 'Fahrplan Linie VVT':
                    # A line timetable ends the stop that is currently being assembled.
                    if ya is not None:
                        public_transport_stops.append(ya)
                        ya = None
                    y = {
                        'timetable_template': template_to_json(w),
                    }
                    public_transport_lines.append(y)
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            break

        def _car():
            """Store description, parking positions and distances of the first "Anreise mit dem Auto" section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # The description is everything between the leading heading(s) and the first list item.
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str_or_none(w.get(1, None))
                zb = str_or_none(w.get(2, None))
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))  # the wikitext may use a decimal comma
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        x = []  # "see also" links collected from the "Allgemeines" section(s)
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _gastronomy(value: str):
                """Return the entries of the ``**`` list that directly follows the "Hütten" list item."""
                gastronomy = []
                line_iter = io.StringIO(value)
                for line in line_iter:
                    if line.rstrip() == "* '''Hütten''':":
                        break
                else:
                    return gastronomy
                # Continue with the lines after the "Hütten" item; the list ends at the first other line.
                for line in line_iter:
                    if not line.startswith('** '):
                        break
                    g = {}
                    wiki = mwparserfromhell.parse(line)
                    wiki_link = next(wiki.ifilter_wikilinks(), None)
                    if isinstance(wiki_link, Wikilink):
                        g['wr_page'] = wikilink_to_json(wiki_link)
                    ext_link = next(wiki.ifilter_external_links(), None)
                    if isinstance(ext_link, ExternalLink):
                        el = {
                            'url': str(ext_link.url),
                            'text': str(ext_link.title)
                        }
                        g['weblink'] = el
                    # What is left besides the links and the list markup is treated as note.
                    remaining = str(Wikicode([n for n in wiki.nodes
                                              if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                    match = re.match(r'\((.+)\)', remaining)
                    if match:
                        remaining = match.group(1)
                    if len(remaining) > 0:
                        g['note'] = remaining
                    gastronomy.append(g)
                return gastronomy
            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text of the "Rodelverleih" list item up to the next first-level list item."""
                line_iter = io.StringIO(str(v))
                for line in line_iter:
                    match = re.match(r"\* '''Rodelverleih''':(.*)", line)
                    if match is not None:
                        break
                else:
                    return
                # Take the rest of the line including its line break so that following lines
                # are not glued to the first one when the parts are joined.
                result = [line[match.start(1):]]
                for line in line_iter:
                    if re.match(r"\* ", line) is not None:
                        break
                    result.append(line)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Collect the external links that directly follow the bold "Siehe auch" marker.
            node_iter = iter(v.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    # Store plain text like for the gastronomy web links, not parser objects.
                    link = {'url': str(node.url)}
                    if node.title is not None:
                        link['text'] = str(node.title)
                    x.append(link)
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    continue  # list markup and white space between the links
                else:
                    break
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
416
417
def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the pages they select.

    :param args: Command line arguments, passed on to ``pywikibot.handle_args``.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        # No page generator was created from the arguments: show the help hint instead of running.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
428
429
# Run the bot only when this file is executed as a script.
if __name__ == '__main__':
    main()