ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
930c513ddd4ad9f3b2e86fe96ec498de5920285c
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder that appears in the module docstring to
# pagegenerators.parameterHelp (presumably substituted by pywikibot when the
# script help is shown - confirm against the pywikibot documentation).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Return a dict representation of a template.

    The dict holds the stringified template name under ``'name'`` and, under
    ``'parameter'``, a list with one ``{'value': <stringified parameter>}``
    entry per element of ``value.params`` (in the same order).
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Return a dict representation of a wiki link.

    ``'title'`` is always present; ``'text'`` is only added when the link has
    a text (``value.text`` is not ``None``).
    """
    title = str(value.title)
    if value.text is None:
        return {'title': title}
    return {'title': title, 'text': str(value.text)}
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Return a dict representation of an external link.

    ``'url'`` is always present; the link title is stored under ``'text'``
    only when ``value.title`` is not ``None``.
    """
    optional_text = {} if value.title is None else {'text': str(value.title)}
    return {'url': str(value.url), **optional_text}
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Create sledrun and map JSON sub-pages from sledrun wikitext pages.

    For each treated page the wikitext is parsed and converted to a dict that
    is validated against the schema loaded in :meth:`setup` and offered for
    saving as ``<page title>/Rodelbahn.json``. The content of a ``<wrmap>``
    tag, if the page has one, is offered for saving as
    ``<page title>/Landkarte.json``.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki into ``self.sledrun_schema``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Convert the current wikitext page to sledrun JSON (and map JSON) and save it.

        Pages whose content model is not ``wikitext`` or that do not link to
        ``Kategorie:Rodelbahn`` are skipped with a warning.

        Raises:
            jsonschema.ValidationError: if the generated sledrun JSON does not
                conform to ``self.sledrun_schema``.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        # The map stays None when the page has no <wrmap> tag.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category is identified by the link target (title); a plain
            # category link has no link text, so comparing the text never matched.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # Values of the 'Rodelbahnbox' template are only used when the page has
        # exactly one such template at top level.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported; the name is stored anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Store the 'Aufstiegshilfe' entries as list of {'type', optional 'note'} dicts."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Store the 'Rodelverleih' entries; an entry is a wiki page link or a plain name."""
                rentals_rbb = rbb['Rodelverleih']
                if rentals_rbb is not None:
                    sledrun_json['sled_rental_direct'] = rentals_rbb != []
                    rentals = []
                    for name, note in rentals_rbb:
                        rental = {}
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            rental['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            rental['name'] = name
                        optional_set(rental, 'note', note)
                        rentals.append(rental)
                    sledrun_json['sled_rental'] = rentals
            _sled_rental()

            def _cachet():
                """Store whether the 'Gütesiegel' value is non-empty."""
                cachet_rbb = rbb['Gütesiegel']
                if cachet_rbb is not None:
                    sledrun_json['cachet'] = len(cachet_rbb) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Store the 'video' parameter of the first top-level 'Buttonleiste' template."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    sledrun_json['videos'] = [{'url': str(video.value)}]
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links of the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]

            # Everything before the first list item is taken as the description.
            first_item_index = next((index for index, node in enumerate(pt.nodes)
                                     if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if first_item_index is not None:
                description = str(Wikicode(pt.nodes[:first_item_index])).strip()
                if description:
                    sledrun_json["public_transport_description"] = description

            # Templates that are attached to the most recent 'Haltestelle' (stop).
            stop_template_keys = {
                'Fahrplan Abfahrtsmonitor VVT': 'monitor_template',
                'Fahrplan Hinfahrt VVT': 'route_arrival_template',
                'Fahrplan Rückfahrt VVT': 'route_departure_template',
            }
            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            stop = None  # stop that is currently being assembled
            for node in pt.nodes:
                if isinstance(node, Template):
                    # Strip the name like the 'Rodelbahnbox'/'Buttonleiste' checks do,
                    # so '{{Haltestelle\n|...}}' is recognized as well.
                    template_name = str(node.name).strip()
                    if template_name == 'Haltestelle':
                        if stop is not None:
                            public_transport_stops.append(stop)
                        stop = {}
                        municipality = node.get(1, None)
                        if municipality is not None:
                            stop['municipality'] = str(municipality)
                        name_local = node.get(2, None)
                        if name_local is not None:
                            stop['name_local'] = str(name_local)
                        lonlat_str = str(node.get(3, '')).strip()
                        ele_str = str(node.get(4, '')).strip()
                        position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                        if len(position) > 0:
                            stop['position'] = position
                    elif template_name in stop_template_keys:
                        if stop is None:
                            # Without a preceding stop there is nothing to attach the
                            # template to (previously this raised a TypeError).
                            warning(f'Template {template_name} on page {self.current_page.title()} '
                                    f'is not preceded by a Haltestelle template and is ignored.')
                        else:
                            stop[stop_template_keys[template_name]] = template_to_json(node)
                    elif template_name == 'Fahrplan Linie VVT':
                        # A line template ends the stop that is being assembled.
                        if stop is not None:
                            public_transport_stops.append(stop)
                            stop = None
                        public_transport_lines.append({
                            'timetable_template': template_to_json(node),
                        })
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if stop is not None:
                public_transport_stops.append(stop)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking positions and distances of the car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            car_section = car_section_list[0]

            # The description is the text between the heading and the first list item.
            description_nodes = dropwhile(lambda n: isinstance(n, Heading), car_section.nodes)
            description_nodes = takewhile(lambda n: not (isinstance(n, Tag) and n.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            parking = []
            for template in car_section.ifilter_templates(matches='Parkplatz'):
                lonlat_str = str(template.get(1, '')).strip()
                ele_str = str(template.get(2, '')).strip()
                position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(ele_str))
                if len(position) > 0:
                    parking.append({'position': position})
            if len(parking) > 0:
                sledrun_json['car_parking'] = parking

            distances = []
            for line in io.StringIO(str(car_section)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                if match:
                    origin, via, km = match.groups()
                    distances.append({
                        'km': float(km.replace(',', '.')),  # decimal comma -> decimal point
                        'route': (origin.strip() + ' ' + via.strip()).strip(),
                    })
            if len(distances) > 0:
                sledrun_json['car_distances'] = distances
        _car()

        def _nightlight(value: str) -> Optional[str]:
            """Return the text of the first "* '''Beleuchtung''':" line of value, or None if missing/empty."""
            prefix = "* '''Beleuchtung''':"
            line = next((candidate for candidate in io.StringIO(value) if candidate.startswith(prefix)), None)
            if line is None:
                return None
            return line.replace(prefix, "").strip() or None

        def _gastronomy(value: str) -> list:
            """Return one dict per '** ' line that directly follows the "* '''Hütten''':" line of value."""
            gastronomy = []
            line_iter = io.StringIO(value)
            # any() consumes the lines up to and including the heading line.
            if not any(line.rstrip() == "* '''Hütten''':" for line in line_iter):
                return gastronomy
            for line in line_iter:
                if not line.startswith('** '):
                    break
                g = {}
                wiki = mwparserfromhell.parse(line)
                wiki_link = next(wiki.ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    g['wr_page'] = wikilink_to_json(wiki_link)
                ext_link = next(wiki.ifilter_external_links(), None)
                if isinstance(ext_link, ExternalLink):
                    g['weblink'] = external_link_to_json(ext_link)
                # What is left besides links and the list markup is "name (note)".
                # A list (not a generator) is passed so the nodes can be traversed repeatedly.
                remaining = str(Wikicode([n for n in wiki.nodes
                                          if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                match = re.match(r'(.*)\((.+)\)', remaining)
                if match:
                    name, note = match.groups()
                    name = name.strip()
                    note = note.strip()
                    if len(name) > 0:
                        g['name'] = name
                    if len(note) > 0:
                        g['note'] = note
                elif len(remaining) > 0:
                    g['name'] = remaining
                gastronomy.append(g)
            return gastronomy

        def _sled_rental_description(value: str) -> None:
            """Store the text of the "* '''Rodelverleih''':" list item of value, including continuation lines."""
            prefix = "* '''Rodelverleih''':"
            line_iter = io.StringIO(value)
            first_line = next((line for line in line_iter if line.startswith(prefix)), None)
            if first_line is None:
                return
            # Keep the line break of the first line so that continuation lines
            # (e.g. '** ...') are not glued to its text.
            result = [first_line[len(prefix):]]
            for line in line_iter:
                if line.startswith('* '):  # next first-level list item
                    break
                result.append(line)
            sledrun_json['sled_rental_description'] = ''.join(result).strip()

        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)
            optional_set(sledrun_json, 'nightlight_description', _nightlight(section_text))

            gastronomy = _gastronomy(section_text)
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            _sled_rental_description(section_text)

            # Collect the external links that follow '''Siehe auch'''; list markup
            # and whitespace in between are skipped, anything else ends the list.
            node_iter = iter(section.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    pass
                else:
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext generated from the JSON and its diff to the current page.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Ordering the keys must not change the content.
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary)

        # Without a <wrmap> tag there is no map; do not save a page containing 'null'.
        if map_json is not None:
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary)
441
442
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    The arguments are first handled by pywikibot and the remaining ones by
    the page generator factory. If no page generator results from them, the
    help hint for a missing generator is shown instead of running the bot.

    :param args: command line arguments
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
453
454
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()