ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Don't add gastronomy only consisting of "...".
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the '&params;' placeholder used in the module docstring to the help text of the page generators.
# NOTE(review): presumably picked up by pywikibot when it prints the script help - confirm against pywikibot docs.
43 docuReplacements = {'&params;': pagegenerators.parameterHelp}
44
45
def template_to_json(value: Template) -> dict:
    """Convert a template node to its JSON representation.

    The result has the template name under 'name' and, under 'parameter', one
    {'value': ...} entry per template parameter (each parameter is stringified as is).
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a wiki link node to its JSON representation.

    The result always has the link target under 'title'; the key 'text' is only
    present when the link carries a display text.
    """
    result = {'title': str(value.title)}
    label = value.text
    if label is None:
        return result
    result['text'] = str(label)
    return result
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Convert an external link node to its JSON representation.

    The result always has the link target under 'url'; the key 'text' is only
    present when the link has a title.
    """
    caption = value.title
    optional_part = {} if caption is None else {'text': str(caption)}
    return {'url': str(value.url), **optional_part}
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot converting sledrun wikitext pages to JSON sub pages.

    For each treated page a sledrun JSON document is assembled from the wikitext, validated against
    the schema loaded in setup() and saved as sub page 'Rodelbahn.json'. The content of the first
    <wrmap> tag is saved as sub page 'Landkarte.json'.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema page and keep the parsed schema in self.sledrun_schema."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Create the sledrun JSON page (and the map JSON page) for the current page.

        The page is skipped (partly with a warning) if its content model is not wikitext, if it
        is not in the category 'Kategorie:Rodelbahn' or if the sledrun JSON sub page already exists.

        Raises whatever jsonschema.validate raises if the assembled JSON does not match the schema.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        if sledrun_json_page.exists():  # should be an option
            return

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        self._add_rodelbahnbox(sledrun_json, wikicode)
        self._add_videos(sledrun_json, wikicode)
        self._add_public_transport(sledrun_json, wikicode)
        self._add_car(sledrun_json, wikicode)
        self._add_general(sledrun_json, wikicode)

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # Show the wikitext that corresponds to the JSON and how it differs from the current page.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Ordering the keys must not change the content.
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary, contentmodel='json')

        # NOTE(review): if the page has no <wrmap> tag, map_json is None and 'null' is written - confirm intended.
        map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
        summary = 'Landkarte konvertiert von Wikitext nach JSON.'
        self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary, contentmodel='json')

    def _add_rodelbahnbox(self, sledrun_json: dict, wikicode: Wikicode) -> None:
        """Copy the values of the 'Rodelbahnbox' template to sledrun_json.

        Nothing is done unless the page has exactly one top level 'Rodelbahnbox' template.
        Values that are None are left out (optional_set presumably skips None values as well).
        """
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) != 1:
            return
        rbb = rodelbahnbox_from_template(rbb_list[0])

        image = rbb['Bild']
        if image is not None:
            image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                # A missing image is only reported, the image name is used nevertheless.
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = image

        optional_set(sledrun_json, 'length', rbb['Länge'])

        difficulty = rbb['Schwierigkeit']
        if difficulty is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

        avalanches = rbb['Lawinen']
        if avalanches is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

        has_operator, operator = rbb['Betreiber']
        optional_set(sledrun_json, 'has_operator', has_operator)
        optional_set(sledrun_json, 'operator', operator)

        optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

        walkup_separate, walkup_note = rbb['Aufstieg getrennt']
        if walkup_separate is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
        optional_set(sledrun_json, 'walkup_note', walkup_note)

        optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

        walkup_support_rbb = rbb['Aufstiegshilfe']
        if walkup_support_rbb is not None:
            walkup_supports = []
            for walkup_support_type, note in walkup_support_rbb:
                walkup_support = {'type': walkup_support_type}
                optional_set(walkup_support, 'note', note)
                walkup_supports.append(walkup_support)
            sledrun_json['walkup_supports'] = walkup_supports

        nightlight, nightlight_note = rbb['Beleuchtungsanlage']
        if nightlight is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
        optional_set(sledrun_json, 'nightlight_possible_note', nightlight_note)

        weekdays_count, weekdays_note = rbb['Beleuchtungstage']
        optional_set(sledrun_json, 'nightlight_weekdays_count', weekdays_count)
        optional_set(sledrun_json, 'nightlight_weekdays_note', weekdays_note)

        rentals_rbb = rbb['Rodelverleih']
        if rentals_rbb is not None:
            sledrun_json['sled_rental_direct'] = rentals_rbb != []
            rentals = []
            for name, note in rentals_rbb:
                rental = {}
                # A rental given as wiki link refers to a wiki page, otherwise the plain name is used.
                wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    rental['wr_page'] = wikilink_to_json(wiki_link)
                else:
                    rental['name'] = name
                optional_set(rental, 'note', note)
                rentals.append(rental)
            sledrun_json['sled_rental'] = rentals

        cachet = rbb['Gütesiegel']
        if cachet is not None:
            sledrun_json['cachet'] = len(cachet) > 0

        optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
        optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

        position = rbb['Position']
        if position is not None:
            sledrun_json['position'] = lonlat_to_json(position)

        top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if top != {}:
            sledrun_json['top'] = top

        bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if bottom != {}:
            sledrun_json['bottom'] = bottom

        info_phone = rbb['Telefonauskunft']
        if info_phone is not None:
            sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

        has_info_web, info_web_url = rbb['Webauskunft']
        if has_info_web is not None:
            sledrun_json['info_web'] = [{'url': info_web_url}] if has_info_web else []

        public_transport = rbb['Öffentliche Anreise']
        if public_transport is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

    def _add_videos(self, sledrun_json: dict, wikicode: Wikicode) -> None:
        """Set 'videos' from the non-empty 'video' parameter of the first top level 'Buttonleiste' template."""
        bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
        bb = next(bb_iter, None)
        if bb is None:
            return
        video = bb.get('video', None)
        if isinstance(video, Parameter) and video.value != "":
            sledrun_json['videos'] = [{'url': str(video.value)}]

    def _add_public_transport(self, sledrun_json: dict, wikicode: Wikicode) -> None:
        """Fill the public transport keys from the section 'Anreise mit öffentlichen Verkehrsmitteln'.

        The text in front of the first list item becomes 'public_transport_description'. The templates
        of the section are collected to 'public_transport_stops' and 'public_transport_lines', the
        external links to 'public_transport_links'. Empty collections are not set.
        """
        pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                            include_headings=False)
        if len(pt_sections) < 1:
            return
        pt = pt_sections[0]

        first_item = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
        if first_item is not None:
            description = str(Wikicode(pt.nodes[:pt.nodes.index(first_item)])).strip()
            if description:
                sledrun_json["public_transport_description"] = description

        # Templates that belong to the preceding 'Haltestelle' template and the stop key they are stored under.
        stop_template_keys = {
            'Fahrplan Abfahrtsmonitor VVT': 'monitor_template',
            'Fahrplan Hinfahrt VVT': 'route_arrival_template',
            'Fahrplan Rückfahrt VVT': 'route_departure_template',
        }

        public_transport_stops = []
        public_transport_lines = []
        public_transport_links = []
        stop = None  # the stop that is currently assembled (None if there is none)
        for node in pt.nodes:
            if isinstance(node, ExternalLink):
                public_transport_links.append(external_link_to_json(node))
                continue
            if not isinstance(node, Template):
                continue
            template_name = str(node.name)
            if template_name == 'Haltestelle':
                if stop is not None:
                    public_transport_stops.append(stop)
                    # Reset, otherwise a following empty 'Haltestelle' causes the stop to be appended twice.
                    stop = None
                if all(len(p.strip()) == 0 for p in node.params):
                    continue  # template without any content
                stop = {}
                municipality = node.get(1, None)
                if municipality is not None:
                    stop['municipality'] = str(municipality)
                name_local = node.get(2, None)
                if name_local is not None:
                    stop['name_local'] = str(name_local)
                lonlat = str(node.get(3, '')).strip()
                elevation = str(node.get(4, '')).strip()
                position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(elevation))
                if len(position) > 0:
                    stop['position'] = position
            elif template_name in stop_template_keys:
                if stop is None:
                    # Without a stop there is nothing the template could be attached to.
                    warning(f"The template {template_name} on page {self.current_page.title()} "
                            f"does not belong to a Haltestelle template and is ignored.")
                else:
                    stop[stop_template_keys[template_name]] = template_to_json(node)
            elif template_name == 'Fahrplan Linie VVT':
                # A line template ends the description of the current stop.
                if stop is not None:
                    public_transport_stops.append(stop)
                    stop = None
                public_transport_lines.append({'timetable_template': template_to_json(node)})
        if stop is not None:
            public_transport_stops.append(stop)

        if len(public_transport_stops) > 0:
            sledrun_json['public_transport_stops'] = public_transport_stops
        if len(public_transport_lines) > 0:
            sledrun_json['public_transport_lines'] = public_transport_lines
        if len(public_transport_links) > 0:
            sledrun_json['public_transport_links'] = public_transport_links

    def _add_car(self, sledrun_json: dict, wikicode: Wikicode) -> None:
        """Fill 'car_description', 'car_parking' and 'car_distances' from the section 'Anreise mit dem Auto'."""
        car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
        if not car_section_list:
            return
        car_section = car_section_list[0]

        # The description is everything between the heading and the first list item.
        description_nodes = dropwhile(lambda w: isinstance(w, Heading), car_section.nodes)
        description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                      description_nodes)
        if description := str(Wikicode(list(description_nodes))).strip():
            sledrun_json["car_description"] = description

        parking = []
        for template in car_section.ifilter_templates(matches='Parkplatz'):
            lonlat = str(template.get(1, '')).strip()
            elevation = str(template.get(2, '')).strip()
            position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(elevation))
            if len(position) > 0:
                parking.append({'position': position})
        if len(parking) > 0:
            sledrun_json['car_parking'] = parking

        distances = []
        for line in io.StringIO(str(car_section)):
            match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
            if match:
                origin, via, km = match.groups()
                distances.append({
                    'km': float(km.replace(',', '.')),  # the wikitext may use a decimal comma
                    'route': (origin.strip() + ' ' + via.strip()).strip(),
                })
        if len(distances) > 0:
            sledrun_json['car_distances'] = distances

    def _add_general(self, sledrun_json: dict, wikicode: Wikicode) -> None:
        """Fill the keys taken from the sections 'Allgemeines'.

        These are 'nightlight_description', 'gastronomy', 'sled_rental_description' and 'see_also'.
        The 'see_also' links are accumulated over all matching sections.
        """
        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)

            optional_set(sledrun_json, 'nightlight_description', self._parse_nightlight_description(section_text))

            gastronomy = self._parse_gastronomy(section_text)
            if len(gastronomy) > 0:
                sledrun_json['gastronomy'] = gastronomy

            sled_rental_description = self._parse_sled_rental_description(section_text)
            if sled_rental_description is not None:
                sledrun_json['sled_rental_description'] = sled_rental_description

            # Skip everything up to the bold 'Siehe auch' marker. If there is no marker
            # the iterator is exhausted and the second loop does not find anything.
            node_iter = iter(section.nodes)
            for node in node_iter:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            # Collect the external links that follow, only separated by list markup or whitespace.
            for node in node_iter:
                if isinstance(node, ExternalLink):
                    see_also.append(external_link_to_json(node))
                elif not (isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']):
                    break
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

    @staticmethod
    def _parse_nightlight_description(value: str) -> Optional[str]:
        """Return the text of the first 'Beleuchtung' list item of value, None if it is missing or empty."""
        prefix = "* '''Beleuchtung''':"
        for line in io.StringIO(value):
            if line.startswith(prefix):
                description = line.replace(prefix, "").strip()
                return description or None
        return None

    @staticmethod
    def _parse_gastronomy(value: str) -> list:
        """Return the gastronomy entries listed below the 'Hütten' list item of value.

        Each second level list item ('** ') directly following the 'Hütten' line gives one entry
        with the optional keys 'wr_page', 'weblink', 'name' and 'note'. Items that do not give any
        of these keys (e.g. the placeholder '...') are left out.
        """
        gastronomy = []
        line_iter = io.StringIO(value)
        for line in line_iter:
            if line.rstrip() == "* '''Hütten''':":
                break
        else:
            return gastronomy  # no 'Hütten' list item

        for line in line_iter:
            if not line.startswith('** '):
                break  # end of the sub list
            entry = {}
            wiki = mwparserfromhell.parse(line)
            wiki_link = next(wiki.ifilter_wikilinks(), None)
            if isinstance(wiki_link, Wikilink):
                entry['wr_page'] = wikilink_to_json(wiki_link)
            ext_link = next(wiki.ifilter_external_links(), None)
            if isinstance(ext_link, ExternalLink):
                entry['weblink'] = external_link_to_json(ext_link)
            # The text apart from links and list markup has the form 'name', 'name (note)' or '(note)'.
            remaining_nodes = [n for n in wiki.nodes if isinstance(n, (Text, Tag)) and str(n).strip() != '*']
            remaining = str(Wikicode(remaining_nodes)).strip()
            match = re.match(r'(.*)\((.+)\)', remaining)
            if match:
                name, note = (part.strip() for part in match.groups())
                if name:
                    entry['name'] = name
                if note:
                    entry['note'] = note
            elif remaining not in ('', '...'):
                entry['name'] = remaining
            # Only keep entries with content (the check has to look at the entry, not at the result list).
            if entry:
                gastronomy.append(entry)
        return gastronomy

    @staticmethod
    def _parse_sled_rental_description(value: str) -> Optional[str]:
        """Return the text of the 'Rodelverleih' list item of value, None if there is no such item.

        The text consists of the rest of the 'Rodelverleih' line and all following lines up to
        the next first level list item ('* '). It may be the empty string.
        """
        line_iter = io.StringIO(value)
        match = None
        for line in line_iter:
            match = re.match(r"\* '''Rodelverleih''':(.*)", line)
            if match is not None:
                break
        if match is None:
            return None
        # NOTE(review): group(1) does not include the line break, so the first line is joined to the
        # following line without separator - confirm that this is intended.
        result = [match.group(1)]
        for line in line_iter:
            if re.match(r"\* ", line) is not None:
                break
            result.append(line)
        return ''.join(result).strip()
447
448
def main(*args: str) -> None:
    """Run the bot on the pages selected by the command line arguments.

    :param args: command line arguments, handed to pywikibot.handle_args and afterwards
        to the page generator factory. Help is suggested if they do not give a page generator.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()


if __name__ == '__main__':
    main()