ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Replace "comment" with "note" to make labeling more consistent.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from itertools import takewhile, dropwhile
16 from typing import Optional
17
18 import jsonschema
19 import mwparserfromhell
20 from mwparserfromhell.nodes.extras import Parameter
21
22 import pywikibot
23 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading
24 from mwparserfromhell.wikicode import Wikicode
25 from pywikibot import pagegenerators, Page
26 from pywikibot.bot import (
27     AutomaticTWSummaryBot,
28     ConfigParserBot,
29     ExistingPageBot,
30     NoRedirectPageBot,
31     SingleSiteBot,
32 )
33 from pywikibot.logging import warning
34 from pywikibot.site._namespace import BuiltinNamespace
35 from wrpylib.json_tools import order_json_keys
36
37 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
38 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
39     avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \
40     opt_uint_from_str
41 from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description
42
# Maps the ``&params;`` placeholder used in the module docstring to the
# page-generator help text (presumably substituted by pywikibot when it
# prints the script help -- confirm against the pywikibot version in use).
docuReplacements = {
    '&params;': pagegenerators.parameterHelp,
}
44
45
def template_to_json(value: Template) -> dict:
    """Return a JSON-serializable description of a template.

    The result holds the template name as a string and one ``{'value': ...}``
    entry per parameter, where the value is the string form of the parameter.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
54
55
def wikilink_to_json(value: Wikilink) -> dict:
    """Return ``{'title': ...}`` for a wikilink, plus ``'text'`` when the link has a display text."""
    result = {'title': str(value.title)}
    display_text = value.text
    if display_text is None:
        return result
    result['text'] = str(display_text)
    return result
61
62
def external_link_to_json(value: ExternalLink) -> dict:
    """Return ``{'url': ...}`` for an external link, plus ``'text'`` when the link has a title."""
    result = {'url': str(value.url)}
    link_title = value.title
    if link_title is None:
        return result
    result['text'] = str(link_title)
    return result
68
69
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts a sledrun wikitext page into JSON sub-pages.

    For every treated page the sledrun data is collected into a dict, validated against the schema loaded in
    ``setup`` and offered for saving as ``<title>/Rodelbahn.json``. If the page contains a ``<wrmap>`` tag, the
    parsed map is offered for saving as ``<title>/Landkarte.json``.
    """

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki; it is used for validation in ``treat_page``."""
        schema = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema.content_model == 'json'
        self.sledrun_schema = json.loads(schema.text)

    def treat_page(self) -> None:
        """Build the sledrun JSON (and map JSON) from the current wikitext page and offer to save them.

        Pages that do not use the wikitext content model or that lack the category ``Kategorie:Rodelbahn`` are
        skipped with a warning. ``jsonschema.validate`` raises if the generated JSON does not match the schema.
        """
        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {self.current_page.title()} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {self.current_page.title()} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, self.current_page.title() + '/Rodelbahn.json')

        map_json_page = Page(self.site, self.current_page.title() + '/Landkarte.json')

        # Only the first <wrmap> tag of the page is converted; map_json stays None if there is none.
        map_json = None
        v = wikicode.filter_tags(matches='wrmap')
        if len(v) > 0:
            map_json = parse_wrmap(str(v[0]))

        sledrun_json = {
            "name": self.current_page.title(),
            "aliases": [],
            # A category link is [[Kategorie:In Arbeit]]: the category name is the link *title*, the display
            # text is None. Comparing the text (as done previously) therefore never matched.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        optional_set(sledrun_json, 'description', get_sledrun_description(wikicode))

        # The Rodelbahnbox template is only evaluated if it occurs exactly once at the top level of the page.
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])
            v = rbb['Bild']
            if v is not None:
                # A missing image file is only reported; the image name is stored regardless.
                image_page = Page(self.site, v, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = v

            optional_set(sledrun_json, 'length', rbb['Länge'])

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            v, w = rbb['Betreiber']
            optional_set(sledrun_json, 'has_operator', v)
            optional_set(sledrun_json, 'operator', w)

            optional_set(sledrun_json, 'walkup_possible', rbb['Aufstieg möglich'])

            v, w = rbb['Aufstieg getrennt']
            if v is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'walkup_note', w)

            optional_set(sledrun_json, 'walkup_time', rbb['Gehzeit'])

            def _walkup_support():
                """Copy the (type, note) pairs of 'Aufstiegshilfe' to 'walkup_supports'."""
                walkup_support_rbb = rbb['Aufstiegshilfe']
                if walkup_support_rbb is not None:
                    walkup_supports = []
                    for walkup_support_type, note in walkup_support_rbb:
                        walkup_support = {'type': walkup_support_type}
                        optional_set(walkup_support, 'note', note)
                        walkup_supports.append(walkup_support)
                    sledrun_json['walkup_supports'] = walkup_supports
            _walkup_support()

            v, w = rbb['Beleuchtungsanlage']
            if v is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(v)
            optional_set(sledrun_json, 'nightlight_possible_note', w)

            v, w = rbb['Beleuchtungstage']
            optional_set(sledrun_json, 'nightlight_weekdays_count', v)
            optional_set(sledrun_json, 'nightlight_weekdays_note', w)

            def _sled_rental():
                """Set 'sled_rental_direct' and 'sled_rental' from the 'Rodelverleih' (name, note) pairs."""
                v = rbb['Rodelverleih']
                if v is not None:
                    sledrun_json['sled_rental_direct'] = v != []
                    w = []
                    for name, note in v:
                        x = {}
                        # A name containing a wikilink is stored as a page reference, otherwise as plain name.
                        name_code = mwparserfromhell.parse(name)
                        wiki_link = next(name_code.ifilter_wikilinks(), None)
                        if isinstance(wiki_link, Wikilink):
                            x['wr_page'] = wikilink_to_json(wiki_link)
                        else:
                            x['name'] = name
                        optional_set(x, 'note', note)
                        w.append(x)
                    sledrun_json['sled_rental'] = w
            _sled_rental()

            def _cachet():
                """Set 'cachet' to whether the 'Gütesiegel' value is non-empty."""
                v = rbb['Gütesiegel']
                if v is not None:
                    sledrun_json['cachet'] = len(v) > 0
            _cachet()

            optional_set(sledrun_json, 'show_in_overview', rbb['In Übersichtskarte'])
            optional_set(sledrun_json, 'forum_id', rbb['Forumid'])

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v, w = rbb['Webauskunft']
            if v is not None:
                if v:
                    sledrun_json['info_web'] = [{'url': w}]
                else:
                    sledrun_json['info_web'] = []

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

        def _button_bar():
            """Take the 'video' parameter of the first top-level 'Buttonleiste' template as video URL."""
            bb_iter = wikicode.ifilter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Buttonleiste')
            bb = next(bb_iter, None)
            if bb is not None:
                video = bb.get('video', None)
                if isinstance(video, Parameter):
                    # Parameter.value is a Wikicode object, which json.dumps cannot serialize;
                    # store the plain string without surrounding whitespace instead.
                    sledrun_json['videos'] = [{'url': str(video.value).strip()}]
        _button_bar()

        def _public_transport():
            """Extract description, stops, lines and links from the public transport section."""
            pt_sections = wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                include_headings=False)
            if len(pt_sections) < 1:
                return
            pt = pt_sections[0]
            # The description is everything in front of the first list item ('*'); without a list item
            # no description is stored.
            node = next((node for node in pt.nodes if isinstance(node, Tag) and node.wiki_markup == '*'), None)
            if node is not None:
                description = str(Wikicode(pt.nodes[:pt.nodes.index(node)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = str(description)

            public_transport_stops = []
            public_transport_lines = []
            public_transport_links = []
            ya = None  # the stop currently being assembled (None if no stop is open)
            for node in pt.nodes:
                if isinstance(node, Template):
                    if node.name == 'Haltestelle':
                        # A new stop starts: store the previous one first.
                        if ya is not None:
                            public_transport_stops.append(ya)
                        ya = {}
                        z = node.get(1, None)
                        if z is not None:
                            ya['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            ya['name_local'] = str(z)
                        za = str(node.get(3, '')).strip()
                        zb = str(node.get(4, '')).strip()
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            ya['position'] = z
                    # NOTE(review): in the next three branches ya is None if no 'Haltestelle' template
                    # precedes the timetable template (or a 'Fahrplan Linie VVT' closed the stop);
                    # the item assignment then raises TypeError.
                    elif node.name in ["Fahrplan Abfahrtsmonitor VVT"]:
                        ya['monitor_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Hinfahrt VVT"]:
                        ya['route_arrival_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Rückfahrt VVT"]:
                        ya['route_departure_template'] = template_to_json(node)
                    elif node.name in ["Fahrplan Linie VVT"]:
                        # A line template closes the stop that is currently open.
                        if ya is not None:
                            public_transport_stops.append(ya)
                            ya = None
                        y = {
                            'timetable_template': template_to_json(node),
                        }
                        public_transport_lines.append(y)
                elif isinstance(node, ExternalLink):
                    public_transport_links.append(external_link_to_json(node))
            if ya is not None:
                public_transport_stops.append(ya)
            if len(public_transport_stops) > 0:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if len(public_transport_lines) > 0:
                sledrun_json['public_transport_lines'] = public_transport_lines
            if len(public_transport_links) > 0:
                sledrun_json['public_transport_links'] = public_transport_links
        _public_transport()

        def _car():
            """Extract description, parking positions and distances from the car section."""
            car_section_list = wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto')
            if not car_section_list:
                return
            v = car_section_list[0]

            # Description: the nodes after the heading(s) up to the first list item ('*').
            description_nodes = dropwhile(lambda w: isinstance(w, Heading), v.nodes)
            description_nodes = takewhile(lambda w: not (isinstance(w, Tag) and w.wiki_markup == '*'),
                                          description_nodes)
            if description := str(Wikicode(list(description_nodes))).strip():
                sledrun_json["car_description"] = description

            x = []
            for w in v.ifilter_templates(matches='Parkplatz'):
                za = str(w.get(1, '')).strip()
                zb = str(w.get(2, '')).strip()
                z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                if len(z) > 0:
                    x.append({'position': z})
            if len(x) > 0:
                sledrun_json['car_parking'] = x

            # Distance lines look like: ** von '''<place>'''<remark>: <number> km
            x = []
            for w in io.StringIO(str(v)):
                match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", w.rstrip())
                if match:
                    ya, yb, yc = match.groups()
                    yc = float(yc.replace(',', '.'))
                    x.append({
                        'km': yc,
                        'route': (ya.strip() + ' ' + yb.strip()).strip(),
                    })
            if len(x) > 0:
                sledrun_json['car_distances'] = x
        _car()

        x = []  # collects the 'see also' links of all 'Allgemeines' sections
        for v in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            def _nightlight(value: str) -> Optional[str]:
                """Return the text following "* '''Beleuchtung''':" on its line, or None if absent or empty."""
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and not line.startswith("* '''Beleuchtung''':"):
                    line = next(line_iter, None)
                if line is None:
                    return None
                line = line.replace("* '''Beleuchtung''':", "").strip()
                if len(line) > 0:
                    return line
                return None
            optional_set(sledrun_json, 'nightlight_description', _nightlight(str(v)))

            def _gastronomy(value: str):
                """Return one entry per '** ' line that directly follows the "* '''Hütten''':" line."""
                gastronomy = []
                line_iter = io.StringIO(value)
                line = next(line_iter, None)
                while line is not None and line.rstrip() != "* '''Hütten''':":
                    line = next(line_iter, None)
                if line is None:
                    return gastronomy
                while line is not None:
                    line = next(line_iter, None)
                    if line is not None:
                        if line.startswith('** '):
                            g = {}
                            wiki = mwparserfromhell.parse(line)
                            wiki_link = next(wiki.ifilter_wikilinks(), None)
                            if isinstance(wiki_link, Wikilink):
                                g['wr_page'] = wikilink_to_json(wiki_link)
                            ext_link = next(wiki.ifilter_external_links(), None)
                            if isinstance(ext_link, ExternalLink):
                                g['weblink'] = external_link_to_json(ext_link)
                            # What remains besides links and list markers is the note; a list (not a
                            # one-shot generator) keeps the Wikicode object re-iterable.
                            remaining = str(Wikicode([n for n in wiki.nodes
                                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
                            # Drop enclosing parentheses of the note.
                            match = re.match(r'\((.+)\)', remaining)
                            if match:
                                remaining = match.group(1)
                            if len(remaining) > 0:
                                g['note'] = remaining
                            gastronomy.append(g)
                        else:
                            break
                return gastronomy

            w = _gastronomy(str(v))
            if len(w) > 0:
                sledrun_json['gastronomy'] = w

            def _sled_rental_description():
                """Store the text of the "* '''Rodelverleih''':" item, including its continuation lines."""
                line_iter = io.StringIO(str(v))
                line = next(line_iter, None)
                match = None
                while line is not None and (match := re.match(r"\* '''Rodelverleih''':(.*)", line)) is None:
                    line = next(line_iter, None)
                if match is None:
                    return
                result = [match.group(1)]
                line = next(line_iter, None)
                # Continuation lines end at the next first-level list item ('* ').
                while line is not None and re.match(r"\* ", line) is None:
                    result.append(line)
                    line = next(line_iter, None)
                sledrun_json['sled_rental_description'] = ''.join(result).strip()
            _sled_rental_description()

            # Skip to the node after the bold "Siehe auch" marker ...
            i = iter(v.nodes)
            w = next(i, None)
            while w is not None:
                if isinstance(w, Tag) and str(w) == "'''Siehe auch'''":
                    w = next(i, None)
                    break
                w = next(i, None)
            # ... and collect the external links that follow, ignoring list markup and whitespace,
            # until any other node is encountered.
            while w is not None:
                if isinstance(w, ExternalLink):
                    x.append(external_link_to_json(w))
                elif isinstance(w, (Text, Tag)) and str(w).strip() in ['', '*', ':']:
                    pass
                else:
                    break
                w = next(i, None)
        if len(x) > 0:
            sledrun_json['see_also'] = x

        sledrun_json['allow_reports'] = True

        impressions = None
        sledrun_impressions_page = Page(self.site, self.current_page.title() + '/Impressionen')
        if sledrun_impressions_page.exists():
            impressions = sledrun_impressions_page.title()

        # The regenerated wikitext is only displayed (with a diff against the current page), not saved.
        text = create_sledrun_wiki(sledrun_json, map_json, impressions)
        pywikibot.output(text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.showDiff(self.current_page.text, text)

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        sledrun_json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        # Dict equality ignores key order, so this checks that ordering did not change any content.
        assert sledrun_json_ordered == sledrun_json
        sledrun_json_text = json.dumps(sledrun_json_ordered, ensure_ascii=False, indent=4)
        summary = 'Rodelbahnbeschreibung konvertiert von Wikitext nach JSON.'
        pywikibot.output('\03{lightpurple}---\03{default}')
        pywikibot.output(sledrun_json_text)
        pywikibot.output('\03{lightpurple}---\03{default}')
        self.userPut(sledrun_json_page, sledrun_json_page.text, sledrun_json_text, summary=summary)

        # Without a <wrmap> tag there is no map; do not offer to save the literal text "null" as map page.
        if map_json is not None:
            map_json_text = json.dumps(map_json, ensure_ascii=False, indent=4)
            summary = 'Landkarte konvertiert von Wikitext nach JSON.'
            self.userPut(map_json_page, map_json_page.text, map_json_text, summary=summary)
435
436
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot on the selected pages.

    Global pywikibot options are handled first; the remaining arguments are passed to the page generator
    factory. If they do not yield a page generator, the help hint for a missing generator is shown instead.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()


if __name__ == '__main__':
    main()