]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
More detailed parsing of sled rental.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from typing import Any, Optional
16
17 import mwparserfromhell
18 import pywikibot
19 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink
20 from mwparserfromhell.wikicode import Wikicode
21 from pywikibot import pagegenerators, Page
22 from pywikibot.bot import (
23     AutomaticTWSummaryBot,
24     ConfigParserBot,
25     ExistingPageBot,
26     NoRedirectPageBot,
27     SingleSiteBot,
28 )
29 from pywikibot.logging import warning
30 from pywikibot.site._namespace import BuiltinNamespace
31
32 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
33 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
34     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
35     opt_uint_from_str
36
37 from pywikibot.site import Namespace
38
# Maps the '&params;' placeholder used in the module docstring to pywikibot's page generator
# help text (presumably substituted by pywikibot when it shows the script help - TODO confirm).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
40
41
def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, passing ``None`` through unchanged.

    :param value: Any object or ``None``.
    :return: The string representation of *value*, or ``None`` if *value* is ``None``.
    """
    return None if value is None else str(value)
46
47
def template_to_json(value: Template) -> dict:
    """Convert a parsed template into its JSON representation.

    :param value: Template node providing ``name`` and ``params``.
    :return: Dict with the template name as string under ``'name'`` and, under
        ``'parameter'``, one ``{'value': ...}`` dict per parameter holding the
        string form of that parameter.
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
56
57
def wikilink_to_json(value: Wikilink) -> dict:
    """Convert a parsed wiki link into its JSON representation.

    :param value: Wikilink node providing ``title`` and ``text``.
    :return: Dict with the link title as string under ``'title'``; the key
        ``'text'`` is only present if the link has a text.
    """
    title_entry = {'title': str(value.title)}
    link_text = str_or_none(value.text)
    if link_text is None:
        return title_entry
    return {**title_entry, 'text': link_text}
64
65
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that parses sledrun wikitext pages into sledrun JSON and saves the wikitext rendered from it."""

    def treat_page(self) -> None:
        """Convert the current sledrun page to JSON and save the wikitext regenerated from that JSON.

        The page is skipped with a warning if its content model is not
        wikitext, if it is not in the category ``Kategorie:Rodelbahn`` or if
        one of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json``
        already exists.
        """
        page_title = self.current_page.title()

        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {page_title} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(link.title == category_sledrun for link in wikilink_list):
            warning(f'The page {page_title} does not have category {category_sledrun}.')
            return

        for sub_page_name in ('/Rodelbahn.json', '/Landkarte.json'):
            sub_page = Page(self.site, page_title + sub_page_name)
            if sub_page.exists():
                warning(f"{sub_page.title()} already exists, skipping {page_title}.")
                return

        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if wrmap_tags:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": page_title,
            "aliases": [],
            # A category link carries the category name in its title, its text is usually None.
            "entry_under_construction": any(link.title == 'Kategorie:In Arbeit' for link in wikilink_list),
        }

        self._add_description(wikicode, sledrun_json)
        self._add_rodelbahnbox(wikicode, sledrun_json)
        self._add_public_transport(wikicode, sledrun_json)
        self._add_car(wikicode, sledrun_json)
        # The general section is parsed independently of whether a car section exists.
        self._add_general(wikicode, sledrun_json)
        sledrun_json['allow_reports'] = True

        text = create_sledrun_wiki(sledrun_json, map_json)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)

    @staticmethod
    def _add_description(wikicode: Wikicode, sledrun_json: dict) -> None:
        """Store the first non-blank top-level text of the first 'Allgemeines' section as description."""
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["description"] = str(stripped)
                    break
            break  # only the first matching section is considered

    def _add_rodelbahnbox(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Copy the values of the Rodelbahnbox template to *sledrun_json*.

        Nothing is added unless the page has exactly one top-level
        ``Rodelbahnbox`` template.
        """
        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) != 1:
            return
        rbb = rodelbahnbox_from_template(rbb_list[0])

        image = rbb['Bild']
        if image is not None:
            image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = image

        length = rbb['Länge']
        if length is not None:
            sledrun_json['length'] = length

        difficulty = rbb['Schwierigkeit']
        if difficulty is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

        avalanches = rbb['Lawinen']
        if avalanches is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

        has_operator, operator = rbb['Betreiber']
        if has_operator is not None:
            sledrun_json['has_operator'] = has_operator
        if operator is not None:
            sledrun_json['operator'] = operator

        walkup_possible = rbb['Aufstieg möglich']
        if walkup_possible is not None:
            sledrun_json['walkup_possible'] = walkup_possible

        walkup_separate, walkup_comment = rbb['Aufstieg getrennt']
        if walkup_separate is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
        if walkup_comment is not None:
            sledrun_json['walkup_comment'] = walkup_comment  # TODO

        walkup_time = rbb['Gehzeit']
        if walkup_time is not None:
            sledrun_json['walkup_time'] = walkup_time

        nightlight, nightlight_description = rbb['Beleuchtungsanlage']
        if nightlight is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
        if nightlight_description is not None:
            sledrun_json['nightlight_description'] = nightlight_description

        rentals = rbb['Rodelverleih']
        if rentals is not None:
            sledrun_json['sled_rental_direct'] = rentals != []
            sled_rental = []
            for name, comment in rentals:
                entry = {}
                # A rental given as wiki link refers to a wiki page, otherwise it is a plain name.
                wiki_link = next(mwparserfromhell.parse(name).ifilter_wikilinks(), None)
                if isinstance(wiki_link, Wikilink):
                    entry['wr_page'] = wikilink_to_json(wiki_link)
                else:
                    entry['name'] = name
                if comment is not None:
                    entry['comment'] = comment
                sled_rental.append(entry)
            sledrun_json['sled_rental'] = sled_rental

        show_in_overview = rbb['In Übersichtskarte']
        if show_in_overview is not None:
            sledrun_json['show_in_overview'] = show_in_overview

        forum_id = rbb['Forumid']
        if forum_id is not None:
            sledrun_json['forum_id'] = forum_id

        position = rbb['Position']
        if position is not None:
            sledrun_json['position'] = lonlat_to_json(position)

        top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if top != {}:
            sledrun_json['top'] = top

        bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if bottom != {}:
            sledrun_json['bottom'] = bottom

        info_phone = rbb['Telefonauskunft']
        if info_phone is not None:
            sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

        public_transport = rbb['Öffentliche Anreise']
        if public_transport is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

    def _add_public_transport(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Parse description, stops and lines of the first public transport section into *sledrun_json*."""
        # Templates that describe the most recently seen stop and the JSON key they are stored under.
        stop_template_keys = {
            "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
            "Fahrplan Hinfahrt VVT": 'route_arrival_template',
            "Fahrplan Rückfahrt VVT": 'route_departure_template',
        }
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                             include_headings=False):
            # Everything before the first list item is the free text description.
            first_bullet = next((n for n in section.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
            if first_bullet is not None:
                description = str(Wikicode(section.nodes[:section.nodes.index(first_bullet)])).strip()
                if description:
                    sledrun_json["public_transport_description"] = description

            public_transport_stops = []
            public_transport_lines = []
            stop = None  # stop currently being assembled, None if there is none
            for node in section.nodes:
                if not isinstance(node, Template):
                    continue
                # Strip the name so that e.g. a line break after the template name is tolerated.
                name = str(node.name).strip()
                if name == 'Haltestelle':
                    if stop is not None:
                        public_transport_stops.append(stop)
                    stop = {}
                    municipality = node.get(1, None)
                    if municipality is not None:
                        stop['municipality'] = str(municipality)
                    name_local = node.get(2, None)
                    if name_local is not None:
                        stop['name_local'] = str(name_local)
                    lonlat = str_or_none(node.get(3, None))
                    elevation = str_or_none(node.get(4, None))
                    position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(elevation))
                    if len(position) > 0:
                        stop['position'] = position
                elif name in stop_template_keys:
                    if stop is None:
                        # No stop to attach the template to; skip it instead of failing on None.
                        warning(f"Template {name} on {self.current_page.title()} does not follow "
                                f"a Haltestelle template, ignoring it.")
                    else:
                        stop[stop_template_keys[name]] = template_to_json(node)
                elif name == "Fahrplan Linie VVT":
                    # A line template ends the stop that is currently being assembled.
                    if stop is not None:
                        public_transport_stops.append(stop)
                        stop = None
                    public_transport_lines.append({
                        'timetable_template': template_to_json(node),
                    })
            if stop is not None:
                public_transport_stops.append(stop)
            if public_transport_stops:
                sledrun_json['public_transport_stops'] = public_transport_stops
            if public_transport_lines:
                sledrun_json['public_transport_lines'] = public_transport_lines
            break  # only the first matching section is considered

    @staticmethod
    def _add_car(wikicode: Wikicode, sledrun_json: dict) -> None:
        """Parse description, parking places and distances of the car section(s) into *sledrun_json*."""
        distance_pattern = re.compile(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km")
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["car_description"] = str(stripped)
                    break

            car_parking = []
            for template in section.ifilter_templates(matches='Parkplatz'):
                lonlat = str_or_none(template.get(1, None))
                elevation = str_or_none(template.get(2, None))
                position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(elevation))
                if len(position) > 0:
                    car_parking.append({'position': position})
            if car_parking:
                sledrun_json['car_parking'] = car_parking

            car_distances = []
            for line in io.StringIO(str(section)):
                match = distance_pattern.match(line.rstrip())
                if match:
                    origin, detail, km = match.groups()
                    car_distances.append({
                        'km': float(km.replace(',', '.')),  # distances use a decimal comma
                        'route': (origin.strip() + ' ' + detail.strip()).strip(),
                    })
            if car_distances:
                sledrun_json['car_distances'] = car_distances

    def _add_general(self, wikicode: Wikicode, sledrun_json: dict) -> None:
        """Parse gastronomy, sled rental description and 'see also' links of the 'Allgemeines' section(s)."""
        see_also = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            section_text = str(section)

            gastronomy = self._parse_gastronomy(section_text)
            if gastronomy:
                sledrun_json['gastronomy'] = gastronomy

            sled_rental_description = self._parse_sled_rental_description(section_text)
            if sled_rental_description is not None:
                sledrun_json['sled_rental_description'] = sled_rental_description

            see_also.extend(self._parse_see_also(section))
        if see_also:
            sledrun_json['see_also'] = see_also

    @staticmethod
    def _parse_gastronomy(section_text: str) -> list:
        """Return the gastronomy entries listed as ``** `` items directly below the 'Hütten' list item.

        :param section_text: Wikitext of the section.
        :return: One dict per entry with the optional keys ``'wr_page'``,
            ``'weblink'`` and ``'note'``; empty if there is no 'Hütten' item.
        """
        line_iter = io.StringIO(section_text)
        for line in line_iter:
            if line.rstrip() == "* '''Hütten''':":
                break
        else:
            return []

        gastronomy = []
        for line in line_iter:
            if not line.startswith('** '):
                break  # the sub list has ended
            entry = {}
            wiki = mwparserfromhell.parse(line)
            wiki_link = next(wiki.ifilter_wikilinks(), None)
            if isinstance(wiki_link, Wikilink):
                entry['wr_page'] = wikilink_to_json(wiki_link)
            ext_link = next(wiki.ifilter_external_links(), None)
            if isinstance(ext_link, ExternalLink):
                weblink = {'url': str(ext_link.url)}
                # Links without title have title None, which must not end up as the text 'None'.
                if ext_link.title is not None:
                    weblink['text'] = str(ext_link.title)
                entry['weblink'] = weblink
            # What is left besides the links and the list markup is a note, often in parentheses.
            remaining = str(Wikicode([n for n in wiki.nodes
                                      if isinstance(n, (Text, Tag)) and str(n).strip() != '*'])).strip()
            match = re.match(r'\((.+)\)', remaining)
            if match:
                remaining = match.group(1)
            if len(remaining) > 0:
                entry['note'] = remaining
            gastronomy.append(entry)
        return gastronomy

    @staticmethod
    def _parse_sled_rental_description(section_text: str) -> Optional[str]:
        """Return the text of the 'Rodelverleih' list item including its continuation lines.

        :param section_text: Wikitext of the section.
        :return: The stripped text up to the next ``* `` list item, or ``None``
            if the section has no 'Rodelverleih' item.
        """
        line_iter = io.StringIO(section_text)
        for line in line_iter:
            match = re.match(r"\* '''Rodelverleih''':", line)
            if match is not None:
                break
        else:
            return None
        # Keep the line ending of the first line so that it is not glued to a following sub item.
        result = [line[match.end():]]
        for line in line_iter:
            if re.match(r"\* ", line) is not None:
                break
            result.append(line)
        return ''.join(result).strip()

    @staticmethod
    def _parse_see_also(section: Wikicode) -> list:
        """Return the external links that directly follow the bold 'Siehe auch' marker of *section*.

        :param section: Parsed section.
        :return: One dict per link with ``'url'`` and, if the link has a title, ``'text'``.
        """
        links = []
        node_iter = iter(section.nodes)
        for node in node_iter:
            if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                break
        # If the marker was not found the iterator is exhausted and the loop below does nothing.
        for node in node_iter:
            if isinstance(node, ExternalLink):
                # Store plain strings (not Wikicode objects) so that the result is JSON compatible.
                link = {'url': str(node.url)}
                if node.title is not None:
                    link['text'] = str(node.title)
                links.append(link)
            elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                continue  # list markup and whitespace between the links
            else:
                break
        return links
375
376
def main(*args: str) -> None:
    """Process command line arguments and run the bot.

    If the arguments do not yield a page generator, the pywikibot help hint
    about the missing generator is shown instead.

    :param args: Command line arguments.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
387
388
# Run the bot only if this file is executed as a script, not if it is imported.
if __name__ == '__main__':
    main()