ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Parse car_distances.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from typing import Any, Optional
16
17 import mwparserfromhell
18 import pywikibot
19 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template
20 from mwparserfromhell.wikicode import Wikicode
21 from pywikibot import pagegenerators, Page
22 from pywikibot.bot import (
23     AutomaticTWSummaryBot,
24     ConfigParserBot,
25     ExistingPageBot,
26     NoRedirectPageBot,
27     SingleSiteBot,
28 )
29 from pywikibot.logging import warning
30 from pywikibot.site._namespace import BuiltinNamespace
31
32 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
33 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
34     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
35     opt_uint_from_str
36
37 from pywikibot.site import Namespace
38
# Maps the '&params;' placeholder used in the module docstring to the help text of the
# page generators (presumably substituted by pywikibot when it prints the script help).
docuReplacements = {'&params;': pagegenerators.parameterHelp}
40
41
def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, but pass ``None`` through unchanged.

    :param value: Any object or ``None``.
    :return: ``None`` if ``value`` is ``None``, otherwise its string representation.
    """
    return None if value is None else str(value)
46
47
def template_to_json(value: Template) -> dict:
    """Convert a template to a plain dict.

    :param value: Template providing ``name`` and an iterable ``params``.
    :return: Dict with the stringified template name under ``'name'`` and, under ``'parameter'``,
        a list with one ``{'value': str(param)}`` entry per parameter (in order).
    """
    template_name = str(value.name)
    parameter_list = [{'value': str(param)} for param in value.params]
    return {'name': template_name, 'parameter': parameter_list}
56
57
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts a sledrun wikitext page to a JSON structure and writes back the wikitext
    regenerated from that structure."""

    def treat_page(self) -> None:
        """Load the given page, do some changes, and save it.

        The page is skipped (with a warning) if

        * its content model is not ``wikitext``,
        * it does not link to ``Kategorie:Rodelbahn``,
        * a ``/Rodelbahn.json`` or ``/Landkarte.json`` sub page already exists, or
        * a timetable template that belongs to a stop appears without a preceding
          ``Haltestelle`` template (the template could not be represented, so the page is
          left untouched instead of losing it).

        Otherwise the first ``wrmap`` tag (if any), the ``Rodelbahnbox`` template and several
        level 2 sections are collected into a dict, ``create_sledrun_wiki`` renders that dict
        (and the map) and the result is saved via ``put_current``.
        """
        page_title = self.current_page.title()

        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {page_title} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {page_title} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, page_title + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {page_title}.")
            return

        map_json_page = Page(self.site, page_title + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {page_title}.")
            return

        # Only the first <wrmap> tag is used.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": page_title,
            "aliases": [],
            # A category link like [[Kategorie:In Arbeit]] carries the category in its title;
            # its text (the part after "|") is None, so the title has to be compared.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # Description: first non-blank top level text node of the first "Allgemeines" section.
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            for text_node in section.ifilter_text(recursive=False):
                stripped = text_node.strip()
                if stripped:
                    sledrun_json["description"] = str(stripped)
                    break
            break

        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            rbb = rodelbahnbox_from_template(rbb_list[0])

            image = rbb['Bild']
            if image is not None:
                image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
                if not image_page.exists():
                    # A missing image is only reported, the value is taken over anyway.
                    warning(f"{image_page.title()} does not exist.")
                sledrun_json['image'] = image

            v = rbb['Länge']
            if v is not None:
                sledrun_json['length'] = v

            v = rbb['Schwierigkeit']
            if v is not None:
                sledrun_json['difficulty'] = difficulty_german_to_str(v)

            v = rbb['Lawinen']
            if v is not None:
                sledrun_json['avalanches'] = avalanches_german_to_str(v)

            has_operator, operator = rbb['Betreiber']
            if has_operator is not None:
                sledrun_json['has_operator'] = has_operator
            if operator is not None:
                sledrun_json['operator'] = operator

            v = rbb['Aufstieg möglich']
            if v is not None:
                sledrun_json['walkup_possible'] = v

            walkup_separate, walkup_comment = rbb['Aufstieg getrennt']
            if walkup_separate is not None:
                sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
            if walkup_comment is not None:
                sledrun_json['walkup_comment'] = walkup_comment  # TODO

            v = rbb['Gehzeit']
            if v is not None:
                sledrun_json['walkup_time'] = v

            nightlight, nightlight_description = rbb['Beleuchtungsanlage']
            if nightlight is not None:
                sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
            if nightlight_description is not None:
                sledrun_json['nightlight_description'] = nightlight_description

            v = rbb['Rodelverleih']
            if v is not None:
                sledrun_json['sled_rental_direct'] = v != []
                sledrun_json['sled_rental_description'] = opt_str_opt_comment_enum_to_str(v)

            v = rbb['In Übersichtskarte']
            if v is not None:
                sledrun_json['show_in_overview'] = v

            v = rbb['Forumid']
            if v is not None:
                sledrun_json['forum_id'] = v

            v = rbb['Position']
            if v is not None:
                sledrun_json['position'] = lonlat_to_json(v)

            v = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
            if v != {}:
                sledrun_json['top'] = v

            v = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
            if v != {}:
                sledrun_json['bottom'] = v

            v = rbb['Telefonauskunft']
            if v is not None:
                sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in v]

            v = rbb['Öffentliche Anreise']
            if v is not None:
                sledrun_json['public_transport'] = public_transport_german_to_str(v)

            # Timetable templates that are attached to the stop preceding them.
            stop_template_keys = {
                "Fahrplan Abfahrtsmonitor VVT": 'monitor_template',
                "Fahrplan Hinfahrt VVT": 'route_arrival_template',
                "Fahrplan Rückfahrt VVT": 'route_departure_template',
            }

            # Public transport: only the first matching section is evaluated.
            for section in wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                                 include_headings=False):
                nodes = section.nodes

                # Everything before the first list item ("*") is the free text description.
                # The position is searched by index to avoid comparing nodes by their string value.
                first_bullet = next(
                    (i for i, n in enumerate(nodes) if isinstance(n, Tag) and n.wiki_markup == '*'), None)
                if first_bullet is not None:
                    description = str(Wikicode(nodes[:first_bullet])).strip()
                    if description:
                        sledrun_json["public_transport_description"] = str(description)

                public_transport_stops = []
                public_transport_lines = []
                stop = None  # stop currently being assembled, None if there is none
                for node in nodes:
                    if not isinstance(node, Template):
                        continue
                    # Strip the name like it is done for the Rodelbahnbox template above.
                    template_name = node.name.strip()
                    if template_name == 'Haltestelle':
                        # A new stop starts: finish the previous one.
                        if stop is not None:
                            public_transport_stops.append(stop)
                        stop = {}
                        z = node.get(1, None)
                        if z is not None:
                            stop['municipality'] = str(z)
                        z = node.get(2, None)
                        if z is not None:
                            stop['name_local'] = str(z)
                        za = str_or_none(node.get(3, None))
                        zb = str_or_none(node.get(4, None))
                        z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                        if len(z) > 0:
                            stop['position'] = z
                    elif template_name in stop_template_keys:
                        if stop is None:
                            # There is no stop this template could be attached to. Do not drop it
                            # silently (the page would be overwritten without it) and do not crash.
                            warning(f"Template {template_name} on {page_title} is not preceded by a "
                                    f"Haltestelle template, skipping {page_title}.")
                            return
                        stop[stop_template_keys[template_name]] = template_to_json(node)
                    elif template_name == "Fahrplan Linie VVT":
                        # A line template ends the list of stops.
                        if stop is not None:
                            public_transport_stops.append(stop)
                            stop = None
                        public_transport_lines.append({
                            'timetable_template': template_to_json(node),
                        })
                if stop is not None:
                    public_transport_stops.append(stop)
                if len(public_transport_stops) > 0:
                    sledrun_json['public_transport_stops'] = public_transport_stops
                if len(public_transport_lines) > 0:
                    sledrun_json['public_transport_lines'] = public_transport_lines
                break

            # Arrival by car: every matching section is evaluated, later ones overwrite earlier ones.
            for section in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
                for text_node in section.ifilter_text(recursive=False):
                    stripped = text_node.strip()
                    if stripped:
                        sledrun_json["car_description"] = str(stripped)
                        break

                car_parking = []
                for template in section.ifilter_templates(matches='Parkplatz'):
                    za = str_or_none(template.get(1, None))
                    zb = str_or_none(template.get(2, None))
                    z = lonlat_ele_to_json(opt_lonlat_from_str(za), opt_uint_from_str(zb))
                    if len(z) > 0:
                        car_parking.append({'position': z})
                if len(car_parking) > 0:
                    sledrun_json['car_parking'] = car_parking

                # Distance lines look like: ** von '''<place>'''<details>: <number> km
                car_distances = []
                for line in io.StringIO(str(section)):
                    match = re.match(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km", line.rstrip())
                    if match:
                        place, details, km = match.groups()
                        car_distances.append({
                            'km': float(km.replace(',', '.')),  # decimal comma -> decimal point
                            'route': (place.strip() + ' ' + details.strip()).strip(),
                        })
                if len(car_distances) > 0:
                    sledrun_json['car_distances'] = car_distances

            # "See also": external links following the bold '''Siehe auch''' marker.
            see_also = []
            for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
                node_iter = iter(section.nodes)
                # Consume the nodes up to and including the marker.
                for node in node_iter:
                    if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                        break
                # Collect links until something other than a link or list markup shows up.
                for node in node_iter:
                    if isinstance(node, ExternalLink):
                        # Store plain strings instead of Wikicode objects in the JSON structure.
                        link = {'url': str(node.url)}
                        if node.title is not None:
                            link['text'] = str(node.title)
                        see_also.append(link)
                    elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                        continue
                    else:
                        break
            if len(see_also) > 0:
                sledrun_json['see_also'] = see_also

            sledrun_json['allow_reports'] = True

        text = create_sledrun_wiki(sledrun_json, map_json)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)
298
299
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    :param args: Command line arguments. They are handed to ``pywikibot.handle_args`` first,
        the result is passed on to the page generator factory.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        # Without a page generator there is nothing to work on.
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
310
311
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()