]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
493346788964bb962e7d2cd1733aeb360a9759c5
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import io
13 import json
14 import re
15 from typing import Any, Optional
16
17 import mwparserfromhell
18 import pywikibot
19 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template
20 from mwparserfromhell.wikicode import Wikicode
21 from pywikibot import pagegenerators, Page
22 from pywikibot.bot import (
23     AutomaticTWSummaryBot,
24     ConfigParserBot,
25     ExistingPageBot,
26     NoRedirectPageBot,
27     SingleSiteBot,
28 )
29 from pywikibot.logging import warning
30 from pywikibot.site._namespace import BuiltinNamespace
31
32 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
33 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
34     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
35     opt_uint_from_str
36
37 from pywikibot.site import Namespace
38
# Maps the '&params;' placeholder (as used in the module docstring above) to the
# generator/filter parameter help text provided by pywikibot's pagegenerators module.
# NOTE(review): presumably pywikibot substitutes the placeholder when showing the script help - confirm.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
40
41
def str_or_none(value: Any) -> Optional[str]:
    """Convert *value* to ``str`` while passing ``None`` through unchanged.

    :param value: Any object or ``None``.
    :return: ``str(value)`` if value is not ``None``, otherwise ``None``.
    """
    return None if value is None else str(value)
46
47
def template_to_json(value: Template) -> dict:
    """Describe a wikitext template as a plain dictionary.

    :param value: The parsed template.
    :return: A dict with the template name under ``'name'`` and, under ``'parameter'``, a list with one
        ``{'value': ...}`` entry per template parameter (each being the string form of the parameter).
    """
    return {
        'name': str(value.name),
        'parameter': [{'value': str(param)} for param in value.params],
    }
56
57
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts sledrun wikitext pages via an intermediate JSON structure.

    For each treated page the wikitext is parsed into a sledrun dictionary (and an optional map structure taken
    from the first ``wrmap`` tag). The page is then saved with the text ``create_sledrun_wiki`` generates from both.
    """

    # Line format of a car distance entry, e.g. ``** von '''Innsbruck''' (A12): 12,5 km``.
    _CAR_DISTANCE_RE = re.compile(r"\*\* von \'\'\'(.+)\'\'\'(.*): ([\d.,]+) km")

    # Templates that describe the most recent "Haltestelle" (stop) and the JSON key they are stored under.
    _STOP_TEMPLATE_KEYS = {
        'Fahrplan Abfahrtsmonitor VVT': 'monitor_template',
        'Fahrplan Hinfahrt VVT': 'route_arrival_template',
        'Fahrplan Rückfahrt VVT': 'route_departure_template',
    }

    def treat_page(self) -> None:
        """Load the given page, do some changes, and save it.

        The page is skipped (with a warning) if its content model is not wikitext, if it lacks the category
        ``Kategorie:Rodelbahn`` or if one of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json`` already exists.
        """
        wikitext_content_model = 'wikitext'
        page_title = self.current_page.title()
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {page_title} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(c.title == category_sledrun for c in wikilink_list):
            warning(f'The page {page_title} does not have category {category_sledrun}.')
            return

        # Never convert a page that already has JSON sub pages.
        for suffix in ('/Rodelbahn.json', '/Landkarte.json'):
            json_page = Page(self.site, page_title + suffix)
            if json_page.exists():
                warning(f"{json_page.title()} already exists, skipping {page_title}.")
                return

        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if len(wrmap_tags) > 0:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": page_title,
            "aliases": [],
            # The category name is the link *title*; the link *text* (display text) is unset for plain
            # category links, so comparing the text would never match.
            "entry_under_construction": any(c.title == 'Kategorie:In Arbeit' for c in wikilink_list),
        }

        # Only the first matching section is considered.
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            description = self._first_text(section)
            if description is not None:
                sledrun_json["description"] = description
            break

        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        if len(rbb_list) == 1:
            self._add_rodelbahnbox(sledrun_json, rodelbahnbox_from_template(rbb_list[0]))
        else:
            warning(f"Expected exactly one Rodelbahnbox template in {page_title} but found {len(rbb_list)}, "
                    f"ignoring Rodelbahnbox data.")

        # Only the first matching section is considered.
        for section in wikicode.get_sections(levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln',
                                             include_headings=False):
            self._add_public_transport(sledrun_json, section)
            break

        for section in wikicode.get_sections(levels=[2], matches='Anreise mit dem Auto'):
            self._add_car(sledrun_json, section)

        # "See also" links and the report flag do not depend on the existence of a car section,
        # so they are handled at method level.
        see_also = self._see_also_links(wikicode)
        if len(see_also) > 0:
            sledrun_json['see_also'] = see_also

        sledrun_json['allow_reports'] = True

        text = create_sledrun_wiki(sledrun_json, map_json)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)

    @staticmethod
    def _first_text(section: Wikicode) -> Optional[str]:
        """Return the first non-empty (stripped) top-level text node of *section* or ``None`` if there is none."""
        for text_node in section.ifilter_text(recursive=False):
            stripped = text_node.strip()
            if stripped:
                return str(stripped)
        return None

    def _add_rodelbahnbox(self, sledrun_json: dict, rbb: Any) -> None:
        """Copy the values of a parsed Rodelbahnbox into *sledrun_json*.

        :param sledrun_json: Dictionary that is updated in place.
        :param rbb: Result of ``rodelbahnbox_from_template``; accessed like a mapping from the German
            Rodelbahnbox keys to already parsed values. Entries that are ``None`` are left out.
        """
        image = rbb['Bild']
        if image is not None:
            # A missing image only triggers a warning, the value is taken over anyway.
            image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = image

        length = rbb['Länge']
        if length is not None:
            sledrun_json['length'] = length

        difficulty = rbb['Schwierigkeit']
        if difficulty is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

        avalanches = rbb['Lawinen']
        if avalanches is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

        has_operator, operator = rbb['Betreiber']
        if has_operator is not None:
            sledrun_json['has_operator'] = has_operator
        if operator is not None:
            sledrun_json['operator'] = operator

        walkup_possible = rbb['Aufstieg möglich']
        if walkup_possible is not None:
            sledrun_json['walkup_possible'] = walkup_possible

        walkup_separate, walkup_comment = rbb['Aufstieg getrennt']
        if walkup_separate is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
        if walkup_comment is not None:
            sledrun_json['walkup_comment'] = walkup_comment  # TODO

        walkup_time = rbb['Gehzeit']
        if walkup_time is not None:
            sledrun_json['walkup_time'] = walkup_time

        nightlight, nightlight_description = rbb['Beleuchtungsanlage']
        if nightlight is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
        if nightlight_description is not None:
            sledrun_json['nightlight_description'] = nightlight_description

        sled_rental = rbb['Rodelverleih']
        if sled_rental is not None:
            sledrun_json['sled_rental_direct'] = sled_rental != []
            sledrun_json['sled_rental_description'] = opt_str_opt_comment_enum_to_str(sled_rental)

        show_in_overview = rbb['In Übersichtskarte']
        if show_in_overview is not None:
            sledrun_json['show_in_overview'] = show_in_overview

        forum_id = rbb['Forumid']
        if forum_id is not None:
            sledrun_json['forum_id'] = forum_id

        position = rbb['Position']
        if position is not None:
            sledrun_json['position'] = lonlat_to_json(position)

        top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if top != {}:
            sledrun_json['top'] = top

        bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if bottom != {}:
            sledrun_json['bottom'] = bottom

        info_phone = rbb['Telefonauskunft']
        if info_phone is not None:
            sledrun_json['info_phone'] = [{'phone': p, 'name': n} for p, n in info_phone]

        public_transport = rbb['Öffentliche Anreise']
        if public_transport is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

    def _add_public_transport(self, sledrun_json: dict, section: Wikicode) -> None:
        """Extract description, stops and lines from the public transport section into *sledrun_json*.

        :param sledrun_json: Dictionary that is updated in place.
        :param section: Content of the section (without heading).
        """
        # Everything before the first list item is the free text description.
        first_item = next((n for n in section.nodes if isinstance(n, Tag) and n.wiki_markup == '*'), None)
        if first_item is not None:
            intro = str(Wikicode(section.nodes[:section.nodes.index(first_item)])).strip()
            if intro:
                sledrun_json["public_transport_description"] = intro

        stops = []
        lines = []
        stop = None  # the stop that subsequent timetable templates belong to
        for node in section.nodes:
            if not isinstance(node, Template):
                continue
            # Strip the name so that templates written over several lines are recognized as well.
            name = str(node.name).strip()
            if name == 'Haltestelle':
                if stop is not None:
                    stops.append(stop)
                stop = {}
                municipality = node.get(1, None)
                if municipality is not None:
                    stop['municipality'] = str(municipality)
                name_local = node.get(2, None)
                if name_local is not None:
                    stop['name_local'] = str(name_local)
                lonlat = str_or_none(node.get(3, None))
                elevation = str_or_none(node.get(4, None))
                position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(elevation))
                if len(position) > 0:
                    stop['position'] = position
            elif name in self._STOP_TEMPLATE_KEYS:
                if stop is None:
                    # No stop to attach the template to (it would otherwise raise a TypeError).
                    warning(f"Template {name} is not preceded by a Haltestelle template, ignoring it.")
                    continue
                stop[self._STOP_TEMPLATE_KEYS[name]] = template_to_json(node)
            elif name == 'Fahrplan Linie VVT':
                # A line template ends the description of the current stop.
                if stop is not None:
                    stops.append(stop)
                    stop = None
                lines.append({
                    'timetable_template': template_to_json(node),
                })
        if stop is not None:
            stops.append(stop)
        if len(stops) > 0:
            sledrun_json['public_transport_stops'] = stops
        if len(lines) > 0:
            sledrun_json['public_transport_lines'] = lines

    def _add_car(self, sledrun_json: dict, section: Wikicode) -> None:
        """Extract description, parking positions and distances from the car section into *sledrun_json*.

        :param sledrun_json: Dictionary that is updated in place.
        :param section: The section including its heading.
        """
        description = self._first_text(section)
        if description is not None:
            sledrun_json["car_description"] = description

        parking = []
        for template in section.ifilter_templates(matches='Parkplatz'):
            lonlat = str_or_none(template.get(1, None))
            elevation = str_or_none(template.get(2, None))
            position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat), opt_uint_from_str(elevation))
            if len(position) > 0:
                parking.append({'position': position})
        if len(parking) > 0:
            sledrun_json['car_parking'] = parking

        distances = []
        for line in io.StringIO(str(section)):
            match = self._CAR_DISTANCE_RE.match(line.rstrip())
            if match:
                origin, via, km = match.groups()
                distances.append({
                    'km': float(km.replace(',', '.')),  # German decimal comma
                    'route': (origin.strip() + ' ' + via.strip()).strip(),
                })
        if len(distances) > 0:
            sledrun_json['car_distances'] = distances

    @staticmethod
    def _see_also_links(wikicode: Wikicode) -> list:
        """Collect the external links that follow a bold "Siehe auch" in the "Allgemeines" sections.

        :param wikicode: The parsed page.
        :return: List of ``{'url': ..., 'text': ...}`` dictionaries (``'text'`` only if the link has a title).
        """
        links = []
        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            nodes = iter(section.nodes)
            # Advance the iterator just behind the "Siehe auch" marker (or to the end if there is none).
            for node in nodes:
                if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                    break
            # Take links as long as only links and list/indent markup or whitespace follow.
            for node in nodes:
                if isinstance(node, ExternalLink):
                    # Store plain strings so that the structure stays JSON serializable.
                    link = {'url': str(node.url)}
                    if node.title is not None:
                        link['text'] = str(node.title)
                    links.append(link)
                elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                    continue
                else:
                    break
        return links
298
299
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot.

    If the arguments do not define a page generator, a help hint is shown instead.

    :param args: Command line arguments.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    SledrunWikiTextToJsonBot(generator=page_generator).run()
310
311
# Run the bot only when this file is executed as a script (not when it is imported).
if __name__ == '__main__':
    main()