]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/blob - bots/sledrun_wikitext_to_json.py
Parse car description, public transport description and bus stop.
[philipp/winterrodeln/wrpylib.git] / bots / sledrun_wikitext_to_json.py
1 #!/usr/bin/python
2 """
3 User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1.
4 Put it in directory scripts/userscripts.
5
6 Create a sledrun JSON page from a sledrun wikitext page (including map).
7
8 The following generators and filters are supported:
9
10 &params;
11 """
12 import json
13 from typing import Any, Optional
14
15 import mwparserfromhell
16 import pywikibot
17 from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template
18 from mwparserfromhell.wikicode import Wikicode
19 from pywikibot import pagegenerators, Page
20 from pywikibot.bot import (
21     AutomaticTWSummaryBot,
22     ConfigParserBot,
23     ExistingPageBot,
24     NoRedirectPageBot,
25     SingleSiteBot,
26 )
27 from pywikibot.logging import warning
28 from pywikibot.site._namespace import BuiltinNamespace
29
30 from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap
31 from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \
32     avalanches_german_to_str, public_transport_german_to_str, opt_str_opt_comment_enum_to_str, opt_lonlat_from_str, \
33     opt_uint_from_str
34
35 from pywikibot.site import Namespace
36
# Maps the '&params;' placeholder that appears in the module docstring to the
# generator/filter help text provided by pywikibot's pagegenerators module.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
38
39
def str_or_none(value: Any) -> Optional[str]:
    """Return ``str(value)``, passing ``None`` through unchanged.

    :param value: Any object, or ``None``.
    :return: ``None`` if *value* is ``None``, otherwise its string representation.
    """
    return None if value is None else str(value)
44
45
46
class SledrunWikiTextToJsonBot(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that parses a sledrun wikitext page into a dict and re-renders the page from it.

    For every treated page the wikitext is parsed with mwparserfromhell, the
    sledrun data is collected in a plain dict (``sledrun_json``), an optional
    ``<wrmap>`` tag is parsed with ``parse_wrmap`` and the text returned by
    ``create_sledrun_wiki`` is saved back to the current page.
    """

    def treat_page(self) -> None:
        """Load the given page, do some changes, and save it.

        The page is skipped with a warning when its content model is not
        wikitext, when it lacks the category ``Kategorie:Rodelbahn`` or when
        one of the sub pages ``/Rodelbahn.json`` or ``/Landkarte.json``
        already exists.
        """
        page_title = self.current_page.title()

        wikitext_content_model = 'wikitext'
        if self.current_page.content_model != wikitext_content_model:
            warning(f"The content model of {page_title} is {self.current_page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(self.current_page.text)
        wikilink_list = wikicode.filter_wikilinks()
        category_sledrun = 'Kategorie:Rodelbahn'
        if not any(link.title == category_sledrun for link in wikilink_list):
            warning(f'The page {page_title} does not have category {category_sledrun}.')
            return

        sledrun_json_page = Page(self.site, page_title + '/Rodelbahn.json')
        if sledrun_json_page.exists():
            warning(f"{sledrun_json_page.title()} already exists, skipping {page_title}.")
            return

        map_json_page = Page(self.site, page_title + '/Landkarte.json')
        if map_json_page.exists():
            warning(f"{map_json_page.title()} already exists, skipping {page_title}.")
            return

        # Only the first <wrmap> tag of the page is used.
        map_json = None
        wrmap_tags = wikicode.filter_tags(matches='wrmap')
        if wrmap_tags:
            map_json = parse_wrmap(str(wrmap_tags[0]))

        sledrun_json = {
            "name": page_title,
            "aliases": [],
            # A category link like [[Kategorie:In Arbeit]] carries the category in
            # ``title``; ``text`` is only the optional display text after a pipe.
            "entry_under_construction": any(link.title == 'Kategorie:In Arbeit' for link in wikilink_list),
        }

        description = self._first_text_of_section(wikicode, 'Allgemeines')
        if description is not None:
            sledrun_json["description"] = description

        rbb_list = wikicode.filter_templates(recursive=False, matches=lambda t: t.name.strip() == 'Rodelbahnbox')
        # Everything below is only evaluated when the page has exactly one Rodelbahnbox.
        if len(rbb_list) == 1:
            self._add_rodelbahnbox_fields(sledrun_json, rodelbahnbox_from_template(rbb_list[0]))

            public_transport_sections = wikicode.get_sections(
                levels=[2], matches='Anreise mit öffentlichen Verkehrsmitteln', include_headings=False)
            if public_transport_sections:
                # Only the first matching section is considered.
                pt_description, pt_stops = self._parse_public_transport(public_transport_sections[0])
                if pt_description is not None:
                    sledrun_json["public_transport_description"] = pt_description
                if pt_stops:
                    sledrun_json['public_transport_stops'] = pt_stops

            car_description = self._first_text_of_section(wikicode, 'Anreise mit dem Auto')
            if car_description is not None:
                sledrun_json["car_description"] = car_description

            # Unlike the descriptions above, links are collected from every matching section.
            see_also = []
            for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
                see_also.extend(self._parse_see_also(section))
            if see_also:
                sledrun_json['see_also'] = see_also

            sledrun_json['allow_reports'] = True

        text = create_sledrun_wiki(sledrun_json, map_json)
        summary = 'Rodelbahnbeschreibung nach Konvertierung nach und von JSON.'
        self.put_current(text, summary=summary)

    @staticmethod
    def _first_text_of_section(wikicode: Wikicode, heading: str) -> Optional[str]:
        """Return the first non-blank top-level text of the first level 2 section matching *heading*.

        :param wikicode: Parsed wikitext of the whole page.
        :param heading: Value passed as ``matches`` to ``Wikicode.get_sections``.
        :return: The stripped text, or ``None`` if there is no matching section
            or the section has no non-blank top-level text node.
        """
        sections = wikicode.get_sections(levels=[2], matches=heading)
        if not sections:
            return None
        for text_node in sections[0].ifilter_text(recursive=False):
            stripped = text_node.strip()
            if stripped:
                return str(stripped)
        return None

    def _add_rodelbahnbox_fields(self, sledrun_json: dict, rbb) -> None:
        """Copy the values of a parsed Rodelbahnbox into *sledrun_json* (modified in place).

        :param sledrun_json: Dict collecting the sledrun data.
        :param rbb: Result of ``rodelbahnbox_from_template``, indexed by the
            German Rodelbahnbox field names.
        """
        image = rbb['Bild']
        if image is not None:
            image_page = Page(self.site, image, ns=BuiltinNamespace.FILE)
            if not image_page.exists():
                # A missing image is reported but still recorded.
                warning(f"{image_page.title()} does not exist.")
            sledrun_json['image'] = image

        length = rbb['Länge']
        if length is not None:
            sledrun_json['length'] = length

        difficulty = rbb['Schwierigkeit']
        if difficulty is not None:
            sledrun_json['difficulty'] = difficulty_german_to_str(difficulty)

        avalanches = rbb['Lawinen']
        if avalanches is not None:
            sledrun_json['avalanches'] = avalanches_german_to_str(avalanches)

        has_operator, operator = rbb['Betreiber']
        if has_operator is not None:
            sledrun_json['has_operator'] = has_operator
        if operator is not None:
            sledrun_json['operator'] = operator

        walkup_possible = rbb['Aufstieg möglich']
        if walkup_possible is not None:
            sledrun_json['walkup_possible'] = walkup_possible

        walkup_separate, walkup_comment = rbb['Aufstieg getrennt']
        if walkup_separate is not None:
            sledrun_json['walkup_separate'] = tristate_german_to_str(walkup_separate)
        if walkup_comment is not None:
            sledrun_json['walkup_comment'] = walkup_comment  # TODO

        walkup_time = rbb['Gehzeit']
        if walkup_time is not None:
            sledrun_json['walkup_time'] = walkup_time

        nightlight, nightlight_description = rbb['Beleuchtungsanlage']
        if nightlight is not None:
            sledrun_json['nightlight_possible'] = tristate_german_to_str(nightlight)
        if nightlight_description is not None:
            sledrun_json['nightlight_description'] = nightlight_description

        sled_rental = rbb['Rodelverleih']
        if sled_rental is not None:
            sledrun_json['sled_rental_direct'] = sled_rental != []
            sledrun_json['sled_rental_description'] = opt_str_opt_comment_enum_to_str(sled_rental)

        show_in_overview = rbb['In Übersichtskarte']
        if show_in_overview is not None:
            sledrun_json['show_in_overview'] = show_in_overview

        forum_id = rbb['Forumid']
        if forum_id is not None:
            sledrun_json['forum_id'] = forum_id

        position = rbb['Position']
        if position is not None:
            sledrun_json['position'] = lonlat_to_json(position)

        top = lonlat_ele_to_json(rbb['Position oben'], rbb['Höhe oben'])
        if top != {}:
            sledrun_json['top'] = top

        bottom = lonlat_ele_to_json(rbb['Position unten'], rbb['Höhe unten'])
        if bottom != {}:
            sledrun_json['bottom'] = bottom

        info_phone = rbb['Telefonauskunft']
        if info_phone is not None:
            sledrun_json['info_phone'] = [{'phone': phone, 'name': name} for phone, name in info_phone]

        public_transport = rbb['Öffentliche Anreise']
        if public_transport is not None:
            sledrun_json['public_transport'] = public_transport_german_to_str(public_transport)

    @staticmethod
    def _parse_public_transport(section: Wikicode) -> tuple:
        """Extract the description and the bus stops from the public transport section.

        :param section: Section content without its heading.
        :return: Tuple ``(description, stops)``. ``description`` is the stripped
            wikitext in front of the first ``*`` list item, or ``None`` if the
            section has no list item or that wikitext is blank. ``stops`` is a
            list with one dict per top-level ``Haltestelle`` template.
        """
        nodes = section.nodes

        description = None
        # Locate the bullet by position rather than with ``list.index``, which
        # would compare the nodes by their string value.
        first_bullet = next(
            (i for i, node in enumerate(nodes) if isinstance(node, Tag) and node.wiki_markup == '*'), None)
        if first_bullet is not None:
            intro = str(Wikicode(nodes[:first_bullet])).strip()
            if intro:
                description = intro

        stops = []
        for node in nodes:
            # Tolerate whitespace around the template name, as done for 'Rodelbahnbox'.
            if not isinstance(node, Template) or node.name.strip() != 'Haltestelle':
                continue
            stop = {}
            municipality = node.get(1, None)
            if municipality is not None:
                stop['municipality'] = str(municipality)
            name_local = node.get(2, None)
            if name_local is not None:
                stop['name_local'] = str(name_local)
            lonlat_str = str_or_none(node.get(3, None))
            elevation_str = str_or_none(node.get(4, None))
            position = lonlat_ele_to_json(opt_lonlat_from_str(lonlat_str), opt_uint_from_str(elevation_str))
            if len(position) > 0:
                stop['position'] = position
            stops.append(stop)
        return description, stops

    @staticmethod
    def _parse_see_also(section: Wikicode) -> list:
        """Collect the external links that directly follow the ``'''Siehe auch'''`` marker.

        :param section: Section whose top-level nodes are scanned.
        :return: List of dicts with key ``url`` and, if the link has a title,
            ``text``. Empty if the marker is not present.
        """
        nodes = iter(section.nodes)

        # Advance the iterator just past the bold "Siehe auch" marker.
        for node in nodes:
            if isinstance(node, Tag) and str(node) == "'''Siehe auch'''":
                break

        links = []
        # If the marker was not found the iterator is exhausted and nothing is collected.
        for node in nodes:
            if isinstance(node, ExternalLink):
                # Store plain strings like the other parsed values instead of Wikicode objects.
                link = {'url': str(node.url)}
                if node.title is not None:
                    link['text'] = str(node.title)
                links.append(link)
            elif isinstance(node, (Text, Tag)) and str(node).strip() in ['', '*', ':']:
                # List markup and whitespace between the links.
                continue
            else:
                # Anything else ends the link list.
                break
        return links
247
248
def main(*args: str) -> None:
    """Process the command line arguments and run the bot.

    :param args: Command line arguments handed to ``pywikibot.handle_args``.
    """
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)

    if not page_generator:
        # No page generator could be built from the arguments: show the help hint.
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    SledrunWikiTextToJsonBot(generator=page_generator).run()
259
260
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()