From: Philipp Spitzer Date: Mon, 6 Jun 2022 21:18:45 +0000 (+0200) Subject: Temporary helper scripts that won't be needed for long... X-Git-Url: https://git.toastfreeware.priv.at/philipp/winterrodeln/wrpylib.git/commitdiff_plain/36db5daf31a06ca9ac4412cc5dfe992bd0a1a223 Temporary helper scripts that won't be needed for long... --- diff --git a/bots/update_sledrun_json_from_wikitext_gastronomy.py b/bots/update_sledrun_json_from_wikitext_gastronomy.py new file mode 100644 index 0000000..6f3619d --- /dev/null +++ b/bots/update_sledrun_json_from_wikitext_gastronomy.py @@ -0,0 +1,143 @@ +#!/usr/bin/python +""" +User script for pywikibot (https://gerrit.wikimedia.org/r/pywikibot/core.git), tested with version 6.6.1. +Put it in directory scripts/userscripts. + +Update a sledrun JSON page from a detail in a sledrun wikitext page. + +The following generators and filters are supported: + +¶ms; +""" +import io +import json +import re +from itertools import takewhile, dropwhile +from typing import Optional + +import jsonschema +import mwparserfromhell +from mwparserfromhell.nodes.extras import Parameter + +import pywikibot +from mwparserfromhell.nodes import Tag, Text, ExternalLink, Template, Wikilink, Heading +from mwparserfromhell.wikicode import Wikicode +from pywikibot import pagegenerators, Page +from pywikibot.bot import ( + AutomaticTWSummaryBot, + ConfigParserBot, + ExistingPageBot, + NoRedirectPageBot, + SingleSiteBot, +) +from pywikibot.logging import warning +from pywikibot.site._namespace import BuiltinNamespace +from wrpylib.json_tools import order_json_keys + +from wrpylib.wrmwmarkup import create_sledrun_wiki, lonlat_to_json, lonlat_ele_to_json, parse_wrmap +from wrpylib.wrvalidators import rodelbahnbox_from_template, tristate_german_to_str, difficulty_german_to_str, \ + avalanches_german_to_str, public_transport_german_to_str, opt_lonlat_from_str, \ + opt_uint_from_str +from wrpylib.lib_sledrun_wikitext_to_json import optional_set, get_sledrun_description, 
def _parse_gastronomy_entry(line: str) -> dict:
    """Build the gastronomy dict for a single ``** ...`` sub-bullet line.

    The first wikilink (if any) is stored under ``wr_page`` and the first
    external link (if any) under ``weblink``, each converted by the
    corresponding ``*_to_json`` helper. The remaining plain text is split
    into ``name`` and ``note`` when it has the form ``name (note)``.

    :param line: One line of wikitext starting with ``'** '``.
    :return: A dict that may be empty when nothing usable was found.
    """
    entry = {}
    parsed = mwparserfromhell.parse(line)

    first_wikilink = next(parsed.ifilter_wikilinks(), None)
    if isinstance(first_wikilink, Wikilink):
        entry['wr_page'] = wikilink_to_json(first_wikilink)

    first_external_link = next(parsed.ifilter_external_links(), None)
    if isinstance(first_external_link, ExternalLink):
        entry['weblink'] = external_link_to_json(first_external_link)

    # Only text and tag nodes contribute to the name/note; nodes that are just
    # a '*' (the bullet markers) are dropped.
    plain_nodes = (
        node for node in parsed.nodes
        if isinstance(node, (Text, Tag)) and str(node).strip() != '*'
    )
    remaining = str(Wikicode(plain_nodes)).strip()

    name_and_note = re.match(r'(.*)\((.+)\)', remaining)
    if name_and_note is None:
        # No parenthesised note: the whole text is the name, unless it is
        # empty or the '...' placeholder.
        if remaining and remaining != '...':
            entry['name'] = remaining
        return entry

    name, note = (part.strip() for part in name_and_note.groups())
    if name:
        entry['name'] = name
    if note:
        entry['note'] = note
    return entry


def _parse_gastronomy(value: str) -> list:
    """Collect the entries listed below the ``* '''Hütten''':`` bullet.

    :param value: Wikitext of a section.
    :return: List of non-empty entry dicts; empty when the bullet is missing.
    """
    line_iter = io.StringIO(value)

    # Advance to the header bullet; without it there is nothing to collect.
    for line in line_iter:
        if line.rstrip() == "* '''Hütten''':":
            break
    else:
        return []

    entries = []
    for line in line_iter:
        if not line.startswith('** '):
            # The first line that is not a sub-bullet ends the list.
            break
        entry = _parse_gastronomy_entry(line)
        if entry:
            entries.append(entry)
    return entries


class UpdateSledrunJsonFromWikiText(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that copies the gastronomy list of a sledrun wikitext page to its JSON sub-page."""

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki into ``self.sledrun_schema``."""
        schema_page = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema_page.content_model == 'json'
        self.sledrun_schema = json.loads(schema_page.text)

    def treat_page(self) -> None:
        """Update ``gastronomy`` in ``<page title>/Rodelbahn.json`` from the current page.

        Pages that are not wikitext (a warning is logged) or that have no
        JSON sub-page are skipped. The JSON page is only written when the
        validated result differs from what is stored.
        """
        wikitext_content_model = 'wikitext'
        page = self.current_page
        if page.content_model != wikitext_content_model:
            warning(f"The content model of {page.title()} is {page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(page.text)

        json_page = Page(self.site, page.title() + '/Rodelbahn.json')
        if not json_page.exists():
            return
        # Parse twice to get two independent objects: one stays untouched for
        # the change detection below.
        json_before = json.loads(json_page.text)
        json_after = json.loads(json_page.text)
        text_before = json.dumps(json_before, ensure_ascii=False, indent=4)

        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            gastronomy = _parse_gastronomy(str(section))
            if gastronomy:
                json_after['gastronomy'] = gastronomy

        jsonschema.validate(instance=json_after, schema=self.sledrun_schema)
        json_ordered = order_json_keys(json_after, self.sledrun_schema)
        assert json_ordered == json_after
        if json_after == json_before:
            return

        self.userPut(
            json_page,
            text_before,
            json.dumps(json_ordered, ensure_ascii=False, indent=4),
            summary='Gastronomie Information im Rodelbahn JSON aktualisiert vom Wikitext.',
            contentmodel='json',
        )


def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the selected pages."""
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    UpdateSledrunJsonFromWikiText(generator=page_generator).run()


if __name__ == '__main__':
    main()
# Template parameter name -> key in the JSON object.
_GUETESIEGEL_FIELDS = {
    'Anlagename': 'name',
    'Organisation': 'organization',
    'Erstverleihung': 'first_issued',
    'Verlängerung': 'valid_from',
    'Forum': 'forum_id',
    'Thread': 'thread_id',
}

# JSON keys whose values are converted with int().
_GUETESIEGEL_INT_FIELDS = frozenset(('first_issued', 'valid_from', 'forum_id', 'thread_id'))


def _parse_guetesiegel(wikicode) -> Optional[dict]:
    """Return the data of the first non-empty 'Tiroler Naturrodelbahn Gütesiegel' template.

    :param wikicode: Parsed wikitext to search for the template.
    :return: Dict with the non-blank parameters mapped to their JSON keys, or
        ``None`` when no such template carries at least one value.
    :raises ValueError: If a numeric parameter cannot be converted by ``int``.
    """
    for template in wikicode.filter_templates():
        if template.name.strip() != 'Tiroler Naturrodelbahn Gütesiegel':
            continue
        seal = {}
        for param_name, json_key in _GUETESIEGEL_FIELDS.items():
            if not template.has(param_name):
                continue
            raw = template.get(param_name).value.strip()
            if raw == '':
                continue
            seal[json_key] = int(raw) if json_key in _GUETESIEGEL_INT_FIELDS else raw
        if seal:
            return seal
    return None


class UpdateSledrunJsonFromWikiText(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that copies the Gütesiegel template data of a sledrun page to its JSON sub-page."""

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki into ``self.sledrun_schema``."""
        schema_page = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema_page.content_model == 'json'
        self.sledrun_schema = json.loads(schema_page.text)

    def treat_page(self) -> None:
        """Store the Gütesiegel data in ``<page title>/Rodelbahn.json``.

        Pages that are not wikitext (a warning is logged), that have no JSON
        sub-page, or that contain no usable Gütesiegel template are skipped.
        Besides setting ``tiroler_naturrodelbahn_gütesiegel``, the templates
        are stripped from the JSON ``description`` text.
        """
        wikitext_content_model = 'wikitext'
        page = self.current_page
        if page.content_model != wikitext_content_model:
            warning(f"The content model of {page.title()} is {page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(page.text)

        json_page = Page(self.site, page.title() + '/Rodelbahn.json')
        if not json_page.exists():
            return
        sledrun_json = json.loads(json_page.text)
        text_before = json.dumps(sledrun_json, ensure_ascii=False, indent=4)

        seal = _parse_guetesiegel(wikicode)
        if seal is None:
            return
        sledrun_json['tiroler_naturrodelbahn_gütesiegel'] = seal

        # NOTE(review): this removes every template found in the description,
        # not only the Gütesiegel one - confirm that this is intended.
        description_code = mwparserfromhell.parse(sledrun_json['description'])
        for template in description_code.filter_templates():
            description_code.remove(template)
        # Removing templates can leave runs of blank lines; squeeze them to one.
        sledrun_json['description'] = re.sub(r'\n{2,}', r'\n\n', str(description_code))

        jsonschema.validate(instance=sledrun_json, schema=self.sledrun_schema)
        json_ordered = order_json_keys(sledrun_json, self.sledrun_schema)
        assert json_ordered == sledrun_json

        self.userPut(
            json_page,
            text_before,
            json.dumps(json_ordered, ensure_ascii=False, indent=4),
            summary='Gütesiegel im Rodelbahn JSON aktualisiert vom Wikitext.',
            contentmodel='json',
        )


def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the selected pages."""
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    UpdateSledrunJsonFromWikiText(generator=page_generator).run()


if __name__ == '__main__':
    main()
def _parse_sledrun_list(value: str) -> list:
    """Collect the wikilinks listed below the ``* '''Andere Rodelbahnen''':`` bullet.

    Each ``** ...`` sub-bullet following the header contributes its first
    wikilink, converted with ``wikilink_to_json``; sub-bullets without a
    wikilink are skipped. The first line that is not a sub-bullet ends the list.

    :param value: Wikitext of a section.
    :return: List of converted wikilinks; empty when the bullet is missing.
    """
    line_iter = io.StringIO(value)

    # Advance to the header bullet; without it there is nothing to collect.
    for line in line_iter:
        if line.rstrip() == "* '''Andere Rodelbahnen''':":
            break
    else:
        return []

    sledruns = []
    for line in line_iter:
        if not line.startswith('** '):
            break
        first_wikilink = next(mwparserfromhell.parse(line).ifilter_wikilinks(), None)
        if isinstance(first_wikilink, Wikilink):
            sledruns.append(wikilink_to_json(first_wikilink))
    return sledruns


class UpdateSledrunJsonFromWikiText(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that copies the "other sledruns" list of a sledrun wikitext page to its JSON sub-page."""

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki into ``self.sledrun_schema``."""
        schema_page = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema_page.content_model == 'json'
        self.sledrun_schema = json.loads(schema_page.text)

    def treat_page(self) -> None:
        """Update ``sledrun_list`` in ``<page title>/Rodelbahn.json`` from the current page.

        Pages that are not wikitext (a warning is logged) or that have no
        JSON sub-page are skipped. The JSON page is only written when the
        validated result differs from what is stored.
        """
        wikitext_content_model = 'wikitext'
        page = self.current_page
        if page.content_model != wikitext_content_model:
            warning(f"The content model of {page.title()} is {page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(page.text)

        json_page = Page(self.site, page.title() + '/Rodelbahn.json')
        if not json_page.exists():
            return
        # Parse twice to get two independent objects: one stays untouched for
        # the change detection below.
        json_before = json.loads(json_page.text)
        json_after = json.loads(json_page.text)
        text_before = json.dumps(json_before, ensure_ascii=False, indent=4)

        for section in wikicode.get_sections(levels=[2], matches='Allgemeines'):
            sledrun_list = _parse_sledrun_list(str(section))
            if sledrun_list:
                json_after['sledrun_list'] = sledrun_list

        jsonschema.validate(instance=json_after, schema=self.sledrun_schema)
        json_ordered = order_json_keys(json_after, self.sledrun_schema)
        assert json_ordered == json_after
        if json_after == json_before:
            return

        self.userPut(
            json_page,
            text_before,
            json.dumps(json_ordered, ensure_ascii=False, indent=4),
            summary='Information zu "Anderen Rodelbahnen" im Rodelbahn JSON aktualisiert vom Wikitext.',
            contentmodel='json',
        )


def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the selected pages."""
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    UpdateSledrunJsonFromWikiText(generator=page_generator).run()


if __name__ == '__main__':
    main()
def _find_webcam_url(wikicode) -> Optional[str]:
    """Return the stripped ``webcam`` value of the first top-level 'Buttonleiste' template.

    :param wikicode: Parsed wikitext of the sledrun page.
    :return: The URL text, or ``None`` when there is no such template, it has
        no ``webcam`` parameter, or the parameter is blank.
    """
    button_bars = wikicode.ifilter_templates(
        recursive=False,
        matches=lambda template: template.name.strip() == 'Buttonleiste',
    )
    button_bar = next(button_bars, None)
    if button_bar is None:
        return None
    webcam = button_bar.get('webcam', None)
    if not isinstance(webcam, Parameter):
        return None
    url = str(webcam.value.strip())
    return url if url != "" else None


class UpdateSledrunJsonFromWikiText(
    SingleSiteBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    AutomaticTWSummaryBot,
):
    """Bot that copies the webcam URL of a sledrun wikitext page to its JSON sub-page."""

    def setup(self) -> None:
        """Load the sledrun JSON schema from the wiki into ``self.sledrun_schema``."""
        schema_page = Page(self.site, 'Winterrodeln:Datenschema/Rodelbahn/V1.json')
        assert schema_page.content_model == 'json'
        self.sledrun_schema = json.loads(schema_page.text)

    def treat_page(self) -> None:
        """Add ``webcams`` to ``<page title>/Rodelbahn.json`` from the current page.

        Pages that are not wikitext (a warning is logged) or that have no
        JSON sub-page are skipped. An existing ``webcams`` entry is never
        overwritten, and the JSON page is only written when the validated
        result differs from what is stored.
        """
        wikitext_content_model = 'wikitext'
        page = self.current_page
        if page.content_model != wikitext_content_model:
            warning(f"The content model of {page.title()} is {page.content_model} "
                    f"instead of {wikitext_content_model}.")
            return

        wikicode = mwparserfromhell.parse(page.text)

        json_page = Page(self.site, page.title() + '/Rodelbahn.json')
        if not json_page.exists():
            return
        # Parse twice to get two independent objects: one stays untouched for
        # the change detection below.
        json_before = json.loads(json_page.text)
        json_after = json.loads(json_page.text)
        text_before = json.dumps(json_before, ensure_ascii=False, indent=4)

        webcam_url = _find_webcam_url(wikicode)
        if webcam_url is not None and 'webcams' not in json_after:
            json_after['webcams'] = [{'url': webcam_url}]

        jsonschema.validate(instance=json_after, schema=self.sledrun_schema)
        json_ordered = order_json_keys(json_after, self.sledrun_schema)
        assert json_ordered == json_after
        if json_after == json_before:
            return

        self.userPut(
            json_page,
            text_before,
            json.dumps(json_ordered, ensure_ascii=False, indent=4),
            summary='Webcam Information im Rodelbahn JSON aktualisiert vom Wikitext.',
            contentmodel='json',
        )


def main(*args: str) -> None:
    """Handle the command line arguments and run the bot on the selected pages."""
    remaining_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    factory.handle_args(remaining_args)
    page_generator = factory.getCombinedGenerator(preload=True)
    if not page_generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return
    UpdateSledrunJsonFromWikiText(generator=page_generator).run()


if __name__ == '__main__':
    main()