5 from copy import deepcopy
6 from typing import List, Iterable, Tuple, Dict
9 from termcolor import cprint
11 from wrpylib.cli_tools import unified_diff, input_yes_no_quit, Choice
12 from wrpylib.json_tools import order_json_keys, format_json
13 from wrpylib.mwapi import WikiSite, page_json
17 'https://www.lizumerhof.at/',
18 'https://www.madrisa.ch/madrisa-mia',
def remove_dead_links_in_dict_with_weblink(container: Dict, weblink_key: str, all_dead_links: set[str]):
    """Delete ``container[weblink_key]`` if its URL is a known dead link.

    :param container: dict that may hold a weblink dict under ``weblink_key``;
        a weblink dict is expected to carry a ``'url'`` entry.
    :param weblink_key: key under which the weblink dict is stored.
    :param all_dead_links: set of URLs considered dead.
    """
    weblink = container.get(weblink_key)
    # Use .get('url') for consistency with remove_dead_links_in_weblink_list:
    # a weblink without a 'url' entry is left untouched instead of raising KeyError.
    if weblink is not None and weblink.get('url') in all_dead_links:
        del container[weblink_key]
def remove_dead_links_in_weblink_list(container: Dict, weblink_key: str, all_dead_links: set[str]):
    """Remove dead URLs from the weblink list at ``container[weblink_key]``.

    The list is filtered in place; if it ends up empty, the key is deleted
    from ``container`` entirely.  An absent key is ignored.

    :param container: dict that may hold a list of weblink dicts.
    :param weblink_key: key under which the weblink list is stored.
    :param all_dead_links: set of URLs considered dead.
    """
    weblink_list = container.get(weblink_key)
    if weblink_list is None:
        # NOTE(review): restored from a gap in the source chunk — the guard
        # had no body; an early return is the only sensible completion.
        return
    # Slice-assign to filter in place, so other references to the list stay valid.
    weblink_list[:] = [wl for wl in weblink_list if wl.get('url') not in all_dead_links]
    if not weblink_list:
        del container[weblink_key]
def remove_dead_links_on_sledrun_title(site: WikiSite, title: str, all_dead_links: set[str]):
    """Strip dead links from one sledrun's ``<title>/Rodelbahn.json`` wiki page.

    Loads the page, removes every URL listed in *all_dead_links* from the
    link-carrying fields, shows the user a diff, and — on confirmation —
    saves the page back.  NOTE(review): this source chunk is missing several
    lines (marked below); the save call in particular is only partially visible.
    """
    sledrun_json_page = site.query_page(f'{title}/Rodelbahn.json')
    sledrun = page_json(sledrun_json_page)
    sledrun_orig = deepcopy(sledrun)  # kept to detect changes and to build the diff

    # Top-level official homepage link.
    if sledrun.get('official_url') in all_dead_links:
        del sledrun['official_url']

    # Each sled-rental entry carries an optional single weblink.
    for rental in sledrun.get('sled_rental', []):
        remove_dead_links_in_dict_with_weblink(rental, 'weblink', all_dead_links)

    # Public-transport lines carry a list of timetable links.
    for pt_line in sledrun.get('public_transport_lines', []):
        remove_dead_links_in_weblink_list(pt_line, 'timetable_links', all_dead_links)

    if 'gastronomy' in sledrun:
        for gastronomy in sledrun['gastronomy']:
            gastronomy_orig = gastronomy.copy()
            remove_dead_links_in_dict_with_weblink(gastronomy, 'weblink', all_dead_links)
            # If removing the weblink dropped the entry's only display text,
            # preserve the link's text as the gastronomy name.
            if 'weblink' in gastronomy_orig and 'weblink' not in gastronomy:
                if gastronomy.get('name') is None and gastronomy_orig.get('weblink', {}).get('text') is not None:
                    gastronomy['name'] = gastronomy_orig['weblink']['text']
        # Drop gastronomy entries that became empty, then the key itself if none remain.
        sledrun['gastronomy'][:] = [g for g in sledrun['gastronomy'] if g]
        if len(sledrun['gastronomy']) == 0:
            del sledrun['gastronomy']

    # Plain weblink-list fields living directly on the sledrun.
    for key in ['info_web', 'videos', 'webcams', 'see_also', 'public_transport_links']:
        remove_dead_links_in_weblink_list(sledrun, key, all_dead_links)

    if sledrun == sledrun_orig:
        # NOTE(review): a line is missing from this chunk here — presumably
        # `return` (nothing changed, nothing to save).

    # Validate against the sledrun JSON schema and normalize key order
    # before serializing.
    jsonschema.validate(instance=sledrun, schema=site.sledrun_schema())
    sledrun_ordered = order_json_keys(sledrun, site.sledrun_schema())
    assert sledrun_ordered == sledrun

    sledrun_str = format_json(sledrun_ordered)
    sledrun_orig_str = format_json(sledrun_orig)

    # Interactive confirmation: show the diff and ask the user.
    cprint(title, 'green')
    unified_diff(sledrun_orig_str, sledrun_str)
    choice = input_yes_no_quit('Do you accept the changes [yes, no, quit]? ', None)
    if choice == Choice.no:
        # NOTE(review): branch body missing from this chunk.
    elif choice == Choice.quit:
        # NOTE(review): branch body missing from this chunk.  The keyword
        # arguments below belong to a wiki save/edit call whose surrounding
        # lines (call target, text=, token=, …) are also missing.
        pageid=sledrun_json_page['pageid'],
        summary=f'Ungültige Links entfernt.',
        baserevid=sledrun_json_page['revisions'][0]['revid'],
def get_all_sledrun_titles(site: WikiSite) -> Iterable[str]:
    """Yield the title of every page in the wiki category 'Rodelbahn'.

    Iterates the MediaWiki ``list=categorymembers`` query; ``site.query``
    is expected to handle query continuation.
    """
    for result in site.query(list='categorymembers', cmtitle='Kategorie:Rodelbahn', cmlimit='max'):
        for page in result['categorymembers']:
            # NOTE(review): restored from a gap in the source chunk — each
            # categorymembers entry carries its page name under 'title'.
            yield page['title']
def get_all_titles_with_dead_links(dead_links: Dict[str, List[Tuple[str, int, str]]]) -> Iterable[str]:
    """Yield every page title that references at least one dead link.

    Titles are yielded once per (link, report) pair, so duplicates are possible.

    :param dead_links: maps a dead URL to a list of ``(title, time, reason)``
        tuples describing where and why the link was reported dead.
    """
    for title_list in dead_links.values():
        for title, time, reason in title_list:
            # NOTE(review): restored from a gap in the source chunk — only the
            # title component is of interest here.
            yield title
def print_forbidden(dead_links: Dict[str, List[Tuple[str, int, str]]]):
    """Print every dead link whose most recent failure reason is 'Forbidden'.

    Links whitelisted in ``VALID_FORBIDDEN`` are skipped.  Each printed line
    shows the link followed by the titles of the pages that reference it.

    :param dead_links: maps a dead URL to a list of ``(title, time, reason)``
        tuples; the last tuple is taken as the most recent report.
    """
    for dead_link, page_list in dead_links.items():
        # page_list[-1][-1] is the reason of the most recent report.
        # Truthiness check replaces the unidiomatic `len(page_list) >= 1`.
        if page_list and page_list[-1][-1] == 'Forbidden' and dead_link not in VALID_FORBIDDEN:
            print(f'{dead_link} ({", ".join(p[0] for p in page_list)})')
def dead_link_filter(dead_link_info: Tuple[str, List[Tuple[str, int, str]]]) -> bool:
    """Return True if this dead-link entry should be acted upon.

    Intended for use with ``filter`` over ``dead_links.items()``.

    :param dead_link_info: ``(url, [(title, time, reason), ...])`` pair.
    :return: True when the link was reported at least twice and is not
        whitelisted in ``VALID_FORBIDDEN``.
    """
    dead_link, page_list = dead_link_info
    if len(page_list) < 2:
        # NOTE(review): restored from a gap in the source chunk — a single
        # report is presumably not considered reliable; confirm intent.
        return False
    if dead_link in VALID_FORBIDDEN:
        # NOTE(review): restored from a gap in the source chunk — whitelisted
        # 'Forbidden' responses should not be treated as dead.
        return False
    return True
def update_dead_links(ini_files: List[str], dead_link_file: str, only_print_forbidden: bool):
    """Remove confirmed dead links from all sledrun pages that reference them.

    :param ini_files: wiki configuration .ini files passed to :class:`WikiSite`.
    :param dead_link_file: pickle file mapping dead URL -> list of
        ``(title, time, reason)`` report tuples.
    :param only_print_forbidden: if True, only print 'Forbidden' links and exit.
    """
    with open(dead_link_file, 'rb') as fp:
        # NOTE: pickle.loads untrusted data is unsafe — acceptable here only
        # because the file is generated locally by the dead-link checker.
        dead_links: Dict[str, List[Tuple[str, int, str]]] = pickle.load(fp)

    if only_print_forbidden:
        print_forbidden(dead_links)
        # NOTE(review): restored from a gap in the source chunk — the
        # '--print-forbidden' mode documents itself as "print and exit".
        return

    # Keep only links considered reliably dead (see dead_link_filter).
    dead_links = dict(filter(dead_link_filter, dead_links.items()))
    all_titles_with_dead_links = set(get_all_titles_with_dead_links(dead_links))
    all_dead_links = set(dead_links.keys())

    site = WikiSite(ini_files)
    all_sledrun_titles = set(get_all_sledrun_titles(site))

    # Only sledrun pages are processed; other affected pages are ignored.
    sledrun_titles_with_dead_links = all_sledrun_titles.intersection(all_titles_with_dead_links)

    for sledrun_title in sledrun_titles_with_dead_links:
        remove_dead_links_on_sledrun_title(site, sledrun_title, all_dead_links)
# NOTE(review): the statements below appear to be the body of an argument-
# parsing entry point (e.g. `def main():`) whose `def` line is missing from
# this chunk; confirm against the full file.
parser = argparse.ArgumentParser(description='Remove dead links.')
parser.add_argument('--print-forbidden', action='store_true', help='Print forbidden pages and exit')
parser.add_argument('dead_links_file', help='deadlinks-winterrodelncolleen-de-formal.dat')
parser.add_argument('ini_file', nargs='+', help='see: https://www.winterrodeln.org/trac/wiki/ConfigIni')
args = parser.parse_args()
update_dead_links(args.ini_file, args.dead_links_file, args.print_forbidden)


if __name__ == '__main__':
    # NOTE(review): the guarded call (presumably `main()`) is missing from
    # this chunk.