Continuation is supported - all sledrun JSON is now stored in the table.

author Philipp Spitzer <philipp@spitzer.priv.at>

Wed, 8 Jun 2022 21:13:01 +0000 (23:13 +0200)

committer Philipp Spitzer <philipp@spitzer.priv.at>

Wed, 8 Jun 2022 21:13:01 +0000 (23:13 +0200)
author Philipp Spitzer <philipp@spitzer.priv.at>
Wed, 8 Jun 2022 21:13:01 +0000 (23:13 +0200)
committer Philipp Spitzer <philipp@spitzer.priv.at>
Wed, 8 Jun 2022 21:13:01 +0000 (23:13 +0200)
diff --git a/wrpylib/wrmwcache.py b/wrpylib/wrmwcache.py

index e64161d4973b924b673af26c58079e8dfc644087..7e1a31e738f8bd3bb97816b468dc295562bad065 100644 (file)
--- a/wrpylib/wrmwcache.py
+++ b/wrpylib/wrmwcache.py
@@ -318,43 +318,52 @@ def update_wrsledrunjsoncache(api_url: urllib.parse.ParseResult, connection: sql
      :param api_url: URL to the API, e.g. https://www.winterrodeln.org/mediawiki/api.php
      :param connection: open sqlalchemy connection
      """
      :param api_url: URL to the API, e.g. https://www.winterrodeln.org/mediawiki/api.php
      :param connection: open sqlalchemy connection
      """
-    request = api_url._replace(query='action=query&list=categorymembers&cmtitle=Kategorie:Rodelbahn'
-                                     '&cmlimit=3&format=json')  # cmlimit=max (5000)
-    response = urllib.request.urlopen(request.geturl())
-    sledrun_list_response = json.load(response)
-    rows_to_insert = sledrun_list_response['query']['categorymembers']
-    for row in rows_to_insert:
-        row['sledrun_title'] = f'{row["title"]}/Rodelbahn.json'
-        row['map_title'] = f'{row["title"]}/Landkarte.json'
-
-    qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['sledrun_title'] for r in rows_to_insert)})
-    request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + qs_titles)
-    response = urllib.request.urlopen(request.geturl())
-    sledrun_json_list_response = json.load(response)
-    sledrun_json_dict = {s['title']: s for s in sledrun_json_list_response['query']['pages'].values()}
-
-    qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['map_title'] for r in rows_to_insert)})
-    request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&' + qs_titles)
-    response = urllib.request.urlopen(request.geturl())
-    map_json_list_response = json.load(response)
-    map_json_dict = {m['title']: m for m in map_json_list_response['query']['pages'].values()}
-
-    for row in rows_to_insert:
-        sledrun_json = sledrun_json_dict.get(row['sledrun_title'])
-        if sledrun_json is not None:
-            row['sledrun_pageid'] = sledrun_json['pageid']
-            row['sledrun'] = sledrun_json['revisions'][0]['slots']['main']['*']
-        map_json = map_json_dict.get(row['map_title'])
-        if map_json is not None:
-            row['map_pageid'] = map_json['pageid']
-            row['map'] = map_json['revisions'][0]['slots']['main']['*']
-
      with connection.begin():
          # connection.execute('truncate table wrsledrunjsoncache')  # needs additional permissions
          connection.execute('delete from wrsledrunjsoncache')
      with connection.begin():
          # connection.execute('truncate table wrsledrunjsoncache')  # needs additional permissions
          connection.execute('delete from wrsledrunjsoncache')
-        sql = 'insert into wrsledrunjsoncache ' \
-              '(sledrun_page_id, sledrun_json_page_id, sledrun_json, map_json_page_id, map_json) ' \
-              'values (%s, %s, %s, %s, %s)'
-        for row in rows_to_insert:
-            connection.execute(sql, (row['pageid'], row.get('sledrun_pageid'), row.get('sledrun'),
-                                     row.get('map_pageid'), row.get('map')))
+
+        cmcontinue = ''
+        while True:
+            request = api_url._replace(query='action=query&list=categorymembers&cmtitle=Kategorie:Rodelbahn'
+                                             f'&cmlimit=50&format=json&cmcontinue={cmcontinue}')  # cmlimit=max (5000)
+            response = urllib.request.urlopen(request.geturl())
+            sledrun_list_response = json.load(response)
+            rows_to_insert = sledrun_list_response['query']['categorymembers']
+            for row in rows_to_insert:
+                row['sledrun_title'] = f'{row["title"]}/Rodelbahn.json'
+                row['map_title'] = f'{row["title"]}/Landkarte.json'
+
+            qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['sledrun_title'] for r in rows_to_insert)})
+            request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&'
+                                             + qs_titles)
+            response = urllib.request.urlopen(request.geturl())
+            sledrun_json_list_response = json.load(response)
+            sledrun_json_dict = {s['title']: s for s in sledrun_json_list_response['query']['pages'].values()}
+
+            qs_titles = urllib.parse.urlencode({"titles": '|'.join(r['map_title'] for r in rows_to_insert)})
+            request = api_url._replace(query='action=query&prop=revisions&rvprop=content&format=json&rvslots=main&'
+                                             + qs_titles)
+            response = urllib.request.urlopen(request.geturl())
+            map_json_list_response = json.load(response)
+            map_json_dict = {m['title']: m for m in map_json_list_response['query']['pages'].values()}
+
+            for row in rows_to_insert:
+                sledrun_json = sledrun_json_dict.get(row['sledrun_title'])
+                if 'missing' not in sledrun_json:
+                    row['sledrun_pageid'] = sledrun_json['pageid']
+                    row['sledrun'] = sledrun_json['revisions'][0]['slots']['main']['*']
+                map_json = map_json_dict.get(row['map_title'])
+                if 'missing' not in map_json:
+                    row['map_pageid'] = map_json['pageid']
+                    row['map'] = map_json['revisions'][0]['slots']['main']['*']
+
+            sql = 'insert into wrsledrunjsoncache ' \
+                  '(sledrun_page_id, sledrun_json_page_id, sledrun_json, map_json_page_id, map_json) ' \
+                  'values (%s, %s, %s, %s, %s)'
+            for row in rows_to_insert:
+                connection.execute(sql, (row['pageid'], row.get('sledrun_pageid'), row.get('sledrun'),
+                                         row.get('map_pageid'), row.get('map')))
+
+            if 'continue' not in sledrun_list_response:
+                break
+            cmcontinue = sledrun_list_response['continue']['cmcontinue']
author	Philipp Spitzer <philipp@spitzer.priv.at>
	Wed, 8 Jun 2022 21:13:01 +0000 (23:13 +0200)
committer	Philipp Spitzer <philipp@spitzer.priv.at>
	Wed, 8 Jun 2022 21:13:01 +0000 (23:13 +0200)