]> ToastFreeware Gitweb - philipp/winterrodeln/wrpylib.git/commitdiff
Fine-tune parsing of gastronomy.
author Philipp Spitzer <philipp@spitzer.priv.at>
Tue, 14 Jun 2022 21:25:26 +0000 (23:25 +0200)
committer Philipp Spitzer <philipp@spitzer.priv.at>
Tue, 14 Jun 2022 21:25:26 +0000 (23:25 +0200)
bots/update_sledrun_json_from_wikitext_gastronomy.py

index 6f3619d63e66b136f7cd9197a79278b1ba1916e0..3d23b8a28771b30c5da05497a81ed52ef5f4813c 100644 (file)
@@ -78,39 +78,44 @@ class UpdateSledrunJsonFromWikiText(
                 gastronomy = []
                 line_iter = io.StringIO(value)
                 line = next(line_iter, None)
                 gastronomy = []
                 line_iter = io.StringIO(value)
                 line = next(line_iter, None)
-                while line is not None and line.rstrip() != "* '''Hütten''':":
+                while line is not None and not line.startswith("* '''Hütten''':"):
                     line = next(line_iter, None)
                 if line is None:
                     return gastronomy
                     line = next(line_iter, None)
                 if line is None:
                     return gastronomy
-                while line is not None:
-                    line = next(line_iter, None)
-                    if line is not None:
-                        if line.startswith('** '):
-                            g = {}
-                            wiki = mwparserfromhell.parse(line)
-                            wiki_link = next(wiki.ifilter_wikilinks(), None)
-                            if isinstance(wiki_link, Wikilink):
-                                g['wr_page'] = wikilink_to_json(wiki_link)
-                            ext_link = next(wiki.ifilter_external_links(), None)
-                            if isinstance(ext_link, ExternalLink):
-                                g['weblink'] = external_link_to_json(ext_link)
-                            remaining = str(Wikicode(n for n in wiki.nodes
-                                                     if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
-                            match = re.match(r'(.*)\((.+)\)', remaining)
-                            if match:
-                                name, note = match.groups()
-                                name = name.strip()
-                                note = note.strip()
-                                if len(name) > 0:
-                                    g['name'] = name
-                                if len(note) > 0:
-                                    g['note'] = note
-                            elif len(remaining) > 0 and remaining != '...':
-                                g['name'] = remaining
-                            if len(g) != 0:
-                                gastronomy.append(g)
-                        else:
-                            break
+                line = re.match(r"^\* '''Hütten''':\s*(.*)\s*", line).group(1)
+                if len(line) == 0:
+                    line = next(line_iter, '')
+                    if not line.startswith('** '):
+                        return gastronomy
+                    line = re.match(r"^\*\*\s*(.*)\s*", line).group(1)
+                while True:
+                    g = {}
+                    wiki = mwparserfromhell.parse(line)
+                    wiki_link = next(wiki.ifilter_wikilinks(), None)
+                    if isinstance(wiki_link, Wikilink):
+                        g['wr_page'] = wikilink_to_json(wiki_link)
+                    ext_link = next(wiki.ifilter_external_links(), None)
+                    if isinstance(ext_link, ExternalLink):
+                        g['weblink'] = external_link_to_json(ext_link)
+                    remaining = str(Wikicode(n for n in wiki.nodes
+                                             if isinstance(n, (Text, Tag)) and str(n).strip() != '*')).strip()
+                    match = re.match(r'(.*)\((.+)\)', remaining)
+                    if match:
+                        name, note = match.groups()
+                        name = name.strip()
+                        note = note.strip()
+                        if len(name) > 0:
+                            g['name'] = name
+                        if len(note) > 0:
+                            g['note'] = note
+                    elif len(remaining) > 0 and remaining != '...':
+                        g['name'] = remaining
+                    if len(g) != 0:
+                        gastronomy.append(g)
+                    line = next(line_iter, '')
+                    if not line.startswith('** '):
+                        break
+                    line = re.match(r"^\*\*\s*(.*)\s*", line).group(1)
                 return gastronomy
 
             w = _gastronomy(str(v))
                 return gastronomy
 
             w = _gastronomy(str(v))