Merge de6913a9af into cfd073fb5c

simplify algorithm
return to loop-based algorithm
2025-12-06 00:15:22 +01:00 · 2025-03-08 15:48:43 +00:00 · 2025-03-08 09:48:32 -06:00 · 2025-03-08 09:40:42 -06:00 · 2025-03-05 21:03:35 -06:00 · 2025-03-05 20:56:47 -06:00
1 changed file with 20 additions and 11 deletions
--- a/sites/arbitrary.py
+++ b/sites/arbitrary.py
@@ -76,23 +76,32 @@ class Arbitrary(Site):
        else:
            # set of already processed urls. Stored to detect loops.
            found_content_urls = set()
-            content_url = definition.url
-            while content_url and content_url not in found_content_urls:
+            content_urls = [definition.url]
+
+            def process_content_url(content_url):
+                if content_url in found_content_urls:
+                    return None
                found_content_urls.add(content_url)
                for chapter in self._chapter(content_url, definition):
                    story.add(chapter)
-                if definition.next_selector:
+                return content_url
+
+            while content_urls:
+                for temp_url in content_urls:
+                    # stop inner loop once a new link is found
+                    if content_url := process_content_url(temp_url):
+                        break
+                # reset url list
+                content_urls = []
+                if content_url and definition.next_selector:
                    soup, base = self._soup(content_url)
                    next_link = soup.select(definition.next_selector)
                    if next_link:
-                        next_link_url = str(next_link[0].get('href'))
-                        if base:
-                            next_link_url = self._join_url(base, next_link_url)
-                        content_url = self._join_url(content_url, next_link_url)
-                    else:
-                        content_url = False
-                else:
-                    content_url = False
+                        for next_link_item in next_link:
+                            next_link_url = str(next_link_item.get('href'))
+                            if base:
+                                next_link_url = self._join_url(base, next_link_url)
+                            content_urls.append(self._join_url(content_url, next_link_url))

        if not story:
            raise SiteException("No story content found; check the content selectors")
Author	SHA1	Message	Date
Kevin Pedro	76c8fe97de	Merge `de6913a9af` into `cfd073fb5c`	2025-03-08 15:48:43 +00:00
Kevin Pedro	de6913a9af	simplify algorithm	2025-03-08 09:48:32 -06:00
Kevin Pedro	d4e1214be3	return to loop-based algorithm	2025-03-08 09:40:42 -06:00
Kevin Pedro	b2f15eb76c	satisfy linter	2025-03-05 21:03:35 -06:00
Kevin Pedro	280b242a27	stop loop once a new link is found	2025-03-05 20:56:47 -06:00
Kevin Pedro	0066a148bb	process all next_link items	2025-03-05 20:56:47 -06:00