simplify algorithm

return to loop-based algorithm
2025-12-06 08:22:56 +01:00 · 2025-03-08 09:48:32 -06:00 · 2025-03-08 09:40:42 -06:00
1 changed file with 13 additions and 11 deletions
--- a/sites/arbitrary.py
+++ b/sites/arbitrary.py
@@ -76,15 +76,24 @@ class Arbitrary(Site):
        else:
            # set of already processed urls. Stored to detect loops.
            found_content_urls = set()
-            content_url = definition.url
+            content_urls = [definition.url]
            def process_content_url(content_url):
                if content_url in found_content_urls:
-                    return False
+                    return None
                found_content_urls.add(content_url)
                for chapter in self._chapter(content_url, definition):
                    story.add(chapter)
-                if definition.next_selector:
+                return content_url
            while content_urls:
                for temp_url in content_urls:
                    # stop inner loop once a new link is found
                    if content_url := process_content_url(temp_url):
                        break
                # reset url list
                content_urls = []
                if content_url and definition.next_selector:
                    soup, base = self._soup(content_url)
                    next_link = soup.select(definition.next_selector)
                    if next_link:
@@ -92,14 +101,7 @@ class Arbitrary(Site):
                            next_link_url = str(next_link_item.get('href'))
                            if base:
                                next_link_url = self._join_url(base, next_link_url)
-                            content_url = self._join_url(content_url, next_link_url)
+                            content_urls.append(self._join_url(content_url, next_link_url))
                            # stop loop once a new link is found
                            status = process_content_url(content_url)
                            if status:
                                break
                return True
            process_content_url(content_url)
        if not story:
            raise SiteException("No story content found; check the content selectors")
Author	SHA1	Message	Date
Kevin Pedro	de6913a9af	simplify algorithm	2025-03-08 09:48:32 -06:00
Kevin Pedro	d4e1214be3	return to loop-based algorithm	2025-03-08 09:40:42 -06:00