1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-06 00:15:22 +01:00

Compare commits

...

4 commits

Author SHA1 Message Date
Kevin Pedro
8df8a593ff
Merge b2f15eb76c into cfd073fb5c 2025-03-07 08:54:12 -06:00
Kevin Pedro
b2f15eb76c satisfy linter 2025-03-05 21:03:35 -06:00
Kevin Pedro
280b242a27 stop loop once a new link is found 2025-03-05 20:56:47 -06:00
Kevin Pedro
0066a148bb process all next_link items 2025-03-05 20:56:47 -06:00

View file

@@ -77,7 +77,10 @@ class Arbitrary(Site):
# set of already processed urls. Stored to detect loops.
found_content_urls = set()
content_url = definition.url
while content_url and content_url not in found_content_urls:
def process_content_url(content_url):
if content_url in found_content_urls:
return False
found_content_urls.add(content_url)
for chapter in self._chapter(content_url, definition):
story.add(chapter)
@@ -85,14 +88,18 @@ class Arbitrary(Site):
soup, base = self._soup(content_url)
next_link = soup.select(definition.next_selector)
if next_link:
next_link_url = str(next_link[0].get('href'))
if base:
next_link_url = self._join_url(base, next_link_url)
content_url = self._join_url(content_url, next_link_url)
else:
content_url = False
else:
content_url = False
for next_link_item in next_link:
next_link_url = str(next_link_item.get('href'))
if base:
next_link_url = self._join_url(base, next_link_url)
content_url = self._join_url(content_url, next_link_url)
# stop loop once a new link is found
status = process_content_url(content_url)
if status:
break
return True
process_content_url(content_url)
if not story:
raise SiteException("No story content found; check the content selectors")