mirror of
https://github.com/kemayo/leech
synced 2025-12-06 00:15:22 +01:00
Compare commits
2 commits
b2f15eb76c
...
de6913a9af
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
de6913a9af | ||
|
|
d4e1214be3 |
1 changed files with 13 additions and 11 deletions
|
|
@ -76,15 +76,24 @@ class Arbitrary(Site):
|
|||
else:
|
||||
# set of already processed urls. Stored to detect loops.
|
||||
found_content_urls = set()
|
||||
content_url = definition.url
|
||||
content_urls = [definition.url]
|
||||
|
||||
def process_content_url(content_url):
|
||||
if content_url in found_content_urls:
|
||||
return False
|
||||
return None
|
||||
found_content_urls.add(content_url)
|
||||
for chapter in self._chapter(content_url, definition):
|
||||
story.add(chapter)
|
||||
if definition.next_selector:
|
||||
return content_url
|
||||
|
||||
while content_urls:
|
||||
for temp_url in content_urls:
|
||||
# stop inner loop once a new link is found
|
||||
if content_url := process_content_url(temp_url):
|
||||
break
|
||||
# reset url list
|
||||
content_urls = []
|
||||
if content_url and definition.next_selector:
|
||||
soup, base = self._soup(content_url)
|
||||
next_link = soup.select(definition.next_selector)
|
||||
if next_link:
|
||||
|
|
@ -92,14 +101,7 @@ class Arbitrary(Site):
|
|||
next_link_url = str(next_link_item.get('href'))
|
||||
if base:
|
||||
next_link_url = self._join_url(base, next_link_url)
|
||||
content_url = self._join_url(content_url, next_link_url)
|
||||
# stop loop once a new link is found
|
||||
status = process_content_url(content_url)
|
||||
if status:
|
||||
break
|
||||
return True
|
||||
|
||||
process_content_url(content_url)
|
||||
content_urls.append(self._join_url(content_url, next_link_url))
|
||||
|
||||
if not story:
|
||||
raise SiteException("No story content found; check the content selectors")
|
||||
|
|
|
|||
Loading…
Reference in a new issue