mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
adapter_storiesonlinenet.py - tolerate contest stories
Contest stories have author="(Hidden)", which breaks the code that gets story info from the author's page. Added checks for this, plus checks to verify that soup actually found results before blindly using them.
This commit is contained in:
parent
7f0d7f70be
commit
e2961eaadf
1 changed file with 23 additions and 7 deletions
|
|
@ -209,7 +209,11 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
||||||
## regardless.
|
## regardless.
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
a = soup.find('a',rel="bookmark")
|
a = soup.find('a',rel="bookmark")
|
||||||
url = 'https://'+self.host+a['href']
|
if (a):
|
||||||
|
url = 'https://'+self.host+a['href']
|
||||||
|
else:
|
||||||
|
# Contest entries do not have bookmark HREF
|
||||||
|
logger.info("No Bookmark HREF, using URL="+url)
|
||||||
|
|
||||||
## Premium has "?ind=1" to force index.
|
## Premium has "?ind=1" to force index.
|
||||||
## May not be needed w/o premium
|
## May not be needed w/o premium
|
||||||
|
|
@ -228,6 +232,12 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
||||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! The story you're trying to access is being filtered by your choice of contents filtering.")
|
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! The story you're trying to access is being filtered by your choice of contents filtering.")
|
||||||
elif "Error! Daily Limit Reached" in data or "Sorry! You have reached your daily limit of" in data:
|
elif "Error! Daily Limit Reached" in data or "Sorry! You have reached your daily limit of" in data:
|
||||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! Daily Limit Reached")
|
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! Daily Limit Reached")
|
||||||
|
elif "by (Hidden)" in data:
|
||||||
|
#Contest entries have author set to "(Hidden)" which breaks author lookups below
|
||||||
|
logger.info("Contest entry, setting authorId=(Hidden)")
|
||||||
|
self.story.addToList('authorId',"(Hidden)")
|
||||||
|
logger.info("Contest entry, setting author=(Hidden)")
|
||||||
|
self.story.addToList('author',"(Hidden)")
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
# logger.debug(data)
|
# logger.debug(data)
|
||||||
|
|
@ -238,11 +248,14 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
authfrom = soup.find('footer')
|
authfrom = soup.find('footer')
|
||||||
alist = authfrom.find_all('a', {'rel' : 'author'})
|
alist = authfrom.find_all('a', {'rel' : 'author'})
|
||||||
for a in alist:
|
if (alist):
|
||||||
self.story.addToList('authorId',a['href'].split('/')[2])
|
for a in alist:
|
||||||
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
self.story.addToList('authorId',a['href'].split('/')[2])
|
||||||
## both 's Page and ’s Page
|
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
||||||
self.story.addToList('author',re.sub(r".s Page$","",stripHTML(a)))
|
## both 's Page and ’s Page
|
||||||
|
self.story.addToList('author',re.sub(r".s Page$","",stripHTML(a)))
|
||||||
|
else:
|
||||||
|
logger.info("AuthorList empty. Contest entry?")
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
# If multiple chapters, they are in "index-list" div.
|
# If multiple chapters, they are in "index-list" div.
|
||||||
|
|
@ -261,7 +274,10 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
||||||
# The rest of the metadata is within the article tag.
|
# The rest of the metadata is within the article tag.
|
||||||
soup = soup.find('article')
|
soup = soup.find('article')
|
||||||
|
|
||||||
self.getStoryMetadataFromAuthorPage()
|
if (self.story.getList('authorUrl')):
|
||||||
|
self.getStoryMetadataFromAuthorPage()
|
||||||
|
else:
|
||||||
|
logger.info("No authorurl found, could be contest story...")
|
||||||
|
|
||||||
# Some books have a cover in the index page.
|
# Some books have a cover in the index page.
|
||||||
# Samples are:
|
# Samples are:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue