adapter_storiesonlinenet.py - tolerate contest stories

Contest stories have author="(Hidden)", which breaks the code that pulls story info from the author's page.
Added checks for this, plus checks that soup actually found results before blindly using them.
Authored by Brian on 2025-10-31 15:01:45 -07:00, committed by GitHub
parent 7f0d7f70be
commit e2961eaadf

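The failure the commit message describes boils down to BeautifulSoup's find()/find_all() returning None or an empty result when the expected markup is missing, while the old code assumed there was always something to use. A minimal sketch of the first case (illustrative only, not part of the commit; the sample markup and host string are made up):

    from bs4 import BeautifulSoup

    # A contest entry credits the story to "(Hidden)" and carries no
    # rel="bookmark" link, so an unguarded lookup blows up.
    data = '<html><body><h1>Some Story</h1> by (Hidden)</body></html>'
    soup = BeautifulSoup(data, 'html.parser')

    a = soup.find('a', rel='bookmark')          # None on a contest entry
    try:
        url = 'https://storiesonline.net' + a['href']
    except TypeError:
        # 'NoneType' object is not subscriptable -- this is where the
        # unguarded code dies; the patched code keeps the URL it already has.
        print('No bookmark link; falling back to the existing URL')

The later hunks apply the same idea to the author lookups: an empty find_all() result simply skips the loop and logs, and the final authorUrl check keeps getStoryMetadataFromAuthorPage() from running when there is no author page to fetch.
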
@@ -209,7 +209,11 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
         ## regardless.
         soup = self.make_soup(data)
         a = soup.find('a',rel="bookmark")
-        url = 'https://'+self.host+a['href']
+        if (a):
+            url = 'https://'+self.host+a['href']
+        else:
+            # Contest entries do not have bookmark HREF
+            logger.info("No Bookmark HREF, using URL="+url)
 
         ## Premium has "?ind=1" to force index.
         ## May not be needed w/o premium
@@ -228,6 +232,12 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
             raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! The story you're trying to access is being filtered by your choice of contents filtering.")
         elif "Error! Daily Limit Reached" in data or "Sorry! You have reached your daily limit of" in data:
             raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Error! Daily Limit Reached")
+        elif "by (Hidden)" in data:
+            #Contest entries have author set to "(Hidden)" which breaks author lookups below
+            logger.info("Contest entry, setting authorId=(Hidden)")
+            self.story.addToList('authorId',"(Hidden)")
+            logger.info("Contest entry, setting author=(Hidden)")
+            self.story.addToList('author',"(Hidden)")
 
         soup = self.make_soup(data)
         # logger.debug(data)
@@ -238,11 +248,14 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
 
         authfrom = soup.find('footer')
         alist = authfrom.find_all('a', {'rel' : 'author'})
-        for a in alist:
-            self.story.addToList('authorId',a['href'].split('/')[2])
-            self.story.addToList('authorUrl','https://'+self.host+a['href'])
-            ## both 's Page and s Page
-            self.story.addToList('author',re.sub(r".s Page$","",stripHTML(a)))
+        if (alist):
+            for a in alist:
+                self.story.addToList('authorId',a['href'].split('/')[2])
+                self.story.addToList('authorUrl','https://'+self.host+a['href'])
+                ## both 's Page and s Page
+                self.story.addToList('author',re.sub(r".s Page$","",stripHTML(a)))
+        else:
+            logger.info("AuthorList empty. Contest entry?")
 
         # Find the chapters:
         # If multiple chapters, they are in "index-list" div.
@@ -261,7 +274,10 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
 
         # The rest of the metadata is within the article tag.
         soup = soup.find('article')
-        self.getStoryMetadataFromAuthorPage()
+        if (self.story.getList('authorUrl')):
+            self.getStoryMetadataFromAuthorPage()
+        else:
+            logger.info("No authorurl found, could be contest story...")
 
         # Some books have a cover in the index page.
         # Samples are: