From 23f93bde24541446d0dfd90f207a0cebe6a6bb01 Mon Sep 17 00:00:00 2001
From: Jim Miller <retiefjimm@gmail.com>
Date: Wed, 27 May 2020 09:54:39 -0500
Subject: [PATCH] Allow for stories without series in
 adapter_silmarillionwritersguildorg, clean up whitespace.

---
 .../adapter_silmarillionwritersguildorg.py    | 142 +++++++++---------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/fanficfare/adapters/adapter_silmarillionwritersguildorg.py b/fanficfare/adapters/adapter_silmarillionwritersguildorg.py
index 91101743..990757a8 100644
--- a/fanficfare/adapters/adapter_silmarillionwritersguildorg.py
+++ b/fanficfare/adapters/adapter_silmarillionwritersguildorg.py
@@ -88,71 +88,71 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
         # Now go hunting for all the meta data and the chapter list.
 
         ## Title and author
-        
+
         # find story header
         a = soup.find('h6')
 
         titleLinks = a.find_all('a')
         authorLink= titleLinks[1]
-        
+
         self.story.setMetadata('authorId',authorLink['href'].split('=')[1])
         self.story.setMetadata('authorUrl','https://'+self.host+'/archive/home/'+authorLink['href'])
         self.story.setMetadata('author',authorLink.string)
 
         self.story.setMetadata('title',a.find('strong').find('a').get_text())
-        
+
         # Site does some weird stuff with pagination on series view and will only display 25 stories per page of results
         # Therefor to get accurate index for series, we fetch all sub-pages of series and parse for valid story urls and add to a list,
         # Then find first instance of current story url and use the number of loop itteration for index
-        
-        # This is pretty slow but ehh it works 
-        
+
+        # This is pretty slow but ehh it works
+
         try:
             # Find Series name from series URL.
             a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
-            seriesName = a.string
-            seriesUrl = 'https://'+self.host+'/archive/home/'+a['href']
-            
-            self.story.setMetadata('seriesUrl',seriesUrl)
-            
-            #logger.debug("Series Url: "+seriesUrl)
-            
-            # Get Series page and convert to soup
-            seriesPageSoup = self.make_soup(self._fetchUrl(seriesUrl))
-            # Find Series page sub-pages
-            seriesPageUrlList = []
-            for i in seriesPageSoup.findAll('a', href=re.compile("viewseries.php\?seriesid=\d+&offset=\d+$")):
-                    # Don't include url from next button, is another http request and parse + could cause more bugs!
-                    if i.string != '[Next]':
-                        seriesPageUrlList.append(i)
-            
-            #get urls from all subpages and append to list
-            seriesStoryList = []
-            for seriesPagePageUrl in seriesPageUrlList:
-                seriesPagePageSoup = self.make_soup(self._fetchUrl('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
-                seriesPagePageStoryList = seriesPagePageSoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
-                
-                for seriesPagePageStoryUrl in seriesPagePageStoryList:
-                    seriesStoryList.append(seriesPagePageStoryUrl)
+            if a:
+                seriesName = a.string
+                seriesUrl = 'https://'+self.host+'/archive/home/'+a['href']
+
+                self.story.setMetadata('seriesUrl',seriesUrl)
+
+                #logger.debug("Series Url: "+seriesUrl)
+
+                # Get Series page and convert to soup
+                seriesPageSoup = self.make_soup(self._fetchUrl(seriesUrl))
+                # Find Series page sub-pages
+                seriesPageUrlList = []
+                for i in seriesPageSoup.findAll('a', href=re.compile("viewseries.php\?seriesid=\d+&offset=\d+$")):
+                        # Skip the '[Next]' button's URL: fetching it would mean another HTTP request and parse, and could cause more bugs.
+                        if i.string != '[Next]':
+                            seriesPageUrlList.append(i)
+
+                #get urls from all subpages and append to list
+                seriesStoryList = []
+                for seriesPagePageUrl in seriesPageUrlList:
+                    seriesPagePageSoup = self.make_soup(self._fetchUrl('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
+                    seriesPagePageStoryList = seriesPagePageSoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+
+                    for seriesPagePageStoryUrl in seriesPagePageStoryList:
+                        seriesStoryList.append(seriesPagePageStoryUrl)
+
+                # Find series index for story
+                i=1
+                for seriesStoriesUrl in seriesStoryList:
+                    if seriesStoriesUrl['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
+                        self.setSeries(seriesName, i)
+                        #logger.debug("Series Name: "+ seriesName)
+                        #logger.debug("Series Index: "+i)
+                        break
+                    i+=1
 
-            # Find series index for story
-            i=1
-            for seriesStoriesUrl in seriesStoryList:
-                if seriesStoriesUrl['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
-                    self.setSeries(seriesName, i)
-                    #logger.debug("Series Name: "+ seriesName)
-                    #logger.debug("Series Index: "+i)
-                    break
-                i+=1
-            
         except Exception as e:
-            raise e
-            # I find it hard to care if the series parsing fails
-            #pass
+            logger.warn("series parsing failed(%s)"%e)
+            pass
 
         # Find the chapters by regexing urls
         chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$"))
-        
+
         if len(chapters)==1:
             self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+'/archive/home/'+chapters[0]['href'])
         else:
@@ -161,9 +161,9 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
                 self.add_chapter(chapter,'https://'+self.host+'/archive/home/'+chapter['href'])
 
 	# find the details section for the work, will hopefully make parsing metadata a bit easier
-	
+
         workDetails = soup.find('div', {'id' : 'general'}).find('div', {'id' : 'general'})
-        
+
         # some metadata can be retrieved through regexes so will do that to try and avoid a janky mess.
 
         #get characters
@@ -171,30 +171,30 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
             charList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=characters'+"&charid=\d+$"))
             for char in charList:
                 self.story.addToList('characters',char.string)
-                
+
         except Exception as e:
             logger.warn("character parsing failed(%s)"%e)
-            
+
         #get warnings
         try:
             warnList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=class&type_id=2'+"&classid=\d+$"))
             for warn in warnList:
                 self.story.addToList('warnings', warn.string)
-                
+
         except Exception as e:
             logger.warn("warning parsing failed(%s)"%e)
-            
+
         #get genres
         try:
             genresList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=class&type_id=1'+"&classid=\d+$"))
             for genre in genresList:
                 self.story.addToList('genre', genre.string)
-                
+
         except Exception as e:
-            logger.warn("genre parsing failed(%s)"%e)    
-        
+            logger.warn("genre parsing failed(%s)"%e)
+
         # no convenient way to extract remaining metadata so bodge it by finding relevant identifier string and using next element as the data source
-        
+
         #get summary by finding identifier, then itterating until next identifier is found and using data between the two as the summary
         try:
             summaryStart = workDetails.find('strong',text='Summary: ')
@@ -208,67 +208,67 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
         except Exception as e:
             logger.warn("summary parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
 
-        
+
         #get rating
         try:
             rating = workDetails.find('strong',text='Rated:').next_sibling.string
             self.story.setMetadata('rating', rating)
         except Exception as e:
             logger.warn("rating parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-        
+
         #get completion status and correct for consistency with other adapters
         try:
             if (workDetails.find('strong',text='Completed:').next_sibling.string).lower() == "yes":
                 status="Completed"
-                
+
             else:
                 status="In-Progress"
-                
+
             self.story.setMetadata('status', status)
         except Exception as e:
             logger.warn("status parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-            
+
         #get wordcount
         try:
             wordCount = workDetails.find('strong',text='Word count:').next_sibling.string
             self.story.setMetadata('numWords', wordCount)
         except Exception as e:
             logger.warn("wordcount parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-        
+
         #get published date, this works for some reason yet doesn't without the spaces in it
         try:
             datePublished = workDetails.find('strong',text=' Published: ').next_sibling.string
             self.story.setMetadata('datePublished', makeDate(datePublished, self.dateformat))
-            
+
         except Exception as e:
             logger.warn("datePublished parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
-        
+
         #get updated date
         try:
             dateUpdated = workDetails.find('strong',text='Updated:').next_sibling.string
             self.story.setMetadata('dateUpdated', makeDate(dateUpdated, self.dateformat))
-            
+
         except Exception as e:
             logger.warn("dateUpdated parsing failed(%s) -- This can be caused by bad HTML in story description."%e)
 
     # grab the text for an individual chapter.
     def getChapterText(self, url):
-        
+
         logger.debug('Getting chapter text from: %s' % url)
-        
+
         data = self._fetchUrl(url)
         soup = self.make_soup(data)
-        
+
         # No convenient way to get story without the rest of the page, so get whole page and strip unneeded sections
-        
+
         contentParent = soup.find('div', {'id' : 'maincontent'}).find('div', {'id' : 'general'})
-        
-        contentParent.find('p').decompose() # remove page header        
+
+        contentParent.find('p').decompose() # remove page header
         contentParent.find_all('div',id='general')[2].decompose() #remove page footer
         contentParent.find_all('div',id='general')[0].decompose() #remove chapter select etc.
-        
+
         contentParent.name='div'
-        
+
         #error on failure
         if None == contentParent:
             raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)