From a58d6cc5302092da86d29c3f0cf3970311192764 Mon Sep 17 00:00:00 2001
From: David <davidfor@internode.on.net>
Date: Fri, 7 Apr 2017 00:03:58 +1000
Subject: [PATCH 1/2] For storiesonline, read text from index page

Extend the text put into the notice to include all the text on the index
page. This can be used for a preface and might include a cover.
---
 .../adapters/adapter_storiesonlinenet.py      | 38 ++++++++++++++-----
 1 file changed, 28 insertions(+), 10 deletions(-)
diff --git a/fanficfare/adapters/adapter_storiesonlinenet.py b/fanficfare/adapters/adapter_storiesonlinenet.py
index bc17aaee..7b83acff 100644
--- a/fanficfare/adapters/adapter_storiesonlinenet.py
+++ b/fanficfare/adapters/adapter_storiesonlinenet.py
@@ -169,19 +169,20 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
         a = soup.find('h1')
         self.story.setMetadata('title',stripHTML(a))
 
-        notice = soup.find('div', {'class' : 'notice'})
-        if notice:
-            self.story.setMetadata('notice',unicode(notice))
-
         # Find authorid and URL from... author url.
-        for a in soup.findAll('a', href=re.compile(r"/a/\w+")):
+        nav_section = soup.find('nav')
+        for a in nav_section.findAll('a', {'rel' : 'author'}):
             self.story.addToList('authorId',a['href'].split('/')[2])
             self.story.addToList('authorUrl','http://'+self.host+a['href'])
             self.story.addToList('author',stripHTML(a).replace("'s Page",""))
 
+        # The rest of the metadata is within the article tag.
+        soup = soup.find('article')
+
         # Find the chapters:
         chapters = soup.findAll('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+":\d+(/.*)?$"))
         if len(chapters) != 0:
+            logger.debug("Number of chapters: {0}".format(len(chapters)))
             for chapter in chapters:
                 # just in case there's tags, like <i> in chapter titles.
                 self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']))
@@ -192,18 +193,17 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
 
         # surprisingly, the detailed page does not give enough details, so go to author's page
         page=0
-        i=0
-        while i == 0:
+        story_found = False
+        while not story_found:
+            page = page + 1
             data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page))
             asoup = self.make_soup(data)
 
             a = asoup.findAll('td', {'class' : 'lc2'})
             for lc2 in a:
                 if lc2.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId'))):
-                    i=1
+                    story_found = True
                     break
-                if a[len(a)-1] == lc2:
-                    page=page+1
 
         for cat in lc2.findAll('div', {'class' : 'typediv'}):
             self.story.addToList('genre',cat.text)
@@ -352,6 +352,24 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
         else:
             self.story.setMetadata('status', 'Completed')
 
+        # Remove all the metadata elements to leave any preamble text. This is usually
+        # a notice or a foreword.
+        if len(self.chapterUrls) > 1:
+            header = soup.find('header')
+            header.extract()
+        else:
+            soup = soup.find('header')
+        # Remove some tags based on their class or id
+        elements_to_remove = ['#det-link', '#s-details', '#index-list', '#s-title', '#s-auth', '.copy']
+        if not self.getConfig('include_images'):
+            elements_to_remove.append('img')
+        for element_name in elements_to_remove:
+            elements = soup.select(element_name)
+            for element in elements:
+                element.extract()
+        if len(soup.contents ) > 0 and (len(soup.text.strip()) > 0 or len(soup.find_all('img')) > 0):
+            self.story.setMetadata('notice', self.utf8FromSoup(url, soup))
+
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 

From 1d99fc11d798a3ab8f12320d3e8a0c5260f9b84d Mon Sep 17 00:00:00 2001
From: David <davidfor@internode.on.net>
Date: Fri, 7 Apr 2017 00:06:14 +1000
Subject: [PATCH 2/2] Add option to use all chapter categories in Literotica
 stories

Currently only the first category of a multiple-chapter story is used.
Add an option to use all of them; it is off by default for backwards compatibility.
---
 calibre-plugin/plugin-defaults.ini        | 4 ++++
 fanficfare/adapters/adapter_literotica.py | 5 ++++-
 fanficfare/configurable.py                | 2 ++
 fanficfare/defaults.ini                   | 4 ++++
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini
index 17247d92..bafed271 100644
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@@ -1475,6 +1475,10 @@ extra_titlepage_entries:eroticatags,averrating
 ## Extract more erotica_tags from the meta tag of each chapter
 use_meta_keywords: true
 
+## Chapters can be in different categories. Default to not using all
+## to be consistent with previous version.
+chapter_categories_use_all: false
+
 ## For multiple chapter stories, attempt to clean up the chapter title. This will
 ## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1"
 ## or just use the text. If this can't be done, the full title is used.
diff --git a/fanficfare/adapters/adapter_literotica.py b/fanficfare/adapters/adapter_literotica.py
index 135363ab..eaca5d43 100644
--- a/fanficfare/adapters/adapter_literotica.py
+++ b/fanficfare/adapters/adapter_literotica.py
@@ -227,6 +227,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
                 description = "%d. %s" % (len(descriptions)+1,stripHTML(chapterTr.findAll("td")[1]))
                 description = stripHTML(chapterTr.findAll("td")[1])
                 chapterLink = chapterTr.find("td", "fc").find("a")
+                if self.getConfig('chapter_categories_use_all'):
+                    self.story.addToList('category', chapterTr.findAll("td")[2].text)
                 self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
                 pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)
                 dates.append(pub_date)
@@ -305,7 +307,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
 
         self.story.setMetadata('numChapters', len(self.chapterUrls))
 
-        self.story.setMetadata('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
+        # Add the category from the breadcrumb. This might duplicate a category already added.
+        self.story.addToList('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
         self.getCategories(soup1)
 #         self.story.setMetadata('description', soup1.find('meta', {'name': 'description'})['content'])
 
diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py
index f9eda195..dc45709e 100644
--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@@ -209,6 +209,7 @@ def get_valid_set_options():
                'romancecat_to_characters_ships':(['tthfanfic.org'],None,boollist),
 
                'use_meta_keywords':(['literotica.com'],None,boollist),
+               'chapter_categories_use_all':(['literotica.com'],None,boollist),
                'clean_chapter_titles':(['literotica.com'],None,boollist),
                'description_in_chapter':(['literotica.com'],None,boollist),
 
@@ -389,6 +390,7 @@ def get_valid_keywords():
                  'pairingcat_to_characters_ships',
                  'romancecat_to_characters_ships',
                  'use_meta_keywords',
+                 'chapter_categories_use_all',
                  'clean_chapter_titles',
                  'description_in_chapter',
                  'inject_chapter_title',
diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini
index fbc29e1e..1c43e3a9 100644
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@@ -1497,6 +1497,10 @@ extra_titlepage_entries:eroticatags,averrating
 ## Extract more erotica_tags from the meta tag of each chapter
 use_meta_keywords: true
 
+## Chapters can be in different categories. Default to not using all
+## to be consistent with previous version.
+chapter_categories_use_all: false
+
 ## For multiple chapter stories, attempt to clean up the chapter title. This will
 ## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1"
 ## or just use the text. If this can't be done, the full title is used.