Merge pull request #171 from davidfor/master

Storiesonline and Literotica updates
2026-01-05 15:44:14 +01:00 · 2017-04-06 10:46:30 -05:00 · 2017-04-06 10:46:30 -05:00 · cd0178030c
commit cd0178030c
parent a7d5f35565 1d99fc11d7
5 changed files with 42 additions and 11 deletions
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@ -1475,6 +1475,10 @@ extra_titlepage_entries:eroticatags,averrating
 ## Extract more erotica_tags from the meta tag of each chapter
 use_meta_keywords: true

+## Chapters can be in different categories. When true, every chapter's
+## category is added to the story; defaults to false to match previous versions.
+chapter_categories_use_all: false
+
 ## For multiple chapter stories, attempt to clean up the chapter title. This will
 ## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1"
 ## or just use the text. If this can't be done, the full title is used.
--- a/fanficfare/adapters/adapter_literotica.py
+++ b/fanficfare/adapters/adapter_literotica.py
@ -227,6 +227,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
                description = "%d. %s" % (len(descriptions)+1,stripHTML(chapterTr.findAll("td")[1]))
                description = stripHTML(chapterTr.findAll("td")[1])
                chapterLink = chapterTr.find("td", "fc").find("a")
+                if self.getConfig('chapter_categories_use_all'):
+                    self.story.addToList('category', chapterTr.findAll("td")[2].text)
                self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
                pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)
                dates.append(pub_date)
@ -305,7 +307,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):

        self.story.setMetadata('numChapters', len(self.chapterUrls))

-        self.story.setMetadata('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
+        # Add the category from the breadcrumb. This might duplicate a category already added.
+        self.story.addToList('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
        self.getCategories(soup1)
 #         self.story.setMetadata('description', soup1.find('meta', {'name': 'description'})['content'])

--- a/fanficfare/adapters/adapter_storiesonlinenet.py
+++ b/fanficfare/adapters/adapter_storiesonlinenet.py
@ -169,19 +169,20 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
        a = soup.find('h1')
        self.story.setMetadata('title',stripHTML(a))

-        notice = soup.find('div', {'class' : 'notice'})
-        if notice:
-            self.story.setMetadata('notice',unicode(notice))
-
        # Find authorid and URL from... author url.
-        for a in soup.findAll('a', href=re.compile(r"/a/\w+")):
+        nav_section = soup.find('nav')
+        for a in nav_section.findAll('a', {'rel' : 'author'}):
            self.story.addToList('authorId',a['href'].split('/')[2])
            self.story.addToList('authorUrl','http://'+self.host+a['href'])
            self.story.addToList('author',stripHTML(a).replace("'s Page",""))

+        # The rest of the metadata is within the article tag.
+        soup = soup.find('article')
+
        # Find the chapters:
        chapters = soup.findAll('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+":\d+(/.*)?$"))
        if len(chapters) != 0:
+            logger.debug("Number of chapters: {0}".format(len(chapters)))
            for chapter in chapters:
                # just in case there's tags, like <i> in chapter titles.
                self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']))
@ -192,18 +193,17 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):

        # surprisingly, the detailed page does not give enough details, so go to author's page
        page=0
-        i=0
-        while i == 0:
+        story_found = False
+        while not story_found:
+            page = page + 1
            data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page))
            asoup = self.make_soup(data)

            a = asoup.findAll('td', {'class' : 'lc2'})
            for lc2 in a:
                if lc2.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId'))):
-                    i=1
+                    story_found = True
                    break
-                if a[len(a)-1] == lc2:
-                    page=page+1

        for cat in lc2.findAll('div', {'class' : 'typediv'}):
            self.story.addToList('genre',cat.text)
@ -352,6 +352,24 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
        else:
            self.story.setMetadata('status', 'Completed')

+        # Remove all the metadata elements to leave any preamble text. This is usually
+        # a notice or a foreword.
+        if len(self.chapterUrls) > 1:
+            header = soup.find('header')
+            header.extract()
+        else:
+            soup = soup.find('header')
+        # Remove some tags based on their class or id
+        elements_to_remove = ['#det-link', '#s-details', '#index-list', '#s-title', '#s-auth', '.copy']
+        if not self.getConfig('include_images'):
+            elements_to_remove.append('img')
+        for element_name in elements_to_remove:
+            elements = soup.select(element_name)
+            for element in elements:
+                element.extract()
+        if len(soup.contents ) > 0 and (len(soup.text.strip()) > 0 or len(soup.find_all('img')) > 0):
+            self.story.setMetadata('notice', self.utf8FromSoup(url, soup))
+
    # grab the text for an individual chapter.
    def getChapterText(self, url):

--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@ -209,6 +209,7 @@ def get_valid_set_options():
               'romancecat_to_characters_ships':(['tthfanfic.org'],None,boollist),

               'use_meta_keywords':(['literotica.com'],None,boollist),
+               'chapter_categories_use_all':(['literotica.com'],None,boollist),
               'clean_chapter_titles':(['literotica.com'],None,boollist),
               'description_in_chapter':(['literotica.com'],None,boollist),

@ -389,6 +390,7 @@ def get_valid_keywords():
                 'pairingcat_to_characters_ships',
                 'romancecat_to_characters_ships',
                 'use_meta_keywords',
+                 'chapter_categories_use_all',
                 'clean_chapter_titles',
                 'description_in_chapter',
                 'inject_chapter_title',
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@ -1497,6 +1497,10 @@ extra_titlepage_entries:eroticatags,averrating
 ## Extract more erotica_tags from the meta tag of each chapter
 use_meta_keywords: true

+## Chapters can be in different categories. When true, every chapter's
+## category is added to the story; defaults to false to match previous versions.
+chapter_categories_use_all: false
+
 ## For multiple chapter stories, attempt to clean up the chapter title. This will
 ## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1"
 ## or just use the text. If this can't be done, the full title is used.