From a58d6cc5302092da86d29c3f0cf3970311192764 Mon Sep 17 00:00:00 2001 From: David Date: Fri, 7 Apr 2017 00:03:58 +1000 Subject: [PATCH 1/2] For storiesonline, read text from index page Extend the text put into the notice to all the text on the index page. This can be used for a preface and might include a cover. --- .../adapters/adapter_storiesonlinenet.py | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/fanficfare/adapters/adapter_storiesonlinenet.py b/fanficfare/adapters/adapter_storiesonlinenet.py index bc17aaee..7b83acff 100644 --- a/fanficfare/adapters/adapter_storiesonlinenet.py +++ b/fanficfare/adapters/adapter_storiesonlinenet.py @@ -169,19 +169,20 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter): a = soup.find('h1') self.story.setMetadata('title',stripHTML(a)) - notice = soup.find('div', {'class' : 'notice'}) - if notice: - self.story.setMetadata('notice',unicode(notice)) - # Find authorid and URL from... author url. - for a in soup.findAll('a', href=re.compile(r"/a/\w+")): + nav_section = soup.find('nav') + for a in nav_section.findAll('a', {'rel' : 'author'}): self.story.addToList('authorId',a['href'].split('/')[2]) self.story.addToList('authorUrl','http://'+self.host+a['href']) self.story.addToList('author',stripHTML(a).replace("'s Page","")) + # The rest of the metadata is within the article tag. + soup = soup.find('article') + # Find the chapters: chapters = soup.findAll('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+":\d+(/.*)?$")) if len(chapters) != 0: + logger.debug("Number of chapters: {0}".format(len(chapters))) for chapter in chapters: # just in case there's tags, like in chapter titles. 
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href'])) @@ -192,18 +193,17 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter): # surprisingly, the detailed page does not give enough details, so go to author's page page=0 - i=0 - while i == 0: + story_found = False + while not story_found: + page = page + 1 data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page)) asoup = self.make_soup(data) a = asoup.findAll('td', {'class' : 'lc2'}) for lc2 in a: if lc2.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId'))): - i=1 + story_found = True break - if a[len(a)-1] == lc2: - page=page+1 for cat in lc2.findAll('div', {'class' : 'typediv'}): self.story.addToList('genre',cat.text) @@ -352,6 +352,24 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter): else: self.story.setMetadata('status', 'Completed') + # Remove all the metadata elements to leave any preamble text. This is usually + # a notice or a foreword. + if len(self.chapterUrls) > 1: + header = soup.find('header') + header.extract() + else: + soup = soup.find('header') + # Remove some tags based on their class or id + elements_to_remove = ['#det-link', '#s-details', '#index-list', '#s-title', '#s-auth', '.copy'] + if not self.getConfig('include_images'): + elements_to_remove.append('img') + for element_name in elements_to_remove: + elements = soup.select(element_name) + for element in elements: + element.extract() + if len(soup.contents ) > 0 and (len(soup.text.strip()) > 0 or len(soup.find_all('img')) > 0): + self.story.setMetadata('notice', self.utf8FromSoup(url, soup)) + # grab the text for an individual chapter. def getChapterText(self, url): From 1d99fc11d798a3ab8f12320d3e8a0c5260f9b84d Mon Sep 17 00:00:00 2001 From: David Date: Fri, 7 Apr 2017 00:06:14 +1000 Subject: [PATCH 2/2] Add option to use all chapter categories in Literotica stories Currently only the first category for a multiple chapter story is used. 
Option added to use all, but disabled by default for backward compatibility. --- calibre-plugin/plugin-defaults.ini | 4 ++++ fanficfare/adapters/adapter_literotica.py | 5 ++++- fanficfare/configurable.py | 2 ++ fanficfare/defaults.ini | 4 ++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 17247d92..bafed271 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -1475,6 +1475,10 @@ extra_titlepage_entries:eroticatags,averrating ## Extract more erotica_tags from the meta tag of each chapter use_meta_keywords: true +## Chapters can be in different categories. Default to not using all +## to be consistent with previous version. +chapter_categories_use_all: false + ## For multiple chapter stories, attempt to clean up the chapter title. This will ## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1" ## or just use the text. If this can't be done, the full title is used. diff --git a/fanficfare/adapters/adapter_literotica.py b/fanficfare/adapters/adapter_literotica.py index 135363ab..eaca5d43 100644 --- a/fanficfare/adapters/adapter_literotica.py +++ b/fanficfare/adapters/adapter_literotica.py @@ -227,6 +227,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): description = "%d. 
%s" % (len(descriptions)+1,stripHTML(chapterTr.findAll("td")[1])) description = stripHTML(chapterTr.findAll("td")[1]) chapterLink = chapterTr.find("td", "fc").find("a") + if self.getConfig('chapter_categories_use_all'): + self.story.addToList('category', chapterTr.findAll("td")[2].text) self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text) pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat) dates.append(pub_date) @@ -305,7 +307,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): self.story.setMetadata('numChapters', len(self.chapterUrls)) - self.story.setMetadata('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string) + # Add the category from the breadcrumb. This might duplicate a category already added. + self.story.addToList('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string) self.getCategories(soup1) # self.story.setMetadata('description', soup1.find('meta', {'name': 'description'})['content']) diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index f9eda195..dc45709e 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -209,6 +209,7 @@ def get_valid_set_options(): 'romancecat_to_characters_ships':(['tthfanfic.org'],None,boollist), 'use_meta_keywords':(['literotica.com'],None,boollist), + 'chapter_categories_use_all':(['literotica.com'],None,boollist), 'clean_chapter_titles':(['literotica.com'],None,boollist), 'description_in_chapter':(['literotica.com'],None,boollist), @@ -389,6 +390,7 @@ def get_valid_keywords(): 'pairingcat_to_characters_ships', 'romancecat_to_characters_ships', 'use_meta_keywords', + 'chapter_categories_use_all', 'clean_chapter_titles', 'description_in_chapter', 'inject_chapter_title', diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index fbc29e1e..1c43e3a9 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -1497,6 +1497,10 @@ extra_titlepage_entries:eroticatags,averrating ## Extract more erotica_tags 
from the meta tag of each chapter use_meta_keywords: true +## Chapters can be in different categories. Default to not using all +## to be consistent with previous version. +chapter_categories_use_all: false + ## For multiple chapter stories, attempt to clean up the chapter title. This will ## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1" ## or just use the text. If this can't be done, the full title is used.