From 189832a7a9a4f55574101cf745cc90ea946b9fc1 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 29 Feb 2012 12:44:12 -0600 Subject: [PATCH] Add replace_hr & never_make_cover options, allow empty fimfiction stories. --- calibre-plugin/__init__.py | 2 +- defaults.ini | 9 ++++++++ .../adapters/adapter_fimfictionnet.py | 22 +++++++++++-------- fanficdownloader/adapters/adapter_test1.py | 2 +- fanficdownloader/adapters/base_adapter.py | 16 ++++++++++---- fanficdownloader/epubutils.py | 2 +- fanficdownloader/story.py | 6 +++-- plugin-defaults.ini | 9 ++++++++ 8 files changed, 50 insertions(+), 18 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 9e1489a8..74727b9c 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 2) + version = (1, 5, 3) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/defaults.ini b/defaults.ini index 66b2a615..4d65760a 100644 --- a/defaults.ini +++ b/defaults.ini @@ -147,6 +147,11 @@ extratags: FanFiction # (.*)Great(.*)=>\1Moderate\2 # .*-Centered=> +## Some readers don't show horizontal rule (
) tags correctly. +## This replaces them all with a centered '* * *'. (Note centering +## doesn't work on some devices either.) +#replace_hr: false + ## Each output format has a section that overrides [defaults] [html] @@ -244,6 +249,10 @@ output_css: ## in chapters. #make_firstimage_cover: false +## If set, the epub will never have a cover, even if include_images is on +## and the site has specific cover images. +#never_make_cover: false + ## If set, and there isn't already a cover image from the adapter or ## from make_firstimage_cover, this image will be made the cover. ## It can be either a 'file:' or 'http:' url. diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py index 3adff377..207196e6 100644 --- a/fanficdownloader/adapters/adapter_fimfictionnet.py +++ b/fanficdownloader/adapters/adapter_fimfictionnet.py @@ -102,7 +102,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): for chapter in soup.findAll("a", {"class":"chapter_link"}): chapterDates.append(chapter.span.extract().text.strip("()")) self.chapterUrls.append((chapter.text.strip(), "http://"+self.getSiteDomain() + chapter['href'])) - + self.story.setMetadata('numChapters',len(self.chapterUrls)) for character in [character_icon['title'] for character_icon in soup.findAll("a", {"class":"character_icon"})]: @@ -157,14 +157,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): now = datetime.datetime.now() - # Get the date of creation from the first chapter - datePublished_text = chapterDates[0] - day, month = datePublished_text.split() - day = re.sub(r"[^\d.]+", '', day) - datePublished = makeDate("%s%s%s"%(now.year,month,day), "%Y%b%d") - if datePublished > now : - datePublished = datePublished.replace(year=now.year-1) - self.story.setMetadata("datePublished", datePublished) dateUpdated_soup = bs.BeautifulSoup(data).find("div", {"class":"calendar"}) dateUpdated_soup.find('span').extract() dateUpdated = 
makeDate("%s%s"%(now.year,dateUpdated_soup.text), "%Y%b%d") @@ -172,6 +164,18 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): dateUpdated = datePublished.replace(year=now.year-1) self.story.setMetadata("dateUpdated", dateUpdated) + # Get the date of creation from the first chapter + if len(chapterDates) > 0: + datePublished_text = chapterDates[0] + day, month = datePublished_text.split() + day = re.sub(r"[^\d.]+", '', day) + datePublished = makeDate("%s%s%s"%(now.year,month,day), "%Y%b%d") + if datePublished > now : + datePublished = datePublished.replace(year=now.year-1) + self.story.setMetadata("datePublished", datePublished) + else: + self.story.setMetadata("datePublished", dateUpdated) + def getChapterText(self, url): logging.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'}) diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 13afacf2..0d5cb109 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -186,7 +186,7 @@ br breaks

br breaks


horizontal rules -
+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 85398a7f..678a9332 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -272,8 +272,9 @@ class BaseSiteAdapter(Configurable): self.story.setMetadata('description',stripHTML(svalue)) #print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n") - # this gives us a unicode object, not just a string containing bytes. + # This gives us a unicode object, not just a string containing bytes. # (I gave soup a unicode string, you'd think it could give it back...) + # Now also does a bunch of other common processing for us. def utf8FromSoup(self,url,soup,fetch=None): if not fetch: fetch=self._fetchUrlRaw @@ -294,9 +295,9 @@ class BaseSiteAdapter(Configurable): for attr in t._getAttrMap().keys(): if attr not in acceptable_attributes: del t[attr] ## strip all tag attributes except href and name - + # these are not acceptable strict XHTML. But we do already have - # CSS classes of the same names defined in constants.py + # CSS classes of the same names defined if t.name in ('u'): t['class']=t.name t.name='span' @@ -307,9 +308,16 @@ class BaseSiteAdapter(Configurable): if t.string != None and len(t.string.strip()) == 0 : t.extract() + retval = soup.__str__('utf8').decode('utf-8') + + if self.getConfig('replace_hr'): + # replacing a self-closing tag with a container tag in the + # soup is more difficult than it first appears. So cheat. + retval = retval.replace("
","
* * *
") + # Don't want body tags in chapter html--writers add them. # This is primarily for epub updates. - return re.sub(r"\r?\n?","",soup.__str__('utf8').decode('utf-8')) + return re.sub(r"\r?\n?","",retval) fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05", "June":"06","July":"07", "August":"08", "September":"09", "October":"10", diff --git a/fanficdownloader/epubutils.py b/fanficdownloader/epubutils.py index a7ccedae..27d5257c 100644 --- a/fanficdownloader/epubutils.py +++ b/fanficdownloader/epubutils.py @@ -51,7 +51,7 @@ def get_update_data(inputio, # Hellmouth, which uses chapter0.html. if( item.getAttribute("media-type") == "application/xhtml+xml" ): href=relpath+item.getAttribute("href") - print("---- item href:%s path part: %s"%(href,get_path_part(href))) + #print("---- item href:%s path part: %s"%(href,get_path_part(href))) if re.match(r'.*/(file|chapter)\d+\.x?html',href): if getsoups: soup = bs.BeautifulSoup(epub.read(href).decode("utf-8")) diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 5964eb72..fd58def0 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -315,7 +315,7 @@ class Story: return "failedtoload" # explicit cover, make the first image. - if cover: + if cover and not configurable.getConfig('never_make_cover'): if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']: # remove existing cover, if there is one. del self.imgurls[0] @@ -327,7 +327,9 @@ class Story: else: self.imgurls.append(imgurl) # First image, copy not link because calibre will replace with it's cover. 
- if (len(self.imgurls)==1 and configurable.getConfig('make_firstimage_cover')): + if len(self.imgurls)==1 and \ + configurable.getConfig('make_firstimage_cover') and \ + not configurable.getConfig('never_make_cover'): newsrc = "images/cover.%s"%ext self.cover=newsrc self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data}) diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 9eb5f6bf..16b9a1ad 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -122,6 +122,11 @@ extratags: FanFiction # (.*)Great(.*)=>\1Moderate\2 # .*-Centered=> +## Some readers don't show horizontal rule (
) tags correctly. +## This replaces them all with a centered '* * *'. (Note centering +## doesn't work on some devices either.) +#replace_hr: false + ## Each output format has a section that overrides [defaults] [html] @@ -216,6 +221,10 @@ output_css: ## in chapters. #make_firstimage_cover: false +## If set, the epub will never have a cover, even if include_images is on +## and the site has specific cover images. +#never_make_cover: false + ## If set, and there isn't already a cover image from the adapter or ## from make_firstimage_cover, this image will be made the cover. ## It can be either a 'file:' or 'http:' url.