Add replace_hr & never_make_cover options, allow empty fimfiction stories.

This commit is contained in:
Jim Miller 2012-02-29 12:44:12 -06:00
parent 6528b3c9a5
commit 189832a7a9
8 changed files with 50 additions and 18 deletions

View file

@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
version = (1, 5, 2)
version = (1, 5, 3)
minimum_calibre_version = (0, 8, 30)
#: This field defines the GUI plugin class that contains all the code

View file

@ -147,6 +147,11 @@ extratags: FanFiction
# (.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
## Some readers don't show horizontal rule (<hr />) tags correctly.
## This replaces them all with a centered '* * *'. (Note centering
## doesn't work on some devices either.)
#replace_hr: false
## Each output format has a section that overrides [defaults]
[html]
@ -244,6 +249,10 @@ output_css:
## in chapters.
#make_firstimage_cover: false
## If set, the epub will never have a cover, even if include_images is on
## and the site has specific cover images.
#never_make_cover: false
## If set, and there isn't already a cover image from the adapter or
## from make_firstimage_cover, this image will be made the cover.
## It can be either a 'file:' or 'http:' url.

View file

@ -102,7 +102,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
for chapter in soup.findAll("a", {"class":"chapter_link"}):
chapterDates.append(chapter.span.extract().text.strip("()"))
self.chapterUrls.append((chapter.text.strip(), "http://"+self.getSiteDomain() + chapter['href']))
self.story.setMetadata('numChapters',len(self.chapterUrls))
for character in [character_icon['title'] for character_icon in soup.findAll("a", {"class":"character_icon"})]:
@ -157,14 +157,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
now = datetime.datetime.now()
# Get the date of creation from the first chapter
datePublished_text = chapterDates[0]
day, month = datePublished_text.split()
day = re.sub(r"[^\d.]+", '', day)
datePublished = makeDate("%s%s%s"%(now.year,month,day), "%Y%b%d")
if datePublished > now :
datePublished = datePublished.replace(year=now.year-1)
self.story.setMetadata("datePublished", datePublished)
dateUpdated_soup = bs.BeautifulSoup(data).find("div", {"class":"calendar"})
dateUpdated_soup.find('span').extract()
dateUpdated = makeDate("%s%s"%(now.year,dateUpdated_soup.text), "%Y%b%d")
@ -172,6 +164,18 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
dateUpdated = datePublished.replace(year=now.year-1)
self.story.setMetadata("dateUpdated", dateUpdated)
# Get the date of creation from the first chapter
if len(chapterDates) > 0:
datePublished_text = chapterDates[0]
day, month = datePublished_text.split()
day = re.sub(r"[^\d.]+", '', day)
datePublished = makeDate("%s%s%s"%(now.year,month,day), "%Y%b%d")
if datePublished > now :
datePublished = datePublished.replace(year=now.year-1)
self.story.setMetadata("datePublished", datePublished)
else:
self.story.setMetadata("datePublished", dateUpdated)
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'})

View file

@ -186,7 +186,7 @@ br breaks<br><br>
br breaks<br><br>
<hr>
horizontal rules
<hr>
<hr size=1 noshade>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>

View file

@ -272,8 +272,9 @@ class BaseSiteAdapter(Configurable):
self.story.setMetadata('description',stripHTML(svalue))
#print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n")
# this gives us a unicode object, not just a string containing bytes.
# This gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...)
# Now also does a bunch of other common processing for us.
def utf8FromSoup(self,url,soup,fetch=None):
if not fetch:
fetch=self._fetchUrlRaw
@ -294,9 +295,9 @@ class BaseSiteAdapter(Configurable):
for attr in t._getAttrMap().keys():
if attr not in acceptable_attributes:
del t[attr] ## strip all tag attributes except href and name
# these are not acceptable strict XHTML. But we do already have
# CSS classes of the same names defined in constants.py
# CSS classes of the same names defined
if t.name in ('u'):
t['class']=t.name
t.name='span'
@ -307,9 +308,16 @@ class BaseSiteAdapter(Configurable):
if t.string != None and len(t.string.strip()) == 0 :
t.extract()
retval = soup.__str__('utf8').decode('utf-8')
if self.getConfig('replace_hr'):
# replacing a self-closing tag with a container tag in the
# soup is more difficult than it first appears. So cheat.
retval = retval.replace("<hr />","<div class='center'>* * *</div>")
# Don't want body tags in chapter html--writers add them.
# This is primarily for epub updates.
return re.sub(r"</?body>\r?\n?","",soup.__str__('utf8').decode('utf-8'))
return re.sub(r"</?body>\r?\n?","",retval)
fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05",
"June":"06","July":"07", "August":"08", "September":"09", "October":"10",

View file

@ -51,7 +51,7 @@ def get_update_data(inputio,
# Hellmouth, which uses chapter0.html.
if( item.getAttribute("media-type") == "application/xhtml+xml" ):
href=relpath+item.getAttribute("href")
print("---- item href:%s path part: %s"%(href,get_path_part(href)))
#print("---- item href:%s path part: %s"%(href,get_path_part(href)))
if re.match(r'.*/(file|chapter)\d+\.x?html',href):
if getsoups:
soup = bs.BeautifulSoup(epub.read(href).decode("utf-8"))

View file

@ -315,7 +315,7 @@ class Story:
return "failedtoload"
# explicit cover, make the first image.
if cover:
if cover and not configurable.getConfig('never_make_cover'):
if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']:
# remove existing cover, if there is one.
del self.imgurls[0]
@ -327,7 +327,9 @@ class Story:
else:
self.imgurls.append(imgurl)
# First image, copy not link because calibre will replace with its cover.
if (len(self.imgurls)==1 and configurable.getConfig('make_firstimage_cover')):
if len(self.imgurls)==1 and \
configurable.getConfig('make_firstimage_cover') and \
not configurable.getConfig('never_make_cover'):
newsrc = "images/cover.%s"%ext
self.cover=newsrc
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})

View file

@ -122,6 +122,11 @@ extratags: FanFiction
# (.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
## Some readers don't show horizontal rule (<hr />) tags correctly.
## This replaces them all with a centered '* * *'. (Note centering
## doesn't work on some devices either.)
#replace_hr: false
## Each output format has a section that overrides [defaults]
[html]
@ -216,6 +221,10 @@ output_css:
## in chapters.
#make_firstimage_cover: false
## If set, the epub will never have a cover, even if include_images is on
## and the site has specific cover images.
#never_make_cover: false
## If set, and there isn't already a cover image from the adapter or
## from make_firstimage_cover, this image will be made the cover.
## It can be either a 'file:' or 'http:' url.