Merge remote-tracking branch 'refs/remotes/JimmXinu/master'

FaceDeer 2017-06-13 22:12:54 -06:00
commit 362d3e7959
12 changed files with 80 additions and 33 deletions

View file

@@ -33,7 +33,7 @@ except NameError:
 from calibre.customize import InterfaceActionBase
 
 # pulled out from FanFicFareBase for saving in prefs.py
-__version__ = (2, 12, 9)
+__version__ = (2, 12, 11)
 
 ## Apparently the name for this class doesn't matter--it was still
 ## 'demo' for the first few versions.

View file

@@ -576,8 +576,8 @@ always_use_forumtags:false
 ## you to leave out categories you don't want. Skipping categories
 ## will also speed downloads as categories other than 'Threadmarks'
 ## don't use Reader Mode.
-## The current list of categories is:
-## Threadmarks,Sidestory,Apocrypha,Images,Informational,Staff Post
+## The last known list of categories is:
+## Threadmarks,Sidestory,Apocrypha,Media,Informational,Staff Post
 #skip_threadmarks_categories:Staff Post
 
 ## I'm not a fan of using the word 'Apocrypha' for the threadmark
@@ -2942,6 +2942,14 @@ translator_label: Translator
 editor_label: Editor
 extra_titlepage_entries: translator, editor
 
+## webnovel.com is, as a whole, utf-8. There are even utf-8 encoded
+## Chinese characters in the HTML *comments*. However, I've seen at
+## least one story that still managed to have Simplified Chinese
+## encoding in it. But none of the SC encodings I tried worked; I
+## suspect because of the utf-8 comments in the HTML.
+#chardet_confidence_limit:0.9
+#website_encodings:auto,utf8,Windows-1252
+
 [www.whofic.com]
 website_encodings:Windows-1252,utf8
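For reference, website_encodings is a comma-separated list tried in order, with 'auto' meaning chardet detection gated by chardet_confidence_limit. A minimal sketch of that fallback idea, not FanFicFare's actual decoder (decode_page and its signature are illustrative):

    # Sketch only -- illustrates the option semantics, not the real code.
    import chardet

    def decode_page(data, encodings, confidence_limit=0.9):
        for enc in encodings:
            if enc == 'auto':
                guess = chardet.detect(data)
                if guess['encoding'] and guess['confidence'] >= confidence_limit:
                    enc = guess['encoding']
                else:
                    continue
            try:
                return data.decode(enc)
            except (UnicodeDecodeError, LookupError):
                continue
        return data.decode('utf8', 'replace')  # last resort

    # e.g. decode_page(html_bytes, ['auto', 'utf8', 'Windows-1252'])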

View file

@@ -7,8 +7,8 @@ msgid ""
 msgstr ""
 "Project-Id-Version: calibre-plugins\n"
 "POT-Creation-Date: 2017-05-09 11:44+Central Daylight Time\n"
-"PO-Revision-Date: 2017-05-10 02:26+0000\n"
-"Last-Translator: Kovid Goyal <kovid@kovidgoyal.net>\n"
+"PO-Revision-Date: 2017-06-12 15:10+0000\n"
+"Last-Translator: Maidur\n"
 "Language-Team: Estonian (http://www.transifex.com/calibre/calibre-plugins/language/et/)\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
@@ -258,13 +258,13 @@ msgstr "Hoiata, kui uuendamine muudab olemasoleva raamatu URLi.\nSaidi fanfictio
 
 #: config.py:496
 msgid "Search inside ebooks for Story URL?"
-msgstr ""
+msgstr "Otsi e-raamatutest jutu URLi?"
 
 #: config.py:497
 msgid ""
 "Look for first valid story URL inside EPUB, ZIP(HTML) or TXT ebook formats if not found in metadata.\n"
 "Somewhat risky, could find wrong URL depending on ebook content."
-msgstr ""
+msgstr "Otsi formaadis EPUB, ZIP(HTML) või TXT e-raamatutest esimest korrektset jutu URLi, kui seda metaandmetest ei leitud.\nMingil määral riskantne, sõltuvalt e-raamatu sisust võib leida vale URLi."
 
 #: config.py:501
 msgid "Post Processing Options"

View file

@@ -48,7 +48,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
 
         # normalized story URL.
-        self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
+        self._setURL('https://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
 
         # Each adapter needs to have a unique site abbreviation.
         self.story.setMetadata('siteabbrev','ffde')
@@ -64,10 +64,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
     @classmethod
     def getSiteExampleURLs(cls):
-        return "http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name"
+        return "https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name"
 
     def getSiteURLPattern(self):
-        return re.escape("http://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?"
+        return r"https?"+re.escape("://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?"
 
     def use_pagecache(self):
         '''
@@ -146,12 +146,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
         head = soup.find('div', {'class' : 'story-left'})
         a = head.find('a')
         self.story.setMetadata('authorId',a['href'].split('/')[2])
-        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
         self.story.setMetadata('author',stripHTML(a))
 
         # Find the chapters:
         for chapter in soup.find('select').findAll('option'):
-            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value']))
+            self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value']))
 
         self.story.setMetadata('numChapters',len(self.chapterUrls))
         self.story.setMetadata('language','German')
@@ -169,7 +169,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
             self.story.setMetadata('status', 'In-Progress')
 
         #find metadata on the author's page
-        asoup = self.make_soup(self._fetchUrl("http://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
+        asoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
         tr=asoup.findAll('tr')
         for i in range(1,len(tr)):
             a = tr[i].find('a')
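The reworked getSiteURLPattern() above is the key change in this file: re.escape() no longer swallows the scheme, so both http and https story URLs match. A quick standalone check of the same pattern shape (domain hard-coded for illustration):

    import re

    pattern = r"https?" + re.escape("://www.fanfiktion.de/s/") + r"\w+(/\d+)?"

    assert re.match(pattern, "https://www.fanfiktion.de/s/46ccbef30000616306614050/1")
    assert re.match(pattern, "http://www.fanfiktion.de/s/46ccbef30000616306614050")
    assert not re.match(pattern, "https://www.fanfiktion.de/u/whatever")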

View file

@@ -196,7 +196,11 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
             story_found = False
             while not story_found:
                 page = page + 1
-                data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page))
+                try:
+                    data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page))
+                except urllib2.HTTPError, e:
+                    if e.code == 404:
+                        raise exceptions.FailedToDownload("Story not found in Author's list--change Listings Theme back to Classic")
                 asoup = self.make_soup(data)
 
                 a = asoup.findAll('td', {'class' : 'lc2'})
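One caveat worth noting: as written, an HTTPError other than 404 is caught and discarded, leaving data unset for the make_soup() call that follows. A slightly stricter variant of the same pattern would re-raise everything else (a sketch; fetch stands in for self._fetchUrl, and a plain Exception stands in for exceptions.FailedToDownload):

    import urllib2

    def fetch_author_page(fetch, url):
        # fetch stands in for self._fetchUrl; sketch only.
        try:
            return fetch(url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise Exception("Story not found in Author's list--"
                                "change Listings Theme back to Classic")
            raise  # don't swallow other HTTP errors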

View file

@@ -21,6 +21,7 @@
 import logging
 import re
 import urllib2
+import json
 
 from datetime import datetime, timedelta
 from base_adapter import BaseSiteAdapter
@@ -43,7 +44,17 @@ def _parse_relative_date_string(string_):
         'minute(s)': 'minutes',
         'hour(s)': 'hours',
         'day(s)': 'days',
-        'week(s)': 'weeks'
+        'week(s)': 'weeks',
+        'seconds': 'seconds',
+        'minutes': 'minutes',
+        'hours': 'hours',
+        'days': 'days',
+        'weeks': 'weeks',
+        'second': 'seconds',
+        'minute': 'minutes',
+        'hour': 'hours',
+        'day': 'days',
+        'week': 'weeks',
     }
 
     value, unit_string, rest = string_.split()
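The widened table above lets _parse_relative_date_string() cope with webnovel.com writing units as 'hour(s)', 'hours', or plain 'hour'. The underlying mechanism is just a timedelta keyword lookup; roughly (a standalone sketch of the idea, not the full function):

    from datetime import datetime, timedelta

    UNITS = {'hour': 'hours', 'hours': 'hours', 'hour(s)': 'hours'}  # etc.

    def parse_relative(string_, now=None):
        value, unit_string, rest = string_.split()  # e.g. "3 hours ago"
        now = now or datetime.now()
        return now - timedelta(**{UNITS[unit_string]: int(value)})

    # parse_relative("3 hours ago") -> a datetime three hours back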
@@ -123,8 +134,11 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
         bookdetails = soup.find('div', {'class': 'g_col_8'})
 
         # Title
-        a = bookdetails.find('h2', {'class': 'lh1d2'})
-        self.story.setMetadata('title', stripHTML(a))
+        title = bookdetails.find('h2', {'class': 'lh1d2'})
+        # done as a loop in case there isn't one, or more than one.
+        for tag in title.find_all('small'):
+            tag.extract()
+        self.story.setMetadata('title', stripHTML(title))
 
         # Find authorid and URL from... author url.
         paras = bookdetails.find_all('p')
@@ -144,12 +158,18 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
             category = stripHTML(paras[0].strong).strip()
             self.story.setMetadata('category', category)
 
         # Getting the ChapterUrls
-        chaps = soup.find('div', {'id': 'contentsModal'}).find_all('a')
-        for chap in chaps:
-            # capitalize to change leading 'chapter' to 'Chapter'.
-            chap_title = stripHTML(chap).capitalize()
-            chap_Url = 'https:' + chap['href']
+        ## get _csrfToken cookie for chapter list fetch
+        csrfToken = None
+        for cookie in self.get_configuration().get_cookiejar():
+            if cookie.name == '_csrfToken':
+                csrfToken = cookie.value
+                break
+        ## get chapters from a json API url.
+        jsondata = json.loads(self._fetchUrl("https://"+self.getSiteDomain()+"/apiajax/chapter/GetChapterList?_csrfToken="+csrfToken+"&bookId="+self.story.getMetadata('storyId')))
+        for chap in jsondata["data"]["chapterItems"]:
+            chap_title = 'Chapter ' + unicode(chap['chapterIndex']) + ' - ' + chap['chapterName']
+            chap_Url = url + '/' + chap['chapterId']
             self.chapterUrls.append((chap_title, chap_Url))
 
         self.story.setMetadata('numChapters', len(self.chapterUrls))
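The new chapter list comes from a JSON endpoint instead of scraping the contents modal, authenticated by the _csrfToken cookie that an earlier page fetch left in the cookiejar. The same cookie-scan-plus-API pattern in isolation (a sketch that builds its own opener, whereas the adapter reuses FanFicFare's configured one; the /book/<id> priming URL is an assumption about webnovel.com's story URL shape):

    import json
    import cookielib
    import urllib2

    def get_chapter_items(domain, book_id):
        jar = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
        # any page fetch from the site sets _csrfToken in the jar
        opener.open("https://" + domain + "/book/" + book_id).read()

        token = None
        for cookie in jar:
            if cookie.name == '_csrfToken':
                token = cookie.value
                break

        api = ("https://" + domain + "/apiajax/chapter/GetChapterList"
               "?_csrfToken=" + token + "&bookId=" + book_id)
        return json.loads(opener.open(api).read())["data"]["chapterItems"]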
@@ -182,7 +202,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
         if story is None:
             raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
 
-        for tag in story.find_all('form'):
+        for tag in story.find_all('form') + story.find_all('div',{'class':'cha-bts'}):
             tag.extract()
 
         return self.utf8FromSoup(url, story)

View file

@@ -34,6 +34,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
         # save for reader processing.
         self.reader = False
         self.post_cache = {}
+        self.threadmarks_for_reader = {}
 
         #logger.info("init url: "+url)
         BaseSiteAdapter.__init__(self, config, url)
@@ -235,10 +236,12 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
             # try threadmarks if no '#' in , require at least 2.
             navdiv = souptag.find('div',{'class':'pageNavLinkGroup'}) # first navdiv only.
             threadmarksas = navdiv.find_all('a',{'class':'threadmarksTrigger'})
 
+            ## Loop on threadmark categories.
             threadmark_chapters=[]
             for threadmarksa in threadmarksas:
                 soupmarks = self.make_soup(self._fetchUrl(self.getURLPrefix()+'/'+threadmarksa['href']))
+                tmcat_num = threadmarksa['href'].split('category_id=')[1]
                 ## prepend threadmark category name if not 'Threadmarks'
                 prepend = ""
                 tmcat_name = stripHTML(threadmarksa)
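This per-category loop is also where the skip_threadmarks_categories option documented in defaults.ini naturally hooks in: compare tmcat_name against the configured list before fetching a category. The filter itself is trivial (a standalone sketch; the helper name is made up here, and the real adapter's hook may differ):

    def categories_to_fetch(category_names, skip_setting):
        # skip_setting mirrors skip_threadmarks_categories, e.g. "Staff Post"
        skip = [s.strip() for s in (skip_setting or '').split(',') if s.strip()]
        return [name for name in category_names if name not in skip]

    # categories_to_fetch(['Threadmarks','Sidestory','Apocrypha','Media',
    #                      'Informational','Staff Post'], 'Staff Post')
    # -> everything except 'Staff Post'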
@@ -268,7 +271,8 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
                 if self.getConfig('always_include_first_post'):
                     threadmark_chapters.append((first_post_title,useurl))
 
-            for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]:
+            for (tmcat_index,atag,url,name) in [ (i,x,x['href'],stripHTML(x)) for i,x in enumerate(markas) ]:
+                self.threadmarks_for_reader[self.normalize_chapterurl(url)] = (tmcat_num,tmcat_index)
                 date = self.make_date(atag.find_next_sibling('div',{'class':'extra'}))
                 if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):
                     self.story.setMetadata('datePublished', date)
@@ -409,9 +413,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
                 souptag = self.get_cache_post(url)
                 if not souptag:
-                    reader_page_num = int((index+posts_per_page)/posts_per_page) + offset
+                    (tmcat_num,tmcat_index)=self.threadmarks_for_reader[url]
+                    reader_page_num = int((tmcat_index+posts_per_page)/posts_per_page) + offset
                     logger.debug('Reader page offset:%s'%offset)
-                    reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/reader?page='+unicode(reader_page_num)
+                    reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num)
                     logger.debug("Fetch reader URL to: %s"%reader_url)
                     data = self._fetchUrl(reader_url)
                     topsoup = self.make_soup(data)
@@ -425,6 +430,8 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
                     anchorid = "post-"+url.split('/')[-2]
                     logger.debug("anchorid: %s"%anchorid)
                     souptag = topsoup.find('li',id=anchorid)
+                else:
+                    logger.debug("post found in cache")
 
             if souptag:
                 break
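The reader_page_num arithmetic above is a ceiling-style division: threadmark indices 0 through posts_per_page-1 land on reader page 1, the next block on page 2, and so on, shifted by any page offset. Worked standalone (posts_per_page=10 is illustrative, not the forum's actual setting):

    def reader_page(tmcat_index, posts_per_page=10, offset=0):
        # same arithmetic as reader_page_num above
        return int((tmcat_index + posts_per_page) / posts_per_page) + offset

    assert [reader_page(i) for i in (0, 9, 10, 25)] == [1, 1, 2, 3]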

View file

@@ -26,7 +26,7 @@ import pprint
 import string
 import sys
 
-version="2.12.9"
+version="2.12.11"
 
 if sys.version_info < (2, 5):
     print 'This program requires Python 2.5 or newer.'

View file

@@ -618,8 +618,8 @@ always_use_forumtags:false
 ## you to leave out categories you don't want. Skipping categories
 ## will also speed downloads as categories other than 'Threadmarks'
 ## don't use Reader Mode.
-## The current list of categories is:
-## Threadmarks,Sidestory,Apocrypha,Images,Informational,Staff Post
+## The last known list of categories is:
+## Threadmarks,Sidestory,Apocrypha,Media,Informational,Staff Post
 #skip_threadmarks_categories:Staff Post
 
 ## I'm not a fan of using the word 'Apocrypha' for the threadmark
@@ -2958,6 +2958,14 @@ translator_label: Translator
 editor_label: Editor
 extra_titlepage_entries: translator, editor
 
+## webnovel.com is, as a whole, utf-8. There are even utf-8 encoded
+## Chinese characters in the HTML *comments*. However, I've seen at
+## least one story that still managed to have Simplified Chinese
+## encoding in it. But none of the SC encodings I tried worked; I
+## suspect because of the utf-8 comments in the HTML.
+#chardet_confidence_limit:0.9
+#website_encodings:auto,utf8,Windows-1252
+
 [www.whofic.com]
 website_encodings:Windows-1252,utf8

View file

@@ -66,7 +66,7 @@ try:
             scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight)
 
             if normalize_format_name(img.format)=="gif" and GifInfo(StringIO(data),CHECK_IS_ANIMATED).frameCount > 1:
-                raise exceptions.RejectImage("Animated gifs come out purely--not going to use it.")
+                raise exceptions.RejectImage("Animated gifs come out poorly--not going to use it.")
 
             if scaled:
                 img.size = (nwidth, nheight)

View file

@@ -21,7 +21,7 @@ setup(
     name="FanFicFare",
 
     # Versions should comply with PEP440.
-    version="2.12.9",
+    version="2.12.11",
 
     description='A tool for downloading fanfiction to eBook formats',
     long_description=long_description,

View file

@@ -1,6 +1,6 @@
 # ffd-retief-hrd fanficfare
 application: fanficfare
-version: 2-12-9
+version: 2-12-11
 runtime: python27
 api_version: 1
 threadsafe: true