diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index e4cbb410..3f65ce89 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -33,7 +33,7 @@ except NameError: from calibre.customize import InterfaceActionBase # pulled out from FanFicFareBase for saving in prefs.py -__version__ = (2, 12, 9) +__version__ = (2, 12, 11) ## Apparently the name for this class doesn't matter--it was still ## 'demo' for the first few versions. diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 7d0d40cd..a382a014 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -576,8 +576,8 @@ always_use_forumtags:false ## you to leave out categories you don't want. Skipping categories ## will also speed downloads as categories other than 'Threadmarks' ## don't use Reader Mode. -## The current list of categories is: -## Threadmarks,Sidestory,Apocrypha,Images,Informational,Staff Post +## The last known list of categories is: +## Threadmarks,Sidestory,Apocrypha,Media,Informational,Staff Post #skip_threadmarks_categories:Staff Post ## I'm not a fan of using the word 'Apocrypha' for the threadmark @@ -2942,6 +2942,14 @@ translator_label: Translator editor_label: Editor extra_titlepage_entries: translator, editor +## webnovel.com is, as a whole, utf-8. There are even utf-8 encoded +## Chinese characters in the HTML *comments*. However, I've seen at +## least one story that still managed to have Simplified Chinese +## encoding in it. But none of the SC encodings I tried worked; I +## suspect because of the utf-8 comments in the HTML. +#chardet_confidence_limit:0.9 +#website_encodings:auto,utf8,Windows-1252 + [www.whofic.com] website_encodings:Windows-1252,utf8 diff --git a/calibre-plugin/translations/et.po b/calibre-plugin/translations/et.po index bf9d0c5c..76bfe0d5 100644 --- a/calibre-plugin/translations/et.po +++ b/calibre-plugin/translations/et.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: calibre-plugins\n" "POT-Creation-Date: 2017-05-09 11:44+Central Daylight Time\n" -"PO-Revision-Date: 2017-05-10 02:26+0000\n" -"Last-Translator: Kovid Goyal \n" +"PO-Revision-Date: 2017-06-12 15:10+0000\n" +"Last-Translator: Maidur\n" "Language-Team: Estonian (http://www.transifex.com/calibre/calibre-plugins/language/et/)\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" @@ -258,13 +258,13 @@ msgstr "Hoiata, kui uuendamine muudab olemasoleva raamatu URLi.\nSaidi fanfictio #: config.py:496 msgid "Search inside ebooks for Story URL?" -msgstr "" +msgstr "Otsi e-raamatutest jutu URLi?" #: config.py:497 msgid "" "Look for first valid story URL inside EPUB, ZIP(HTML) or TXT ebook formats if not found in metadata.\n" "Somewhat risky, could find wrong URL depending on ebook content." -msgstr "" +msgstr "Otsi formaadis EPUB, ZIP(HTML) või TXT e-raamatutest esimest korrektset jutu URLi, kui seda metaandmetest ei leitud.\nMingil määral riskantne, sõltuvalt e-raamatu sisust võib leida vale URLi." #: config.py:501 msgid "Post Processing Options" diff --git a/fanficfare/adapters/adapter_fanfiktionde.py b/fanficfare/adapters/adapter_fanfiktionde.py index 97442ff4..8aa79057 100644 --- a/fanficfare/adapters/adapter_fanfiktionde.py +++ b/fanficfare/adapters/adapter_fanfiktionde.py @@ -48,7 +48,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1') + self._setURL('https://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1') # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','ffde') @@ -64,10 +64,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name" + return "https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?" + return r"https?"+re.escape("://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?" def use_pagecache(self): ''' @@ -146,12 +146,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter): head = soup.find('div', {'class' : 'story-left'}) a = head.find('a') self.story.setMetadata('authorId',a['href'].split('/')[2]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) self.story.setMetadata('author',stripHTML(a)) # Find the chapters: for chapter in soup.find('select').findAll('option'): - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])) self.story.setMetadata('numChapters',len(self.chapterUrls)) self.story.setMetadata('language','German') @@ -169,7 +169,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter): self.story.setMetadata('status', 'In-Progress') #find metadata on the author's page - asoup = self.make_soup(self._fetchUrl("http://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId'))) + asoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId'))) tr=asoup.findAll('tr') for i in range(1,len(tr)): a = tr[i].find('a') diff --git a/fanficfare/adapters/adapter_storiesonlinenet.py b/fanficfare/adapters/adapter_storiesonlinenet.py index 7b83acff..45795eaa 100644 --- a/fanficfare/adapters/adapter_storiesonlinenet.py +++ b/fanficfare/adapters/adapter_storiesonlinenet.py @@ -196,7 +196,11 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter): story_found = False while not story_found: page = page + 1 - data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page)) + try: + data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.FailedToDownload("Story not found in Author's list--change Listings Theme back to Classic") asoup = self.make_soup(data) a = asoup.findAll('td', {'class' : 'lc2'}) diff --git a/fanficfare/adapters/adapter_webnovelcom.py b/fanficfare/adapters/adapter_webnovelcom.py index deb53309..8c519814 100644 --- a/fanficfare/adapters/adapter_webnovelcom.py +++ b/fanficfare/adapters/adapter_webnovelcom.py @@ -21,6 +21,7 @@ import logging import re import urllib2 +import json from datetime import datetime, timedelta from base_adapter import BaseSiteAdapter @@ -43,7 +44,17 @@ def _parse_relative_date_string(string_): 'minute(s)': 'minutes', 'hour(s)': 'hours', 'day(s)': 'days', - 'week(s)': 'weeks' + 'week(s)': 'weeks', + 'seconds': 'seconds', + 'minutes': 'minutes', + 'hours': 'hours', + 'days': 'days', + 'weeks': 'weeks', + 'second': 'seconds', + 'minute': 'minutes', + 'hour': 'hours', + 'day': 'days', + 'week': 'weeks', } value, unit_string, rest = string_.split() @@ -123,8 +134,11 @@ class WWWWebNovelComAdapter(BaseSiteAdapter): bookdetails = soup.find('div', {'class': 'g_col_8'}) # Title - a = bookdetails.find('h2', {'class': 'lh1d2'}) - self.story.setMetadata('title', stripHTML(a)) + title = bookdetails.find('h2', {'class': 'lh1d2'}) + # done as a loop incase there isn't one, or more than one. + for tag in title.find_all('small'): + tag.extract() + self.story.setMetadata('title', stripHTML(title)) # Find authorid and URL from... author url. paras = bookdetails.find_all('p') @@ -144,12 +158,18 @@ class WWWWebNovelComAdapter(BaseSiteAdapter): category = stripHTML(paras[0].strong).strip() self.story.setMetadata('category', category) - # Getting the ChapterUrls - chaps = soup.find('div', {'id': 'contentsModal'}).find_all('a') - for chap in chaps: - # capitalize to change leading 'chapter' to 'Chapter'. - chap_title = stripHTML(chap).capitalize() - chap_Url = 'https:' + chap['href'] + ## get _csrfToken cookie for chapter list fetch + csrfToken = None + for cookie in self.get_configuration().get_cookiejar(): + if cookie.name == '_csrfToken': + csrfToken = cookie.value + break + + ## get chapters from a json API url. + jsondata = json.loads(self._fetchUrl("https://"+self.getSiteDomain()+"/apiajax/chapter/GetChapterList?_csrfToken="+csrfToken+"&bookId="+self.story.getMetadata('storyId'))) + for chap in jsondata["data"]["chapterItems"]: + chap_title = 'Chapter ' + unicode(chap['chapterIndex']) + ' - ' + chap['chapterName'] + chap_Url = url + '/' + chap['chapterId'] self.chapterUrls.append((chap_title, chap_Url)) self.story.setMetadata('numChapters', len(self.chapterUrls)) @@ -182,7 +202,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter): if story is None: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - for tag in story.find_all('form'): + for tag in story.find_all('form') + story.find_all('div',{'class':'cha-bts'}): tag.extract() return self.utf8FromSoup(url, story) diff --git a/fanficfare/adapters/base_xenforoforum_adapter.py b/fanficfare/adapters/base_xenforoforum_adapter.py index 0dcf4c97..930f2594 100644 --- a/fanficfare/adapters/base_xenforoforum_adapter.py +++ b/fanficfare/adapters/base_xenforoforum_adapter.py @@ -34,6 +34,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): # save for reader processing. self.reader = False self.post_cache = {} + self.threadmarks_for_reader = {} #logger.info("init url: "+url) BaseSiteAdapter.__init__(self, config, url) @@ -235,10 +236,12 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): # try threadmarks if no '#' in , require at least 2. navdiv = souptag.find('div',{'class':'pageNavLinkGroup'}) # first navdiv only. threadmarksas = navdiv.find_all('a',{'class':'threadmarksTrigger'}) + ## Loop on threadmark categories. threadmark_chapters=[] for threadmarksa in threadmarksas: soupmarks = self.make_soup(self._fetchUrl(self.getURLPrefix()+'/'+threadmarksa['href'])) + tmcat_num = threadmarksa['href'].split('category_id=')[1] ## prepend threadmark category name if not 'Threadmarks' prepend = "" tmcat_name = stripHTML(threadmarksa) @@ -268,7 +271,8 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): if self.getConfig('always_include_first_post'): threadmark_chapters.append((first_post_title,useurl)) - for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]: + for (tmcat_index,atag,url,name) in [ (i,x,x['href'],stripHTML(x)) for i,x in enumerate(markas) ]: + self.threadmarks_for_reader[self.normalize_chapterurl(url)] = (tmcat_num,tmcat_index) date = self.make_date(atag.find_next_sibling('div',{'class':'extra'})) if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'): self.story.setMetadata('datePublished', date) @@ -409,9 +413,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): souptag = self.get_cache_post(url) if not souptag: - reader_page_num = int((index+posts_per_page)/posts_per_page) + offset + (tmcat_num,tmcat_index)=self.threadmarks_for_reader[url] + reader_page_num = int((tmcat_index+posts_per_page)/posts_per_page) + offset logger.debug('Reader page offset:%s'%offset) - reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/reader?page='+unicode(reader_page_num) + reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num) logger.debug("Fetch reader URL to: %s"%reader_url) data = self._fetchUrl(reader_url) topsoup = self.make_soup(data) @@ -425,6 +430,8 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): anchorid = "post-"+url.split('/')[-2] logger.debug("anchorid: %s"%anchorid) souptag = topsoup.find('li',id=anchorid) + else: + logger.debug("post found in cache") if souptag: break diff --git a/fanficfare/cli.py b/fanficfare/cli.py index 12a31925..377829d8 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -26,7 +26,7 @@ import pprint import string import sys -version="2.12.9" +version="2.12.11" if sys.version_info < (2, 5): print 'This program requires Python 2.5 or newer.' diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 1c518ad2..32995848 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -618,8 +618,8 @@ always_use_forumtags:false ## you to leave out categories you don't want. Skipping categories ## will also speed downloads as categories other than 'Threadmarks' ## don't use Reader Mode. -## The current list of categories is: -## Threadmarks,Sidestory,Apocrypha,Images,Informational,Staff Post +## The last known list of categories is: +## Threadmarks,Sidestory,Apocrypha,Media,Informational,Staff Post #skip_threadmarks_categories:Staff Post ## I'm not a fan of using the word 'Apocrypha' for the threadmark @@ -2958,6 +2958,14 @@ translator_label: Translator editor_label: Editor extra_titlepage_entries: translator, editor +## webnovel.com is, as a whole, utf-8. There are even utf-8 encoded +## Chinese characters in the HTML *comments*. However, I've seen at +## least one story that still managed to have Simplified Chinese +## encoding in it. But none of the SC encodings I tried worked; I +## suspect because of the utf-8 comments in the HTML. +#chardet_confidence_limit:0.9 +#website_encodings:auto,utf8,Windows-1252 + [www.whofic.com] website_encodings:Windows-1252,utf8 diff --git a/fanficfare/story.py b/fanficfare/story.py index 08d11142..a76d6a5d 100644 --- a/fanficfare/story.py +++ b/fanficfare/story.py @@ -66,7 +66,7 @@ try: scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight) if normalize_format_name(img.format)=="gif" and GifInfo(StringIO(data),CHECK_IS_ANIMATED).frameCount > 1: - raise exceptions.RejectImage("Animated gifs come out purely--not going to use it.") + raise exceptions.RejectImage("Animated gifs come out poorly--not going to use it.") if scaled: img.size = (nwidth, nheight) diff --git a/setup.py b/setup.py index 6eec4220..35dbe2d8 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="FanFicFare", # Versions should comply with PEP440. - version="2.12.9", + version="2.12.11", description='A tool for downloading fanfiction to eBook formats', long_description=long_description, diff --git a/webservice/app.yaml b/webservice/app.yaml index f615ee27..ca68254b 100644 --- a/webservice/app.yaml +++ b/webservice/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanficfare application: fanficfare -version: 2-12-9 +version: 2-12-11 runtime: python27 api_version: 1 threadsafe: true