From 9cda10e255c67f27bcca619bc74a3cb812c42077 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 9 Jun 2017 17:53:15 -0500 Subject: [PATCH 1/9] Tweak skip_threadmarks_categories comments in INI. --- calibre-plugin/plugin-defaults.ini | 4 ++-- fanficfare/defaults.ini | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 7d0d40cd..167d067f 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -576,8 +576,8 @@ always_use_forumtags:false ## you to leave out categories you don't want. Skipping categories ## will also speed downloads as categories other than 'Threadmarks' ## don't use Reader Mode. -## The current list of categories is: -## Threadmarks,Sidestory,Apocrypha,Images,Informational,Staff Post +## The last known list of categories is: +## Threadmarks,Sidestory,Apocrypha,Media,Informational,Staff Post #skip_threadmarks_categories:Staff Post ## I'm not a fan of using the word 'Apocrypha' for the threadmark diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 1c518ad2..1b597a7c 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -618,8 +618,8 @@ always_use_forumtags:false ## you to leave out categories you don't want. Skipping categories ## will also speed downloads as categories other than 'Threadmarks' ## don't use Reader Mode. -## The current list of categories is: -## Threadmarks,Sidestory,Apocrypha,Images,Informational,Staff Post +## The last known list of categories is: +## Threadmarks,Sidestory,Apocrypha,Media,Informational,Staff Post #skip_threadmarks_categories:Staff Post ## I'm not a fan of using the word 'Apocrypha' for the threadmark From 5066ac07e0456c754474fbd4abfbbf4cf5bbf3ab Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 9 Jun 2017 17:54:08 -0500 Subject: [PATCH 2/9] Special error msg for storiesonline.net about Listing Theme. 
--- fanficfare/adapters/adapter_storiesonlinenet.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fanficfare/adapters/adapter_storiesonlinenet.py b/fanficfare/adapters/adapter_storiesonlinenet.py index 7b83acff..45795eaa 100644 --- a/fanficfare/adapters/adapter_storiesonlinenet.py +++ b/fanficfare/adapters/adapter_storiesonlinenet.py @@ -196,7 +196,11 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter): story_found = False while not story_found: page = page + 1 - data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page)) + try: + data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.FailedToDownload("Story not found in Author's list--change Listings Theme back to Classic") asoup = self.make_soup(data) a = asoup.findAll('td', {'class' : 'lc2'}) From 1078279c979534ffc061540a996ae4af4f5dc846 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 10 Jun 2017 10:40:55 -0500 Subject: [PATCH 3/9] Change adapter_fanfiktionde to https. --- fanficfare/adapters/adapter_fanfiktionde.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fanficfare/adapters/adapter_fanfiktionde.py b/fanficfare/adapters/adapter_fanfiktionde.py index 97442ff4..8aa79057 100644 --- a/fanficfare/adapters/adapter_fanfiktionde.py +++ b/fanficfare/adapters/adapter_fanfiktionde.py @@ -48,7 +48,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1') + self._setURL('https://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1') # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','ffde') @@ -64,10 +64,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name" + return "https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?" + return r"https?"+re.escape("://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?" def use_pagecache(self): ''' @@ -146,12 +146,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter): head = soup.find('div', {'class' : 'story-left'}) a = head.find('a') self.story.setMetadata('authorId',a['href'].split('/')[2]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) self.story.setMetadata('author',stripHTML(a)) # Find the chapters: for chapter in soup.find('select').findAll('option'): - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])) self.story.setMetadata('numChapters',len(self.chapterUrls)) self.story.setMetadata('language','German') @@ -169,7 +169,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter): self.story.setMetadata('status', 'In-Progress') #find metadata on the author's page - asoup = self.make_soup(self._fetchUrl("http://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId'))) + asoup = 
self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId'))) tr=asoup.findAll('tr') for i in range(1,len(tr)): a = tr[i].find('a') From 84dc04bb151abb729e3fc82a2b312ca3a821a2fa Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 11 Jun 2017 13:20:01 -0500 Subject: [PATCH 4/9] Extend base_xenforoforum_adapter Reader Mode to other Threadmark Categories. --- fanficfare/adapters/base_xenforoforum_adapter.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fanficfare/adapters/base_xenforoforum_adapter.py b/fanficfare/adapters/base_xenforoforum_adapter.py index 0dcf4c97..930f2594 100644 --- a/fanficfare/adapters/base_xenforoforum_adapter.py +++ b/fanficfare/adapters/base_xenforoforum_adapter.py @@ -34,6 +34,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): # save for reader processing. self.reader = False self.post_cache = {} + self.threadmarks_for_reader = {} #logger.info("init url: "+url) BaseSiteAdapter.__init__(self, config, url) @@ -235,10 +236,12 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): # try threadmarks if no '#' in , require at least 2. navdiv = souptag.find('div',{'class':'pageNavLinkGroup'}) # first navdiv only. threadmarksas = navdiv.find_all('a',{'class':'threadmarksTrigger'}) + ## Loop on threadmark categories. 
threadmark_chapters=[] for threadmarksa in threadmarksas: soupmarks = self.make_soup(self._fetchUrl(self.getURLPrefix()+'/'+threadmarksa['href'])) + tmcat_num = threadmarksa['href'].split('category_id=')[1] ## prepend threadmark category name if not 'Threadmarks' prepend = "" tmcat_name = stripHTML(threadmarksa) @@ -268,7 +271,8 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): if self.getConfig('always_include_first_post'): threadmark_chapters.append((first_post_title,useurl)) - for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]: + for (tmcat_index,atag,url,name) in [ (i,x,x['href'],stripHTML(x)) for i,x in enumerate(markas) ]: + self.threadmarks_for_reader[self.normalize_chapterurl(url)] = (tmcat_num,tmcat_index) date = self.make_date(atag.find_next_sibling('div',{'class':'extra'})) if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'): self.story.setMetadata('datePublished', date) @@ -409,9 +413,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): souptag = self.get_cache_post(url) if not souptag: - reader_page_num = int((index+posts_per_page)/posts_per_page) + offset + (tmcat_num,tmcat_index)=self.threadmarks_for_reader[url] + reader_page_num = int((tmcat_index+posts_per_page)/posts_per_page) + offset logger.debug('Reader page offset:%s'%offset) - reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/reader?page='+unicode(reader_page_num) + reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num) logger.debug("Fetch reader URL to: %s"%reader_url) data = self._fetchUrl(reader_url) topsoup = self.make_soup(data) @@ -425,6 +430,8 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): anchorid = "post-"+url.split('/')[-2] logger.debug("anchorid: %s"%anchorid) souptag = topsoup.find('li',id=anchorid) + else: + logger.debug("post found in cache") if souptag: break From 
1b4a1241c839cfb1af36db406d9aea1752cae262 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 12 Jun 2017 19:26:55 -0500 Subject: [PATCH 5/9] Update translations. --- calibre-plugin/translations/et.po | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/calibre-plugin/translations/et.po b/calibre-plugin/translations/et.po index bf9d0c5c..76bfe0d5 100644 --- a/calibre-plugin/translations/et.po +++ b/calibre-plugin/translations/et.po @@ -7,8 +7,8 @@ msgid "" msgstr "" "Project-Id-Version: calibre-plugins\n" "POT-Creation-Date: 2017-05-09 11:44+Central Daylight Time\n" -"PO-Revision-Date: 2017-05-10 02:26+0000\n" -"Last-Translator: Kovid Goyal \n" +"PO-Revision-Date: 2017-06-12 15:10+0000\n" +"Last-Translator: Maidur\n" "Language-Team: Estonian (http://www.transifex.com/calibre/calibre-plugins/language/et/)\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" @@ -258,13 +258,13 @@ msgstr "Hoiata, kui uuendamine muudab olemasoleva raamatu URLi.\nSaidi fanfictio #: config.py:496 msgid "Search inside ebooks for Story URL?" -msgstr "" +msgstr "Otsi e-raamatutest jutu URLi?" #: config.py:497 msgid "" "Look for first valid story URL inside EPUB, ZIP(HTML) or TXT ebook formats if not found in metadata.\n" "Somewhat risky, could find wrong URL depending on ebook content." -msgstr "" +msgstr "Otsi formaadis EPUB, ZIP(HTML) või TXT e-raamatutest esimest korrektset jutu URLi, kui seda metaandmetest ei leitud.\nMingil määral riskantne, sõltuvalt e-raamatu sisust võib leida vale URLi." #: config.py:501 msgid "Post Processing Options" From 239dc88d867128a945954aa6da1c127e8201c38c Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 12 Jun 2017 19:27:19 -0500 Subject: [PATCH 6/9] Correct an error log message. 
--- fanficfare/story.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficfare/story.py b/fanficfare/story.py index 08d11142..a76d6a5d 100644 --- a/fanficfare/story.py +++ b/fanficfare/story.py @@ -66,7 +66,7 @@ try: scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight) if normalize_format_name(img.format)=="gif" and GifInfo(StringIO(data),CHECK_IS_ANIMATED).frameCount > 1: - raise exceptions.RejectImage("Animated gifs come out purely--not going to use it.") + raise exceptions.RejectImage("Animated gifs come out poorly--not going to use it.") if scaled: img.size = (nwidth, nheight) From 74f8161e186048f8f69c2b663e0761805c206665 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 12 Jun 2017 19:28:52 -0500 Subject: [PATCH 7/9] Bump Test Version --- calibre-plugin/__init__.py | 2 +- fanficfare/cli.py | 2 +- setup.py | 2 +- webservice/app.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index e4cbb410..1407c4a3 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -33,7 +33,7 @@ except NameError: from calibre.customize import InterfaceActionBase # pulled out from FanFicFareBase for saving in prefs.py -__version__ = (2, 12, 9) +__version__ = (2, 12, 10) ## Apparently the name for this class doesn't matter--it was still ## 'demo' for the first few versions. diff --git a/fanficfare/cli.py b/fanficfare/cli.py index 12a31925..82ca39de 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -26,7 +26,7 @@ import pprint import string import sys -version="2.12.9" +version="2.12.10" if sys.version_info < (2, 5): print 'This program requires Python 2.5 or newer.' diff --git a/setup.py b/setup.py index 6eec4220..44c4b66d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="FanFicFare", # Versions should comply with PEP440. 
- version="2.12.9", + version="2.12.10", description='A tool for downloading fanfiction to eBook formats', long_description=long_description, diff --git a/webservice/app.yaml b/webservice/app.yaml index f615ee27..40c5791b 100644 --- a/webservice/app.yaml +++ b/webservice/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanficfare application: fanficfare -version: 2-12-9 +version: 2-12-10 runtime: python27 api_version: 1 threadsafe: true From 18c04298683193bc4a577a6b44c95b183d8e6706 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 13 Jun 2017 10:10:05 -0500 Subject: [PATCH 8/9] Update adapter_webnovelcom for site changes. Thanks, Ser4nb2LUY6e --- calibre-plugin/plugin-defaults.ini | 8 +++++ fanficfare/adapters/adapter_webnovelcom.py | 40 ++++++++++++++++------ fanficfare/defaults.ini | 8 +++++ 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 167d067f..a382a014 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -2942,6 +2942,14 @@ translator_label: Translator editor_label: Editor extra_titlepage_entries: translator, editor +## webnovel.com is, as a whole, utf-8. There are even utf-8 encoded +## Chinese characters in the HTML *comments*. However, I've seen at +## least one story that still managed to have Simplified Chinese +## encoding in it. But none of the SC encodings I tried worked; I +## suspect because of the utf-8 comments in the HTML. 
+#chardet_confidence_limit:0.9 +#website_encodings:auto,utf8,Windows-1252 + [www.whofic.com] website_encodings:Windows-1252,utf8 diff --git a/fanficfare/adapters/adapter_webnovelcom.py b/fanficfare/adapters/adapter_webnovelcom.py index deb53309..8c519814 100644 --- a/fanficfare/adapters/adapter_webnovelcom.py +++ b/fanficfare/adapters/adapter_webnovelcom.py @@ -21,6 +21,7 @@ import logging import re import urllib2 +import json from datetime import datetime, timedelta from base_adapter import BaseSiteAdapter @@ -43,7 +44,17 @@ def _parse_relative_date_string(string_): 'minute(s)': 'minutes', 'hour(s)': 'hours', 'day(s)': 'days', - 'week(s)': 'weeks' + 'week(s)': 'weeks', + 'seconds': 'seconds', + 'minutes': 'minutes', + 'hours': 'hours', + 'days': 'days', + 'weeks': 'weeks', + 'second': 'seconds', + 'minute': 'minutes', + 'hour': 'hours', + 'day': 'days', + 'week': 'weeks', } value, unit_string, rest = string_.split() @@ -123,8 +134,11 @@ class WWWWebNovelComAdapter(BaseSiteAdapter): bookdetails = soup.find('div', {'class': 'g_col_8'}) # Title - a = bookdetails.find('h2', {'class': 'lh1d2'}) - self.story.setMetadata('title', stripHTML(a)) + title = bookdetails.find('h2', {'class': 'lh1d2'}) + # done as a loop in case there isn't one, or more than one. + for tag in title.find_all('small'): + tag.extract() + self.story.setMetadata('title', stripHTML(title)) # Find authorid and URL from... author url. paras = bookdetails.find_all('p') @@ -144,12 +158,18 @@ class WWWWebNovelComAdapter(BaseSiteAdapter): category = stripHTML(paras[0].strong).strip() self.story.setMetadata('category', category) - # Getting the ChapterUrls - chaps = soup.find('div', {'id': 'contentsModal'}).find_all('a') - for chap in chaps: - # capitalize to change leading 'chapter' to 'Chapter'.
- chap_title = stripHTML(chap).capitalize() - chap_Url = 'https:' + chap['href'] + ## get _csrfToken cookie for chapter list fetch + csrfToken = None + for cookie in self.get_configuration().get_cookiejar(): + if cookie.name == '_csrfToken': + csrfToken = cookie.value + break + + ## get chapters from a json API url. + jsondata = json.loads(self._fetchUrl("https://"+self.getSiteDomain()+"/apiajax/chapter/GetChapterList?_csrfToken="+csrfToken+"&bookId="+self.story.getMetadata('storyId'))) + for chap in jsondata["data"]["chapterItems"]: + chap_title = 'Chapter ' + unicode(chap['chapterIndex']) + ' - ' + chap['chapterName'] + chap_Url = url + '/' + chap['chapterId'] self.chapterUrls.append((chap_title, chap_Url)) self.story.setMetadata('numChapters', len(self.chapterUrls)) @@ -182,7 +202,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter): if story is None: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - for tag in story.find_all('form'): + for tag in story.find_all('form') + story.find_all('div',{'class':'cha-bts'}): tag.extract() return self.utf8FromSoup(url, story) diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 1b597a7c..32995848 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -2958,6 +2958,14 @@ translator_label: Translator editor_label: Editor extra_titlepage_entries: translator, editor +## webnovel.com is, as a whole, utf-8. There are even utf-8 encoded +## Chinese characters in the HTML *comments*. However, I've seen at +## least one story that still managed to have Simplified Chinese +## encoding in it. But none of the SC encodings I tried worked; I +## suspect because of the utf-8 comments in the HTML. 
+#chardet_confidence_limit:0.9 +#website_encodings:auto,utf8,Windows-1252 + [www.whofic.com] website_encodings:Windows-1252,utf8 From 49fb2552e1d22b6a822b7b51b4f28ca2c682c8bc Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 13 Jun 2017 10:19:52 -0500 Subject: [PATCH 9/9] Bump Test Version --- calibre-plugin/__init__.py | 2 +- fanficfare/cli.py | 2 +- setup.py | 2 +- webservice/app.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 1407c4a3..3f65ce89 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -33,7 +33,7 @@ except NameError: from calibre.customize import InterfaceActionBase # pulled out from FanFicFareBase for saving in prefs.py -__version__ = (2, 12, 10) +__version__ = (2, 12, 11) ## Apparently the name for this class doesn't matter--it was still ## 'demo' for the first few versions. diff --git a/fanficfare/cli.py b/fanficfare/cli.py index 82ca39de..377829d8 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -26,7 +26,7 @@ import pprint import string import sys -version="2.12.10" +version="2.12.11" if sys.version_info < (2, 5): print 'This program requires Python 2.5 or newer.' diff --git a/setup.py b/setup.py index 44c4b66d..35dbe2d8 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="FanFicFare", # Versions should comply with PEP440. - version="2.12.10", + version="2.12.11", description='A tool for downloading fanfiction to eBook formats', long_description=long_description, diff --git a/webservice/app.yaml b/webservice/app.yaml index 40c5791b..ca68254b 100644 --- a/webservice/app.yaml +++ b/webservice/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanficfare application: fanficfare -version: 2-12-10 +version: 2-12-11 runtime: python27 api_version: 1 threadsafe: true