diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini
index 8e563a16..d374a099 100644
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@@ -405,6 +405,13 @@ user_agent:FFF/2.X
## non-intuitive.
#description_limit:1000
+## Because some adapters can pull chapter URLs from human posts, the
+## odds of errors in the chapter URLs can be higher for some
+## sites/stories. You can set continue_on_chapter_error:true to
+## continue on after failing to download a chapter and instead record
+## an error message in the ebook for that chapter.
+continue_on_chapter_error:false
+
[base_efiction]
## At the time of writing, eFiction Base adapters allow downloading
## the whole story in bulk using the 'Print' feature. If 'bulk_load'
@@ -481,13 +488,6 @@ dateUpdated_format:%%Y-%%m-%%d %%H:%%M
## the description.
description_limit:500
-## Because base_xenforoforum adapters can pull chapter URLs from human
-## posts, the odds of errors in the chapter URLs are vastly higher.
-## You can set continue_on_chapter_error:true to continue on after
-## failing to download a chapter and instead record an error message
-## in the ebook for that chapter.
-continue_on_chapter_error:false
-
## When given a thread URL, use threadmarks as chapter links when
## there are at least this many threadmarks. A number of older
## threads have a single threadmark to an 'index' post. Set to 1 to
diff --git a/fanficfare/adapters/adapter_test1.py b/fanficfare/adapters/adapter_test1.py
index 5e51d211..9330c752 100644
--- a/fanficfare/adapters/adapter_test1.py
+++ b/fanficfare/adapters/adapter_test1.py
@@ -360,41 +360,28 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
## for chapter_urls setting.
logger.debug('Getting chapter text from: %s' % url)
- try:
- origurl = url
- (data,opened) = self._fetchUrlOpened(url,extrasleep=2.0)
- url = opened.geturl()
- if '#' in origurl and '#' not in url:
- url = url + origurl[origurl.index('#'):]
- logger.debug("chapter URL redirected to: %s"%url)
+ origurl = url
+ (data,opened) = self._fetchUrlOpened(url,extrasleep=2.0)
+ url = opened.geturl()
+ if '#' in origurl and '#' not in url:
+ url = url + origurl[origurl.index('#'):]
+ logger.debug("chapter URL redirected to: %s"%url)
- soup = self.make_soup(data)
+ soup = self.make_soup(data)
- if '#' in url:
- anchorid = url.split('#')[1]
- soup = soup.find('li',id=anchorid)
+ if '#' in url:
+ anchorid = url.split('#')[1]
+ soup = soup.find('li',id=anchorid)
- bq = soup.find('blockquote')
+ bq = soup.find('blockquote')
- bq.name='div'
+ bq.name='div'
- for iframe in bq.find_all('iframe'):
- iframe.extract() # calibre book reader & editor don't like iframes to youtube.
+ for iframe in bq.find_all('iframe'):
+ iframe.extract() # calibre book reader & editor don't like iframes to youtube.
- for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
-                qdiv.extract() # Remove <div class="quoteExpand">click to expand</div>
-
-        except Exception as e:
-            if self.getConfig('continue_on_chapter_error'):
-                bq = self.make_soup("""<div>
-<p><b>Error</b></p>
-<p>FanFicFare failed to download this chapter.  Because you have
-<b>continue_on_chapter_error</b> set to <b>true</b> in your personal.ini, the download continued.</p>
-<p>Chapter URL:<br>%s</p>
-<p>Error:<br><pre>%s</pre></p>
-</div>"""%(url,traceback.format_exc()))
-            else:
-                raise
+        for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
+            qdiv.extract() # Remove <div class="quoteExpand">click to expand</div>
return self.utf8FromSoup(url[:url.index('/',8)+1],bq)
diff --git a/fanficfare/adapters/adapter_wuxiaworldcom.py b/fanficfare/adapters/adapter_wuxiaworldcom.py
index e734f10b..fdc81aa1 100644
--- a/fanficfare/adapters/adapter_wuxiaworldcom.py
+++ b/fanficfare/adapters/adapter_wuxiaworldcom.py
@@ -28,7 +28,6 @@ import logging
import re
import urllib2
import urlparse
-import traceback
from base_adapter import BaseSiteAdapter, makeDate
@@ -150,32 +149,14 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
#logger.debug('Getting chapter text from: %s', url)
- try:
- data = self._fetchUrl(url)
- soup = self.make_soup(data)
- story = soup.find('div', {'itemprop':'articleBody'})
- if not story:
- raise exceptions.FailedToDownload(
- "Error downloading Chapter: %s! Missing required element!" % url)
- #removing the Previous and next chapter links
- for tag in story.find_all('a'):
- tag.extract()
-
- except Exception as e:
- if self.getConfig('continue_on_chapter_error'):
-                story = self.make_soup("""<div>
-<p><b>Error</b></p>
-<p>FanFicFare failed to download this chapter.  Because you have
-<b>continue_on_chapter_error</b> set to <b>true</b>, the download continued.</p>
-<p>Chapter URL:<br>%s</p>
-<p>Authors on wuxiaworld.com create their own index pages, so it's not
-uncommon for there to be 404 errors when there are links to chapters
-that haven't been uploaded yet.</p>
-<p>Error:<br><pre>%s</pre></p>
-</div>"""%(url,traceback.format_exc()))
- else:
- raise
+ data = self._fetchUrl(url)
+ soup = self.make_soup(data)
+ story = soup.find('div', {'itemprop':'articleBody'})
+ if not story:
+ raise exceptions.FailedToDownload(
+ "Error downloading Chapter: %s! Missing required element!" % url)
+ #removing the Previous and next chapter links
+ for tag in story.find_all('a'):
+ tag.extract()
return self.utf8FromSoup(url, story)
diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py
index 1324e40a..3fdbb79b 100644
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@@ -25,7 +25,7 @@ import urllib2 as u2
import urlparse as up
import cookielib as cl
from functools import partial
-import pickle
+import traceback
import bs4
@@ -178,12 +178,6 @@ class BaseSiteAdapter(Configurable):
'''
return False
- # def story_load(self,filename):
- # d = pickle.load(self.story.metadata,filename)
- # self.story.metadata = d['metadata']
- # self.chapterUrls = d['chapterlist']
- # self.story.metadataDone = True
-
def _setURL(self,url):
self.url = url
self.parsedUrl = up.urlparse(url)
@@ -395,17 +389,30 @@ class BaseSiteAdapter(Configurable):
self.oldchaptersdata[url]['chapterorigtitle'] !=
self.oldchaptersdata[url]['chaptertitle']) )
- if not data:
- data = self.getChapterTextNum(url,index)
- # if had to fetch and has existing chapters
- newchap = bool(self.oldchapters or self.oldchaptersmap)
+ try:
+ if not data:
+ data = self.getChapterTextNum(url,index)
+ # if had to fetch and has existing chapters
+ newchap = bool(self.oldchapters or self.oldchaptersmap)
- if index == 0 and self.getConfig('always_reload_first_chapter'):
- data = self.getChapterTextNum(url,index)
- # first chapter is rarely marked new
- # anyway--only if it's replaced during an
- # update.
- newchap = False
+ if index == 0 and self.getConfig('always_reload_first_chapter'):
+ data = self.getChapterTextNum(url,index)
+ # first chapter is rarely marked new
+ # anyway--only if it's replaced during an
+ # update.
+ newchap = False
+ except Exception as e:
+ if self.getConfig('continue_on_chapter_error'):
+                    data = self.make_soup("""<div>
+<p><b>Error</b></p>
+<p>FanFicFare failed to download this chapter.  Because
+<b>continue_on_chapter_error</b> is set to <b>true</b>, the download continued.</p>
+<p>Chapter URL:<br>%s</p>
+<p>Error:<br><pre>%s</pre></p>
+</div>"""%(url,traceback.format_exc().replace("&","&amp;").replace(">","&gt;").replace("<","&lt;")))
+ title = title+"(FAILED)"
+ else:
+ raise
self.story.addChapter(url,
removeEntities(title),
@@ -480,7 +487,7 @@ class BaseSiteAdapter(Configurable):
def getSiteURLFragment(self):
"Needs to be overriden in case of adapters that share a domain."
return self.getSiteDomain()
-
+
@classmethod
def getConfigSection(cls):
"Only needs to be overriden if != site domain."
diff --git a/fanficfare/adapters/base_xenforoforum_adapter.py b/fanficfare/adapters/base_xenforoforum_adapter.py
index 1782658a..acd7c41a 100644
--- a/fanficfare/adapters/base_xenforoforum_adapter.py
+++ b/fanficfare/adapters/base_xenforoforum_adapter.py
@@ -17,7 +17,6 @@
import time
import logging
-import traceback
logger = logging.getLogger(__name__)
import re
import urllib2
@@ -340,76 +339,63 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
def getChapterTextNum(self, url, index):
logger.debug('Getting chapter text from: %s index: %s' % (url,index))
- try:
- origurl = url
+ origurl = url
- # reader mode shows only threadmarked posts in threadmark
- # order. don't use reader mode for /threads/ urls, or
- # first post when always_include_first_post.
- if ( self.reader and
- self.getConfig("use_reader_mode",True) and
- '/threads/' not in url and
- (index > 0 or not self.getConfig('always_include_first_post')) ):
- logger.debug("USE READER MODE")
- # in case it changes:
- posts_per_page = self.getConfig("reader_posts_per_page",10)
+ # reader mode shows only threadmarked posts in threadmark
+ # order. don't use reader mode for /threads/ urls, or
+ # first post when always_include_first_post.
+ if ( self.reader and
+ self.getConfig("use_reader_mode",True) and
+ '/threads/' not in url and
+ (index > 0 or not self.getConfig('always_include_first_post')) ):
+ logger.debug("USE READER MODE")
+ # in case it changes:
+ posts_per_page = self.getConfig("reader_posts_per_page",10)
- # always_include_first_post with threadmarks added an
- # extra first chapter, we should be past it.
- if self.getConfig('always_include_first_post'):
- index = index - 1
- reader_page_num = int((index+posts_per_page)/posts_per_page)
- reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/reader?page='+unicode(reader_page_num)
- logger.debug("Reader URL to: %s"%reader_url)
- data = self._fetchUrl(reader_url)
- topsoup = souptag = self.make_soup(data)
+ # always_include_first_post with threadmarks added an
+ # extra first chapter, we should be past it.
+ if self.getConfig('always_include_first_post'):
+ index = index - 1
+ reader_page_num = int((index+posts_per_page)/posts_per_page)
+ reader_url=self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/reader?page='+unicode(reader_page_num)
+ logger.debug("Reader URL to: %s"%reader_url)
+ data = self._fetchUrl(reader_url)
+ topsoup = souptag = self.make_soup(data)
- # assumed normalized to /posts/1234/
- anchorid = "post-"+url.split('/')[-2]
- logger.debug("anchorid: %s"%anchorid)
+ # assumed normalized to /posts/1234/
+ anchorid = "post-"+url.split('/')[-2]
+ logger.debug("anchorid: %s"%anchorid)
+ souptag = topsoup.find('li',id=anchorid)
+ else:
+ logger.debug("DON'T USE READER MODE")
+ (data,opened) = self._fetchUrlOpened(url)
+ url = opened.geturl()
+ if '#' in origurl and '#' not in url:
+ url = url + origurl[origurl.index('#'):]
+ logger.debug("chapter URL redirected to: %s"%url)
+
+ topsoup = souptag = self.make_soup(data)
+
+ if '#' in url:
+ anchorid = url.split('#')[1]
souptag = topsoup.find('li',id=anchorid)
- else:
- logger.debug("DON'T USE READER MODE")
- (data,opened) = self._fetchUrlOpened(url)
- url = opened.geturl()
- if '#' in origurl and '#' not in url:
- url = url + origurl[origurl.index('#'):]
- logger.debug("chapter URL redirected to: %s"%url)
- topsoup = souptag = self.make_soup(data)
+ self.handle_spoilers(topsoup,souptag)
- if '#' in url:
- anchorid = url.split('#')[1]
- souptag = topsoup.find('li',id=anchorid)
+ bq = souptag.find('blockquote')
- self.handle_spoilers(topsoup,souptag)
+ bq.name='div'
- bq = souptag.find('blockquote')
+ for iframe in bq.find_all('iframe'):
+ iframe.extract() # calibre book reader & editor don't like iframes to youtube.
- bq.name='div'
+ for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
+ qdiv.extract() # Remove click to expand
- for iframe in bq.find_all('iframe'):
- iframe.extract() # calibre book reader & editor don't like iframes to youtube.
-
- for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
- qdiv.extract() # Remove click to expand
-
- ## img alt="[IMG]" class="bbCodeImage LbImage lazyload
- ## include lazy load images.
- for img in bq.find_all('img',{'class':'lazyload'}):
- img['src'] = img['data-src']
-
- except Exception as e:
- if self.getConfig('continue_on_chapter_error'):
-                bq = self.make_soup("""<div>
-<p><b>Error</b></p>
-<p>FanFicFare failed to download this chapter.  Because you have
-<b>continue_on_chapter_error</b> set to <b>true</b> in your personal.ini, the download continued.</p>
-<p>Chapter URL:<br>%s</p>
-<p>Error:<br><pre>%s</pre></p>
-</div>"""%(url,traceback.format_exc()))
- else:
- raise
+ ## img alt="[IMG]" class="bbCodeImage LbImage lazyload
+ ## include lazy load images.
+ for img in bq.find_all('img',{'class':'lazyload'}):
+ img['src'] = img['data-src']
         # XenForo uses <base href="..."/> -- utf8FromSoup needs the URL prefix to resolve relative links.
return self.utf8FromSoup(self.getURLPrefix()+'/',bq)
diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py
index e50c6eac..9bca1b86 100644
--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@@ -153,6 +153,7 @@ def get_valid_set_options():
'titlepage_use_table':(None,None,boollist),
'use_ssl_unverified_context':(None,None,boollist),
+ 'continue_on_chapter_error':(None,None,boollist),
'add_chapter_numbers':(None,None,boollist+['toconly']),
@@ -194,7 +195,6 @@ def get_valid_set_options():
'internalize_text_links':(None,['epub','html'],boollist),
'capitalize_forumtags':(base_xenforo_list,None,boollist),
- 'continue_on_chapter_error':(base_xenforo_list+['wuxiaworld.com'],None,boollist),
'minimum_threadmarks':(base_xenforo_list,None,None),
'first_post_title':(base_xenforo_list,None,None),
'always_include_first_post':(base_xenforo_list,None,boollist),
diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini
index 50926b77..99af2ab3 100644
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@@ -409,6 +409,13 @@ user_agent:FFF/2.X
## non-intuitive.
#description_limit:1000
+## Because some adapters can pull chapter URLs from human posts, the
+## odds of errors in the chapter URLs can be higher for some
+## sites/stories. You can set continue_on_chapter_error:true to
+## continue on after failing to download a chapter and instead record
+## an error message in the ebook for that chapter.
+continue_on_chapter_error:false
+
## The FFF CLI can fetch story URLs from unread emails when configured
## to read from your IMAP mail server. The example shows GMail, but
## other services that support IMAP can be used. GMail requires you
@@ -519,13 +526,6 @@ dateUpdated_format:%%Y-%%m-%%d %%H:%%M
## the description.
description_limit:500
-## Because base_xenforoforum adapters can pull chapter URLs from human
-## posts, the odds of errors in the chapter URLs are vastly higher.
-## You can set continue_on_chapter_error:true to continue on after
-## failing to download a chapter and instead record an error message
-## in the ebook for that chapter.
-continue_on_chapter_error:false
-
## When given a thread URL, use threadmarks as chapter links when
## there are at least this many threadmarks. A number of older
## threads have a single threadmark to an 'index' post. Set to 1 to