diff --git a/fanficfare/adapters/adapter_fimfictionnet.py b/fanficfare/adapters/adapter_fimfictionnet.py index 614c3e8a..634efe59 100644 --- a/fanficfare/adapters/adapter_fimfictionnet.py +++ b/fanficfare/adapters/adapter_fimfictionnet.py @@ -183,7 +183,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): if storyImage: coverurl = storyImage['data-fullsize'] # try setting from data-fullsize, if fails, try using data-src - if self.setCoverImage(self.url,coverurl)[0] == "failedtoload": + if self.setCoverImage(self.url,coverurl)[0].startswith("failedtoload"): coverurl = storyImage['src'] self.setCoverImage(self.url,coverurl) diff --git a/fanficfare/adapters/adapter_royalroadcom.py b/fanficfare/adapters/adapter_royalroadcom.py index ce492467..246e5f4e 100644 --- a/fanficfare/adapters/adapter_royalroadcom.py +++ b/fanficfare/adapters/adapter_royalroadcom.py @@ -290,7 +290,7 @@ class RoyalRoadAdapter(BaseSiteAdapter): if img: cover_url = img['src'] # usually URL is for thumbnail. Try expected URL for larger image, if fails fall back to the original URL - if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0] == "failedtoload": + if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0].startswith("failedtoload"): self.setCoverImage(url,cover_url) # some content is show as tables, this will preserve them diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py index 563ed148..d788c2a5 100644 --- a/fanficfare/adapters/base_adapter.py +++ b/fanficfare/adapters/base_adapter.py @@ -355,7 +355,7 @@ try to download.

self.getConfig('allow_unsafe_filename')), self.get_request_raw, cover=cover_image_type) - if src and src != 'failedtoload': + if src and not src.startswith('failedtoload'): self.story.setMetadata('cover_image',cover_image_type) # cheesy way to carry calibre bookmark file forward across update. diff --git a/fanficfare/adapters/base_xenforo2forum_adapter.py b/fanficfare/adapters/base_xenforo2forum_adapter.py index c4ac2cca..eb2cb7f3 100644 --- a/fanficfare/adapters/base_xenforo2forum_adapter.py +++ b/fanficfare/adapters/base_xenforo2forum_adapter.py @@ -1077,7 +1077,7 @@ class BaseXenForo2ForumAdapter(BaseSiteAdapter): span['class']=[] span['class'].append("invisible_text") if self.getConfig('replace_failed_smilies_with_alt_text'): - for img in soup.find_all('img',src=re.compile(r'(^data:image|(failedtoload|clear.png)$)')): + for img in soup.find_all('img',src=re.compile(r'(^(data:image|failedtoload)|(clear.png$))')): # logger.debug("replace_failed_smilies_with_alt_text img: %s"%img) if img.has_attr('class'): clses = unicode(img['class']) # stringify list. diff --git a/fanficfare/epubutils.py b/fanficfare/epubutils.py index 09a2413b..4f930456 100644 --- a/fanficfare/epubutils.py +++ b/fanficfare/epubutils.py @@ -163,7 +163,7 @@ def get_update_data(inputio, ## skip html Add oldimages:%s"%newsrc) except Exception as e: - # don't report u'OEBPS/failedtoload', - # it indicates a failed download - # originally. - if newsrc != u'OEBPS/failedtoload': - logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc)) - logger.warning("Exception: %s"%(unicode(e)),exc_info=True) + logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc)) + logger.warning("Exception: %s"%(unicode(e)),exc_info=True) ## Inline and embedded CSS url() images for inline in soup.select('*[style]') + soup.select('style'): style = '' @@ -193,6 +189,8 @@ def get_update_data(inputio, if 'url(' in style: ## the pattern will also accept mismatched '/", which is broken CSS. for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style): + if style_url.startswith('failedtoload'): + continue logger.debug("Updating inline/embedded style url(%s)"%style_url) newsrc='' longdesc='' diff --git a/fanficfare/story.py b/fanficfare/story.py index 325f1223..b85890cc 100644 --- a/fanficfare/story.py +++ b/fanficfare/story.py @@ -603,7 +603,7 @@ class ImageStore: self.cover = None # returns newsrc - def add_img(self,url,ext,mime,data,cover=False,actuallyused=True): + def add_img(self,url,ext=None,mime=None,data=None,cover=False,actuallyused=True,failure=False): # logger.debug("add_img0(%s,%s,%s)"%(url,ext,mime)) # existing ffdl image, likely from CSS m = re.match(r'^images/'+self.prefix+r'-(?P[0-9a-fA-F-]+)\.(?P.+)$',url) @@ -643,10 +643,18 @@ class ImageStore: if uuid not in self.uuid_index: self.uuid_index[uuid]=info self.infos.append(info) - self.size_index[len(data)].append(uuid) + if data: + self.size_index[len(data)].append(uuid) + if failure: + info['newsrc'] = 'failedtoload' + info['actuallyused'] = False logger.debug("add_img(%s,%s,%s,%s,%s)"%(url,ext,mime,uuid,info['newsrc'])) return info['newsrc'] + def cache_failed_url(self,url): + # logger.debug("cache_failed_url(%s)"%url) + self.add_img(url,failure=True) + def get_img_by_url(self,url): # logger.debug("get_img_by_url(%s)"%url) uuid = self.url_index.get(url,None) @@ -664,7 +672,7 @@ class ImageStore: def get_img_by_uuid(self,uuid): # logger.debug("get_img_by_uuid(%s)"%uuid) info = self.uuid_index.get(uuid,None) - if info: + if info and info['newsrc'] != 'failedtoload': info['actuallyused']=True return info @@ -675,6 +683,7 @@ class ImageStore: return [ x for x in self.infos if x['actuallyused'] ] def debug_out(self): + # logger.debug(self.fails_index) # import pprint # logger.debug(pprint.pformat([ (x['url'], x['uuid'], x['newsrc']) for x in self.infos])) pass @@ -1696,8 +1705,8 @@ class Story(Requestable): imginfo = self.img_store.get_img_by_url(imgurl) if not imginfo: try: - if imgurl.endswith('failedtoload'): - return ("failedtoload","failedtoload") + if imgurl.startswith('failedtoload'): + return (imgurl,imgurl) if not imgdata: # might already have from data:image in-line allow @@ -1751,7 +1760,9 @@ class Story(Requestable): logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\nException: %s"%(parenturl,imgurl,e)) except: logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\n(Exception output also caused exception)"%(parenturl,imgurl)) - return ("failedtoload","failedtoload") + self.img_store.cache_failed_url(imgurl) + fs = "failedtoload %s"%imgurl + return (fs,fs) ## (cover images never included in get_imgs_by_size) if self.getConfig('dedup_img_files',False): @@ -1768,6 +1779,9 @@ class Story(Requestable): mime, data) else: + if imginfo['newsrc'].startswith('failedtoload'): + fs = "failedtoload %s"%imgurl + return (fs,fs) ## image was found in existing store. self.img_store.debug_out() logger.debug("existing image url found:%s->%s"%(imgurl,imginfo['newsrc']))