From 223138b8e5bfb415e4bd661bc2861667e2ce9b06 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Mon, 19 Jan 2026 12:05:50 -0600
Subject: [PATCH] Image Handling: Cache fails w/in download (but not between),
keep full src URL with failedtoload marker
---
fanficfare/adapters/adapter_fimfictionnet.py | 2 +-
fanficfare/adapters/adapter_royalroadcom.py | 2 +-
fanficfare/adapters/base_adapter.py | 2 +-
.../adapters/base_xenforo2forum_adapter.py | 2 +-
fanficfare/epubutils.py | 12 ++++-----
fanficfare/story.py | 26 ++++++++++++++-----
6 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/fanficfare/adapters/adapter_fimfictionnet.py b/fanficfare/adapters/adapter_fimfictionnet.py
index 614c3e8a..634efe59 100644
--- a/fanficfare/adapters/adapter_fimfictionnet.py
+++ b/fanficfare/adapters/adapter_fimfictionnet.py
@@ -183,7 +183,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
if storyImage:
coverurl = storyImage['data-fullsize']
# try setting from data-fullsize, if fails, try using data-src
- if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
+ if self.setCoverImage(self.url,coverurl)[0].startswith("failedtoload"):
coverurl = storyImage['src']
self.setCoverImage(self.url,coverurl)
diff --git a/fanficfare/adapters/adapter_royalroadcom.py b/fanficfare/adapters/adapter_royalroadcom.py
index ce492467..246e5f4e 100644
--- a/fanficfare/adapters/adapter_royalroadcom.py
+++ b/fanficfare/adapters/adapter_royalroadcom.py
@@ -290,7 +290,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
if img:
cover_url = img['src']
# usually URL is for thumbnail. Try expected URL for larger image, if fails fall back to the original URL
- if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0] == "failedtoload":
+ if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0].startswith("failedtoload"):
self.setCoverImage(url,cover_url)
# some content is show as tables, this will preserve them
diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py
index 563ed148..d788c2a5 100644
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@@ -355,7 +355,7 @@ try to download.
self.getConfig('allow_unsafe_filename')),
self.get_request_raw,
cover=cover_image_type)
- if src and src != 'failedtoload':
+ if src and not src.startswith('failedtoload'):
self.story.setMetadata('cover_image',cover_image_type)
# cheesy way to carry calibre bookmark file forward across update.
diff --git a/fanficfare/adapters/base_xenforo2forum_adapter.py b/fanficfare/adapters/base_xenforo2forum_adapter.py
index c4ac2cca..eb2cb7f3 100644
--- a/fanficfare/adapters/base_xenforo2forum_adapter.py
+++ b/fanficfare/adapters/base_xenforo2forum_adapter.py
@@ -1077,7 +1077,7 @@ class BaseXenForo2ForumAdapter(BaseSiteAdapter):
span['class']=[]
span['class'].append("invisible_text")
if self.getConfig('replace_failed_smilies_with_alt_text'):
- for img in soup.find_all('img',src=re.compile(r'(^data:image|(failedtoload|clear.png)$)')):
+ for img in soup.find_all('img',src=re.compile(r'(^(data:image|failedtoload)|(clear.png$))')):
# logger.debug("replace_failed_smilies_with_alt_text img: %s"%img)
if img.has_attr('class'):
clses = unicode(img['class']) # stringify list.
diff --git a/fanficfare/epubutils.py b/fanficfare/epubutils.py
index 09a2413b..4f930456 100644
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@@ -163,7 +163,7 @@ def get_update_data(inputio,
## skip
html Add oldimages:%s"%newsrc)
except Exception as e:
- # don't report u'OEBPS/failedtoload',
- # it indicates a failed download
- # originally.
- if newsrc != u'OEBPS/failedtoload':
- logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
- logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
+ logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
+ logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
## Inline and embedded CSS url() images
for inline in soup.select('*[style]') + soup.select('style'):
style = ''
@@ -193,6 +189,8 @@ def get_update_data(inputio,
if 'url(' in style:
## the pattern will also accept mismatched '/", which is broken CSS.
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
+ if style_url.startswith('failedtoload'):
+ continue
logger.debug("Updating inline/embedded style url(%s)"%style_url)
newsrc=''
longdesc=''
diff --git a/fanficfare/story.py b/fanficfare/story.py
index 325f1223..b85890cc 100644
--- a/fanficfare/story.py
+++ b/fanficfare/story.py
@@ -603,7 +603,7 @@ class ImageStore:
self.cover = None
# returns newsrc
- def add_img(self,url,ext,mime,data,cover=False,actuallyused=True):
+ def add_img(self,url,ext=None,mime=None,data=None,cover=False,actuallyused=True,failure=False):
# logger.debug("add_img0(%s,%s,%s)"%(url,ext,mime))
# existing ffdl image, likely from CSS
m = re.match(r'^images/'+self.prefix+r'-(?P[0-9a-fA-F-]+)\.(?P.+)$',url)
@@ -643,10 +643,18 @@ class ImageStore:
if uuid not in self.uuid_index:
self.uuid_index[uuid]=info
self.infos.append(info)
- self.size_index[len(data)].append(uuid)
+ if data:
+ self.size_index[len(data)].append(uuid)
+ if failure:
+ info['newsrc'] = 'failedtoload'
+ info['actuallyused'] = False
logger.debug("add_img(%s,%s,%s,%s,%s)"%(url,ext,mime,uuid,info['newsrc']))
return info['newsrc']
+ def cache_failed_url(self,url):
+ # logger.debug("cache_failed_url(%s)"%url)
+ self.add_img(url,failure=True)
+
def get_img_by_url(self,url):
# logger.debug("get_img_by_url(%s)"%url)
uuid = self.url_index.get(url,None)
@@ -664,7 +672,7 @@ class ImageStore:
def get_img_by_uuid(self,uuid):
# logger.debug("get_img_by_uuid(%s)"%uuid)
info = self.uuid_index.get(uuid,None)
- if info:
+ if info and info['newsrc'] != 'failedtoload':
info['actuallyused']=True
return info
@@ -675,6 +683,7 @@ class ImageStore:
return [ x for x in self.infos if x['actuallyused'] ]
def debug_out(self):
+ # logger.debug(self.fails_index)
# import pprint
# logger.debug(pprint.pformat([ (x['url'], x['uuid'], x['newsrc']) for x in self.infos]))
pass
@@ -1696,8 +1705,8 @@ class Story(Requestable):
imginfo = self.img_store.get_img_by_url(imgurl)
if not imginfo:
try:
- if imgurl.endswith('failedtoload'):
- return ("failedtoload","failedtoload")
+ if imgurl.startswith('failedtoload'):
+ return (imgurl,imgurl)
if not imgdata:
# might already have from data:image in-line allow
@@ -1751,7 +1760,9 @@ class Story(Requestable):
logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\nException: %s"%(parenturl,imgurl,e))
except:
logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\n(Exception output also caused exception)"%(parenturl,imgurl))
- return ("failedtoload","failedtoload")
+ self.img_store.cache_failed_url(imgurl)
+ fs = "failedtoload %s"%imgurl
+ return (fs,fs)
## (cover images never included in get_imgs_by_size)
if self.getConfig('dedup_img_files',False):
@@ -1768,6 +1779,9 @@ class Story(Requestable):
mime,
data)
else:
+ if imginfo['newsrc'].startswith('failedtoload'):
+ fs = "failedtoload %s"%imgurl
+ return (fs,fs)
## image was found in existing store.
self.img_store.debug_out()
logger.debug("existing image url found:%s->%s"%(imgurl,imginfo['newsrc']))