Mirror of https://github.com/JimmXinu/FanFicFare.git
Image Handling: Cache failures within a download (but not between downloads); keep full src URL with failedtoload marker
parent 4aa47c8bab
commit 223138b8e5
6 changed files with 29 additions and 17 deletions
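
Across the hunks below, comparisons against the literal string "failedtoload" become startswith() checks, because the failure marker now keeps the full original src URL after it instead of replacing it. A minimal sketch of that convention, separate from FanFicFare itself (mark_failed and is_failed are illustrative helper names, not part of the codebase):

# Sketch of the failure-marker convention this commit adopts (illustrative only).
def mark_failed(imgurl):
    # keep the full original URL after the marker for later reporting/inspection
    return "failedtoload %s" % imgurl

def is_failed(src):
    # old code compared src == 'failedtoload'; that no longer matches
    # 'failedtoload http://example.com/cover.jpg'
    return src.startswith('failedtoload')

print(is_failed(mark_failed('http://example.com/cover.jpg')))  # True
print(is_failed('images/ffdl-1234.jpg'))                       # False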
@@ -183,7 +183,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
         if storyImage:
             coverurl = storyImage['data-fullsize']
             # try setting from data-fullsize, if fails, try using data-src
-            if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
+            if self.setCoverImage(self.url,coverurl)[0].startswith("failedtoload"):
                 coverurl = storyImage['src']
                 self.setCoverImage(self.url,coverurl)
@@ -290,7 +290,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
         if img:
             cover_url = img['src']
             # usually URL is for thumbnail. Try expected URL for larger image, if fails fall back to the original URL
-            if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0] == "failedtoload":
+            if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0].startswith("failedtoload"):
                 self.setCoverImage(url,cover_url)
 
         # some content is show as tables, this will preserve them
@@ -355,7 +355,7 @@ try to download.</p>
                                              self.getConfig('allow_unsafe_filename')),
                                          self.get_request_raw,
                                          cover=cover_image_type)
-            if src and src != 'failedtoload':
+            if src and not src.startswith('failedtoload'):
                 self.story.setMetadata('cover_image',cover_image_type)
 
         # cheesy way to carry calibre bookmark file forward across update.
@@ -1077,7 +1077,7 @@ class BaseXenForo2ForumAdapter(BaseSiteAdapter):
                     span['class']=[]
                 span['class'].append("invisible_text")
         if self.getConfig('replace_failed_smilies_with_alt_text'):
-            for img in soup.find_all('img',src=re.compile(r'(^data:image|(failedtoload|clear.png)$)')):
+            for img in soup.find_all('img',src=re.compile(r'(^(data:image|failedtoload)|(clear.png$))')):
                 # logger.debug("replace_failed_smilies_with_alt_text img: %s"%img)
                 if img.has_attr('class'):
                     clses = unicode(img['class']) # stringify list.
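
The smiley-replacement regex changes for the same reason: a failed src now begins with the marker (followed by the URL) rather than ending with it. A standalone check of the new pattern against representative src values (the example strings are made up):

import re

# the new pattern from the hunk above
pat = re.compile(r'(^(data:image|failedtoload)|(clear.png$))')

print(bool(pat.search('failedtoload http://example.com/smiley.png')))  # True, marker prefix
print(bool(pat.search('data:image/png;base64,AAAA')))                  # True
print(bool(pat.search('styles/default/xenforo/clear.png')))            # True
print(bool(pat.search('http://example.com/photo.jpg')))                # False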
@@ -163,7 +163,7 @@ def get_update_data(inputio,
                 ## skip <img src="data:image..."
                 ## NOTE - also only applying this processing if img has a longdesc (aka origurl)
                 ## in past, would error out entirely.
-                if img.has_attr('src') and img.has_attr('longdesc') and not img['src'].startswith('data:image'):
+                if img.has_attr('src') and img.has_attr('longdesc') and not img['src'].startswith('data:image') and not img['src'].startswith('failedtoload'):
                     try:
                         newsrc=get_path_part(href)+img['src']
                         # remove all .. and the path part above it, if present.
@@ -177,12 +177,8 @@ def get_update_data(inputio,
                         images[longdesc] = (newsrc, data)
                         # logger.debug("-->html Add oldimages:%s"%newsrc)
                     except Exception as e:
-                        # don't report u'OEBPS/failedtoload',
-                        # it indicates a failed download
-                        # originally.
-                        if newsrc != u'OEBPS/failedtoload':
-                            logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
-                            logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
+                        logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
+                        logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
         ## Inline and embedded CSS url() images
         for inline in soup.select('*[style]') + soup.select('style'):
             style = ''
@@ -193,6 +189,8 @@ def get_update_data(inputio,
             if 'url(' in style:
                 ## the pattern will also accept mismatched '/", which is broken CSS.
                 for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
+                    if style_url.startswith('failedtoload'):
+                        continue
                     logger.debug("Updating inline/embedded style url(%s)"%style_url)
                     newsrc=''
                     longdesc=''
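
The CSS handling gains the same guard: url() values that carry the failure marker are skipped instead of being treated as relative paths. A standalone illustration using the url() regex from the hunk above (the style string is made up):

import re

style = "background: url('failedtoload http://example.com/bg.png'); border-image: url(images/ffdl-1.jpg);"

for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
    if style_url.startswith('failedtoload'):
        # previously-failed download; leave it alone
        continue
    print("would update", style_url)   # only images/ffdl-1.jpg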
@@ -603,7 +603,7 @@ class ImageStore:
         self.cover = None
 
     # returns newsrc
-    def add_img(self,url,ext,mime,data,cover=False,actuallyused=True):
+    def add_img(self,url,ext=None,mime=None,data=None,cover=False,actuallyused=True,failure=False):
         # logger.debug("add_img0(%s,%s,%s)"%(url,ext,mime))
         # existing ffdl image, likely from CSS
         m = re.match(r'^images/'+self.prefix+r'-(?P<uuid>[0-9a-fA-F-]+)\.(?P<ext>.+)$',url)
@@ -643,10 +643,18 @@ class ImageStore:
         if uuid not in self.uuid_index:
             self.uuid_index[uuid]=info
             self.infos.append(info)
-            self.size_index[len(data)].append(uuid)
+            if data:
+                self.size_index[len(data)].append(uuid)
+            if failure:
+                info['newsrc'] = 'failedtoload'
+                info['actuallyused'] = False
         logger.debug("add_img(%s,%s,%s,%s,%s)"%(url,ext,mime,uuid,info['newsrc']))
         return info['newsrc']
 
+    def cache_failed_url(self,url):
+        # logger.debug("cache_failed_url(%s)"%url)
+        self.add_img(url,failure=True)
+
     def get_img_by_url(self,url):
         # logger.debug("get_img_by_url(%s)"%url)
         uuid = self.url_index.get(url,None)
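
With the new failure argument, cache_failed_url() records a broken URL in the store for the remainder of the current download: the entry gets newsrc 'failedtoload' and actuallyused False, so the same URL is not fetched again but is never written out either. A toy stand-in for that behaviour (assumed shape of the record, not the real ImageStore):

# Toy stand-in for the within-download failure cache added above.
class TinyImageStore:
    def __init__(self):
        self.by_url = {}

    def cache_failed_url(self, url):
        # remember the failure for the rest of this download only
        self.by_url[url] = {'newsrc': 'failedtoload', 'actuallyused': False}

    def get_img_by_url(self, url):
        return self.by_url.get(url, None)

store = TinyImageStore()
store.cache_failed_url('http://example.com/broken.png')
info = store.get_img_by_url('http://example.com/broken.png')
# a second reference to the same URL is answered from the cache, no re-fetch
print(info['newsrc'], info['actuallyused'])   # failedtoload False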
@@ -664,7 +672,7 @@ class ImageStore:
     def get_img_by_uuid(self,uuid):
         # logger.debug("get_img_by_uuid(%s)"%uuid)
         info = self.uuid_index.get(uuid,None)
-        if info:
+        if info and info['newsrc'] != 'failedtoload':
             info['actuallyused']=True
         return info
 
@@ -675,6 +683,7 @@ class ImageStore:
         return [ x for x in self.infos if x['actuallyused'] ]
 
     def debug_out(self):
+        # logger.debug(self.fails_index)
         # import pprint
         # logger.debug(pprint.pformat([ (x['url'], x['uuid'], x['newsrc']) for x in self.infos]))
         pass
@@ -1696,8 +1705,8 @@ class Story(Requestable):
         imginfo = self.img_store.get_img_by_url(imgurl)
         if not imginfo:
             try:
-                if imgurl.endswith('failedtoload'):
-                    return ("failedtoload","failedtoload")
+                if imgurl.startswith('failedtoload'):
+                    return (imgurl,imgurl)
 
                 if not imgdata:
                     # might already have from data:image in-line allow
@@ -1751,7 +1760,9 @@ class Story(Requestable):
                 logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\nException: %s"%(parenturl,imgurl,e))
             except:
                 logger.info("Failed to load or convert image, \nparent:%s\nskipping:%s\n(Exception output also caused exception)"%(parenturl,imgurl))
-            return ("failedtoload","failedtoload")
+            self.img_store.cache_failed_url(imgurl)
+            fs = "failedtoload %s"%imgurl
+            return (fs,fs)
 
         ## (cover images never included in get_imgs_by_size)
         if self.getConfig('dedup_img_files',False):
@@ -1768,6 +1779,9 @@ class Story(Requestable):
                                               mime,
                                               data)
         else:
+            if imginfo['newsrc'].startswith('failedtoload'):
+                fs = "failedtoload %s"%imgurl
+                return (fs,fs)
             ## image was found in existing store.
             self.img_store.debug_out()
             logger.debug("existing image url found:%s->%s"%(imgurl,imginfo['newsrc']))
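
Taken together, the Story changes mean the first failure for a URL is cached and every later reference to it gets back the marker plus the full original src, without another fetch attempt. A rough, self-contained sketch of that caller-side flow (all names here are illustrative, not the actual Story or ImageStore API):

# Rough sketch of the caller-side flow after this commit (illustrative only).
failed_cache = {}   # url -> marker string, kept only for the current download

def fetch(url):
    # stand-in for the real network fetch; pretend it always fails
    raise IOError("404 for %s" % url)

def add_image(imgurl):
    if imgurl in failed_cache:
        # later occurrence of a known-bad URL: answered from the cache, no re-fetch
        fs = failed_cache[imgurl]
        return (fs, fs)
    try:
        fetch(imgurl)
    except Exception:
        fs = "failedtoload %s" % imgurl   # marker keeps the full src URL
        failed_cache[imgurl] = fs
        return (fs, fs)
    return ('images/ffdl-1.jpg', 'images/ffdl-1.jpg')   # real code would store the image

print(add_image('http://example.com/broken.png'))  # first sighting: fetch fails, cached
print(add_image('http://example.com/broken.png'))  # second sighting: cache hit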