From f64f041546818a08d51748db8354fa2c68380898 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Sat, 10 Jan 2026 14:39:07 -0600
Subject: [PATCH] Adding CSS url() image inclusion, name all images by uuid5
---
fanficfare/adapters/adapter_test1.py | 15 ++++-
fanficfare/adapters/base_adapter.py | 61 +++++++++++++++----
fanficfare/epubutils.py | 61 +++++++++++++++++--
fanficfare/story.py | 91 ++++++++++++++++++++++------
fanficfare/writers/base_writer.py | 2 +
5 files changed, 193 insertions(+), 37 deletions(-)
diff --git a/fanficfare/adapters/adapter_test1.py b/fanficfare/adapters/adapter_test1.py
index dd978b4e..7ac78161 100644
--- a/fanficfare/adapters/adapter_test1.py
+++ b/fanficfare/adapters/adapter_test1.py
@@ -335,8 +335,11 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
text=u'''
Prologue
+
This is a fake adapter for testing purposes. Different sid's will give different errors:
sid>=1000 will use custom test story data from your configuration(personal.ini)
+
+
Hard coded ids:
http://test1.com?sid=664 - Crazy string title
http://test1.com?sid=665, 711-720 - raises AdultCheckRequired
@@ -353,6 +356,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
http://test1.com?sid=0 - Succeeds, generates some text specifically for testing hyphenation problems with Nook STR/STRwG
Odd sid's will be In-Progress, evens complete. sid<10 will be assigned one of four languages and included in a series.
+
'''
elif self.story.getMetadata('storyId') == '0':
text=u'''
@@ -411,7 +415,13 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
else:
if self.story.getMetadata('storyId') == '92':
- imgtext='

'
+ imgtext='''
+

+
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+'''
else:
imgtext='img goes here when sid=92'
text=u'''
@@ -432,7 +442,9 @@ Don't—e;ver—d;o—that—a;gain, 法 é
horizontal rules
+
"Lorem ipsum dolor sit amet", consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore--et dolore magna aliqua. 'Ut enim ad minim veniam', quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
@@ -444,7 +456,6 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
"Lorem ipsum dolor sit amet", consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore--et dolore magna aliqua. 'Ut enim ad minim veniam', quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
'''%imgtext
soup = self.make_soup(text)
diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py
index b7c05cb9..7d641e89 100644
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@@ -93,6 +93,8 @@ class BaseSiteAdapter(Requestable):
self.oldchaptersdata = None
self.oldimgs = None
self.oldcover = None # (data of existing cover html, data of existing cover image)
+ self.add_img_names = None
+
self.calibrebookmark = None
self.logfile = None
self.ignore_chapter_url_list = None
@@ -261,12 +263,10 @@ class BaseSiteAdapter(Requestable):
# logger.debug("index:%s title:%s url:%s"%(index,title,url))
# logger.debug(self.oldchaptersmap[url])
data = self.utf8FromSoup(None,
- self.oldchaptersmap[url],
- partial(cachedfetch,self.get_request_raw,self.oldimgs))
+ self.oldchaptersmap[url])
elif self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,
- self.oldchapters[index],
- partial(cachedfetch,self.get_request_raw,self.oldimgs))
+ self.oldchapters[index])
if self.getConfig('mark_new_chapters') == 'true':
# if already marked new -- ie, origtitle and title don't match
@@ -402,6 +402,12 @@ try to download.
for index, chap in enumerate(self.chapterUrls):
self.chapterUrls[index]['url'] = self.normalize_chapterurl(chap['url'])
+ ## load existing epub images in story ImageStore so they
+ ## are re-used, but not processed again. Prior system was
+ ## simple url->data cache wedged in front of fetch.
+ if self.oldimgs:
+ self.story.load_oldimgs(self.oldimgs)
+
# logger.debug(u"getStoryMetadataOnly times:\n%s"%self.times)
return self.story
@@ -657,6 +663,40 @@ try to download.
return list(soup.attrs.keys())
return []
+    def is_additional_image(self,url):
+        """Report whether url names one of the configured additional images.
+
+        The first call builds a list of "images/<basename>" strings from
+        the 'additional_images' config list and stores it on
+        self.add_img_names; later calls reuse that stored list.
+        """
+        known = self.add_img_names
+        if known is None:
+            known = []
+            for imgfn in self.getConfigList('additional_images'):
+                known.append("images/"+os.path.basename(imgfn))
+            self.add_img_names = known
+        return url in known
+
+    def include_css_urls(self,parenturl,style):
+        """Pull the images referenced by CSS url() values into the story.
+
+        parenturl is handed to story.addImgUrl() together with each
+        translated image URL; style is the CSS text to scan.  Accepts
+        url(href), url("href") and url('href'); the pattern will also
+        accept mismatched '/", which is broken CSS.
+
+        Returns style with the target of every successfully added url()
+        replaced by the src that story.addImgUrl() returned.  A url()
+        that is empty, that names one of the additional_images, or whose
+        add raises AttributeError is left exactly as it was written.
+        """
+        # logger.debug("include_css_urls(%s,%s)"%(parenturl,style))
+        if 'url(' not in style:
+            return style
+        def replace_url(match):
+            (prefix,style_url,suffix) = match.groups()
+            ## Empty url() has nothing to fetch; leave it untouched.
+            if not style_url:
+                return match.group(0)
+            logger.debug("Adding style url(%s)"%style_url)
+            ## additional_images don't get processing. Applies
+            ## only to CSS url(), that should be the only time
+            ## additional_images is used.
+            if self.is_additional_image(style_url):
+                return match.group(0)
+            try:
+                # longdesc(aka origurl) isn't saved anywhere in CSS.
+                (src,longdesc)=self.story.addImgUrl(parenturl,self.img_url_trans(style_url),
+                                                    self.get_request_raw,
+                                                    # no CSS image may be cover.
+                                                    coverexclusion=r'.')
+            except AttributeError:
+                logger.info("CSS url() image failed. Skipping url(%s)"%style_url)
+                return match.group(0)
+            return prefix+src+suffix
+        ## Substitute inside each matched url() only.  A plain
+        ## str.replace() of the href would also rewrite any other text
+        ## that merely contains it, e.g. a.png inside url(dir/a.png).
+        return re.sub(r'(url\([\'"]?)(.*?)([\'"]?\))', replace_url, style)
+
# This gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...)
# Now also does a bunch of other common processing for us.
@@ -737,6 +777,12 @@ try to download.
coverexclusion=self.getConfig('cover_exclusion_regexp'))
except AttributeError as ae:
logger.info("Parsing for img tags failed--probably poor input HTML. Skipping img(%s)"%img)
+ ## Inline CSS url() images
+ for inline in soup.select('*[style]'):
+ inline['style'] = self.include_css_urls(url,inline['style'])
+ ## Embedded CSS