Ignore CSS url() references that point to ttf/otf/woff/woff2 font files

This commit is contained in:
Jim Miller 2026-02-05 13:46:24 -06:00
parent db0d39c9cd
commit 1b57e49d98
2 changed files with 14 additions and 1 deletions

View file

@ -670,6 +670,7 @@ try to download.</p>
return url in self.add_img_names
def include_css_urls(self,parenturl,style):
FONT_EXTS = ('ttf','otf','woff','woff2')
# logger.debug("include_css_urls(%s,%s)"%(parenturl,style))
## pass in the style string, will be returned with URLs
## replaced and images will be added.
@ -680,12 +681,16 @@ try to download.</p>
## url('href')
## the pattern will also accept mismatched '/", which is broken CSS.
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
logger.debug("Adding style url(%s)"%style_url)
## additional_images are not processed. This applies
## only to CSS url(), which should be the only place
## additional_images is used.
if self.is_additional_image(style_url):
logger.debug("Skipping sheet style url(%s), in additional_images"%style_url)
continue
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Adding style url(%s)"%style_url)
try:
# longdesc(aka origurl) isn't saved anywhere in CSS.

View file

@ -20,6 +20,8 @@ from .six import ensure_text, text_type as unicode
from .six import string_types as basestring
from io import BytesIO
FONT_EXTS = ('ttf','otf','woff','woff2')
# from io import StringIO
# import cProfile, pstats
# from pstats import SortKey
@ -191,6 +193,9 @@ def get_update_data(inputio,
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
if style_url.startswith('failedtoload'):
continue
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Updating inline/embedded style url(%s)"%style_url)
newsrc=''
longdesc=''
@ -262,6 +267,9 @@ def get_update_data(inputio,
# logger.debug("%s CSS url:%s"%(href,style))
## the pattern will also accept mismatched '/", which is broken CSS.
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Updating sheet style url(%s)"%style_url)
newsrc=''
longdesc=''