From e5b5768f111ed79a2aa7e22f8af0deb4458330a3 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Tue, 4 Nov 2025 12:20:39 -0600
Subject: [PATCH] Perf improvement for unnew

---
 fanficfare/epubutils.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/fanficfare/epubutils.py b/fanficfare/epubutils.py
index 3a33335d..1b3f2a26 100644
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@@ -20,6 +20,26 @@ from .six import ensure_text, text_type as unicode
 from .six import string_types as basestring
 from io import BytesIO
 
+# from io import StringIO
+# import cProfile, pstats
+# from pstats import SortKey
+# def do_cprofile(func):
+#     def profiled_func(*args, **kwargs):
+#         profile = cProfile.Profile()
+#         try:
+#             profile.enable()
+#             result = func(*args, **kwargs)
+#             profile.disable()
+#             return result
+#         finally:
+#             # profile.sort_stats(SortKey.CUMULATIVE).print_stats(20)
+#             s = StringIO()
+#             sortby = SortKey.CUMULATIVE
+#             ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
+#             ps.print_stats(20)
+#             print(s.getvalue())
+#     return profiled_func
+
 import bs4
 
 def get_dcsource(inputio):
@@ -293,6 +313,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
                     return ahref
     return None
 
+# @do_cprofile
 def reset_orig_chapters_epub(inputio,outfile):
     inputepub = ZipFile(inputio, 'r') # works equally well with a path or a blob
 
@@ -345,7 +366,9 @@ def reset_orig_chapters_epub(inputio,outfile):
         if re.match(r'.*/file\d+\.xhtml',zf):
             #logger.debug("zf:%s"%zf)
             data = data.decode('utf-8')
-            soup = make_soup(data)
+            # should be re-reading an FFF file, single soup should
+            # be good enough and halve processing time.
+            soup = make_soup(data,dblsoup=False)
 
             chapterorigtitle = None
             tag = soup.find('meta',{'name':'chapterorigtitle'})
@@ -458,7 +481,7 @@ def _replace_navxhtml(navxhtmldom,zf,chaptertoctitle):
             #     logger.debug("a href=%s label:%s"%(zf,atag.toxml()))
             continue
 
-def make_soup(data):
+def make_soup(data,dblsoup=True):
     '''
     Convenience method for getting a bs4 soup. bs3 has been removed.
     '''
@@ -473,7 +496,8 @@ def make_soup(data):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         soup = bs4.BeautifulSoup(data,'html5lib')
-    soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
+    if dblsoup:
+        soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
 
     for ns in soup.find_all('fff_hide_noscript'):
         ns.name = 'noscript'
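
Note (not part of the patch): the speedup comes from make_soup() doing a single html5lib parse when dblsoup=False, instead of parsing and then re-parsing the serialized soup as before. The snippet below is a minimal timing sketch of that difference; the sample HTML and iteration count are invented for illustration, and it only assumes bs4 and html5lib are installed.

# Minimal timing sketch: one html5lib parse vs. the original
# parse-then-reparse behavior that dblsoup=False now skips.
import timeit
import warnings
import bs4

# Hypothetical chapter-sized HTML, a stand-in for a FFF fileNNNN.xhtml payload.
sample = ("<html><head><title>t</title></head><body>"
          + "<p>chapter text</p>" * 200
          + "</body></html>")

def single_soup(data):
    # dblsoup=False path: parse once.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        return bs4.BeautifulSoup(data, 'html5lib')

def double_soup(data):
    # dblsoup=True path: parse, serialize, parse again.
    return bs4.BeautifulSoup(str(single_soup(data)), 'html5lib')

print("single parse:", timeit.timeit(lambda: single_soup(sample), number=50))
print("double parse:", timeit.timeit(lambda: double_soup(sample), number=50))

On chapter-sized input the double parse should take roughly twice as long, which is consistent with the "halve processing time" comment in the diff.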