mirror of https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
Perf improvement for unnew
parent 6cf2519ef9
commit e5b5768f11
1 changed file with 27 additions and 3 deletions
@@ -20,6 +20,26 @@ from .six import ensure_text, text_type as unicode
 from .six import string_types as basestring
 from io import BytesIO
 
+# from io import StringIO
+# import cProfile, pstats
+# from pstats import SortKey
+# def do_cprofile(func):
+#     def profiled_func(*args, **kwargs):
+#         profile = cProfile.Profile()
+#         try:
+#             profile.enable()
+#             result = func(*args, **kwargs)
+#             profile.disable()
+#             return result
+#         finally:
+#             # profile.sort_stats(SortKey.CUMULATIVE).print_stats(20)
+#             s = StringIO()
+#             sortby = SortKey.CUMULATIVE
+#             ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
+#             ps.print_stats(20)
+#             print(s.getvalue())
+#     return profiled_func
+
 import bs4
 
 def get_dcsource(inputio):
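For reference, the commented-out block above is the standard cProfile decorator pattern; the '# @do_cprofile' line in the next hunk shows where it would be applied. A runnable sketch assembled from those commented lines (the sample work() function is hypothetical, added only to demonstrate usage; pstats.SortKey needs Python 3.7+):

    import cProfile
    import pstats
    from io import StringIO
    from pstats import SortKey

    def do_cprofile(func):
        def profiled_func(*args, **kwargs):
            profile = cProfile.Profile()
            try:
                profile.enable()
                result = func(*args, **kwargs)
                profile.disable()
                return result
            finally:
                # dump the 20 most expensive calls by cumulative time
                s = StringIO()
                ps = pstats.Stats(profile, stream=s).sort_stats(SortKey.CUMULATIVE)
                ps.print_stats(20)
                print(s.getvalue())
        return profiled_func

    @do_cprofile
    def work():  # hypothetical stand-in for reset_orig_chapters_epub
        return sum(i * i for i in range(200000))

    work()

Keeping the block commented out avoids the profiling overhead in normal runs; uncommenting it and the decorator line re-enables per-call stats.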
@@ -293,6 +313,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
                return ahref
     return None
 
+# @do_cprofile
 def reset_orig_chapters_epub(inputio,outfile):
     inputepub = ZipFile(inputio, 'r') # works equally well with a path or a blob
 
@@ -345,7 +366,9 @@ def reset_orig_chapters_epub(inputio,outfile):
         if re.match(r'.*/file\d+\.xhtml',zf):
             #logger.debug("zf:%s"%zf)
             data = data.decode('utf-8')
-            soup = make_soup(data)
+            # should be re-reading an FFF file, single soup should
+            # be good enough and halve processing time.
+            soup = make_soup(data,dblsoup=False)
 
             chapterorigtitle = None
             tag = soup.find('meta',{'name':'chapterorigtitle'})
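This hunk is the heart of the perf improvement: reset_orig_chapters_epub re-parses XHTML that FanFicFare itself generated, so the defensive second parse inside make_soup adds nothing there, and skipping it roughly halves soup time per chapter file. A micro-benchmark sketch for checking the claim locally; the synthetic markup, the repetition counts, and the import path are assumptions, and absolute numbers will vary:

    import timeit
    from fanficfare.epubutils import make_soup  # assumed import path

    # synthetic stand-in for an FFF-generated chapter file
    data = "<html><body>" + "<p>chapter text</p>" * 500 + "</body></html>"

    double = timeit.timeit(lambda: make_soup(data), number=10)
    single = timeit.timeit(lambda: make_soup(data, dblsoup=False), number=10)
    print("dblsoup=True : %.2fs" % double)
    print("dblsoup=False: %.2fs" % single)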
@@ -458,7 +481,7 @@ def _replace_navxhtml(navxhtmldom,zf,chaptertoctitle):
             # logger.debug("a href=%s label:%s"%(zf,atag.toxml()))
             continue
 
-def make_soup(data):
+def make_soup(data,dblsoup=True):
     '''
     Convenience method for getting a bs4 soup.  bs3 has been removed.
     '''
@@ -473,6 +496,7 @@ def make_soup(data):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         soup = bs4.BeautifulSoup(data,'html5lib')
-        soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
+        if dblsoup:
+            soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
 
     for ns in soup.find_all('fff_hide_noscript'):
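Taken together, the last two hunks make the second parse opt-out while keeping the safer default for arbitrary input. A minimal standalone sketch of the resulting pattern, assuming bs4 and html5lib are installed; the real function also hides and restores noscript tags around the parse, omitted here, and uses six's unicode() where this Python 3 sketch uses str():

    import warnings
    import bs4

    def make_soup(data, dblsoup=True):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # first parse: html5lib recovers from badly nested markup
            soup = bs4.BeautifulSoup(data, 'html5lib')
            if dblsoup:
                # serialize and re-parse; catches more nesting problems
                # but roughly doubles the cost of the call
                soup = bs4.BeautifulSoup(str(soup), 'html5lib')
        return soup

    print(make_soup("<p>one<p>two", dblsoup=False).find_all('p'))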