From e5b5768f111ed79a2aa7e22f8af0deb4458330a3 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Tue, 4 Nov 2025 12:20:39 -0600
Subject: [PATCH] Perf improvement for unnew

---
 fanficfare/epubutils.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/fanficfare/epubutils.py b/fanficfare/epubutils.py
index 3a33335d..1b3f2a26 100644
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@@ -20,6 +20,26 @@ from .six import ensure_text, text_type as unicode
 from .six import string_types as basestring
 from io import BytesIO
 
+# from io import StringIO
+# import cProfile, pstats
+# from pstats import SortKey
+# def do_cprofile(func):
+#     def profiled_func(*args, **kwargs):
+#         profile = cProfile.Profile()
+#         try:
+#             profile.enable()
+#             result = func(*args, **kwargs)
+#             profile.disable()
+#             return result
+#         finally:
+#             # profile.sort_stats(SortKey.CUMULATIVE).print_stats(20)
+#             s = StringIO()
+#             sortby = SortKey.CUMULATIVE
+#             ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
+#             ps.print_stats(20)
+#             print(s.getvalue())
+#     return profiled_func
+
 import bs4
 
 def get_dcsource(inputio):
@@ -293,6 +313,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
                     return ahref
     return None
 
+# @do_cprofile
 def reset_orig_chapters_epub(inputio,outfile):
     inputepub = ZipFile(inputio, 'r') # works equally well with a path or a blob
 
@@ -345,7 +366,9 @@ def reset_orig_chapters_epub(inputio,outfile):
         if re.match(r'.*/file\d+\.xhtml',zf):
             #logger.debug("zf:%s"%zf)
             data = data.decode('utf-8')
-            soup = make_soup(data)
+            # should be re-reading an FFF file, single soup should
+            # be good enough and halve processing time.
+            soup = make_soup(data,dblsoup=False)
 
             chapterorigtitle = None
             tag = soup.find('meta',{'name':'chapterorigtitle'})
@@ -458,7 +481,7 @@ def _replace_navxhtml(navxhtmldom,zf,chaptertoctitle):
             #     logger.debug("a href=%s label:%s"%(zf,atag.toxml()))
             continue
 
-def make_soup(data):
+def make_soup(data,dblsoup=True):
     '''
     Convenience method for getting a bs4 soup. bs3 has been removed.
     '''
@@ -473,7 +496,8 @@ def make_soup(data):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         soup = bs4.BeautifulSoup(data,'html5lib')
-    soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
+    if dblsoup:
+        soup = bs4.BeautifulSoup(unicode(soup),'html5lib')
 
     for ns in soup.find_all('fff_hide_noscript'):
         ns.name = 'noscript'
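
Note (not part of the patch): the speedup comes from make_soup() doing a single html5lib parse when dblsoup=False, instead of parsing and then re-parsing the serialized soup as before. The snippet below is a minimal timing sketch of that difference; the sample HTML and iteration count are invented for illustration, and it only assumes bs4 and html5lib are installed.

# Minimal timing sketch: one html5lib parse vs. the original
# parse-then-reparse behavior that dblsoup=False now skips.
import timeit
import warnings
import bs4

# Hypothetical chapter-sized HTML, a stand-in for a FFF fileNNNN.xhtml payload.
sample = ("<html><head><title>t</title></head><body>"
          + "<p>chapter text</p>" * 200
          + "</body></html>")

def single_soup(data):
    # dblsoup=False path: parse once.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        return bs4.BeautifulSoup(data, 'html5lib')

def double_soup(data):
    # dblsoup=True path: parse, serialize, parse again.
    return bs4.BeautifulSoup(str(single_soup(data)), 'html5lib')

print("single parse:", timeit.timeit(lambda: single_soup(sample), number=50))
print("double parse:", timeit.timeit(lambda: double_soup(sample), number=50))

On chapter-sized input the double parse should take roughly twice as long, which is consistent with the "halve processing time" comment in the diff.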