Add a configuration option to normalize URLs returned from CLI --imap

2026-05-08 12:36:11 +02:00 · 2024-01-28 12:37:34 -07:00 · 2024-01-28 12:37:34 -07:00 · 7a89d03339
commit 7a89d03339
parent ae638fd0a1
3 changed files with 13 additions and 4 deletions
--- a/fanficfare/cli.py
+++ b/fanficfare/cli.py
@ -288,11 +288,13 @@ def dispatch(options, urls,
        configuration = get_configuration('test1.com',passed_defaultsini,passed_personalini,options)
        markread = configuration.getConfig('imap_mark_read') == 'true' or \
            (configuration.getConfig('imap_mark_read') == 'downloadonly' and options.downloadimap)
+        normalize_urls = configuration.getConfig('imap_normalize_urls') == 'true'
        retlist = get_urls_from_imap(configuration.getConfig('imap_server'),
                                     configuration.getConfig('imap_username'),
                                     configuration.getConfig('imap_password'),
                                     configuration.getConfig('imap_folder'),
-                                     markread)
+                                     markread,
+                                     normalize_urls)

        if options.downloadimap:
            urls.extend(retlist)
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@ -656,6 +656,13 @@ chapter_title_error_mark:(CHAPTER ERROR)
 ##
 #imap_mark_read:true

+## Return normalized URLs instead of possibly returning the URL for a
+## specific chapter.
+##
+## If 'true', URLs returned by the CLI --imap option are normalized.
+##
+#imap_normalize_urls:false
+
 ## Some authors use 'Zalgo' text--arbitrary and often excessive
 ## added/combined unicode markings--to indicate 'noise' of some kind.
 ## While a critical part of some languages, when over used it can also
--- a/fanficfare/geturls.py
+++ b/fanficfare/geturls.py
@ -182,7 +182,7 @@ def cleanup_url(href,configuration,foremail=False):
            logger.warning("Skipping royalroad email URL %s, got HTTP error %s"%(href,e))
    return href

-def get_urls_from_imap(srv,user,passwd,folder,markread=True):
+def get_urls_from_imap(srv,user,passwd,folder,markread=True,normalize_urls=False):

    # logger.debug("get_urls_from_imap srv:(%s)"%srv)
    mail = imaplib.IMAP4_SSL(srv)
@ -256,9 +256,9 @@ def get_urls_from_imap(srv,user,passwd,folder,markread=True):
            try:
                # logger.debug("part mime:%s"%part.get_content_type())
                if part.get_content_type() == 'text/plain':
-                    urllist.extend(get_urls_from_text(part.get_payload(decode=True),foremail=True))
+                    urllist.extend(get_urls_from_text(part.get_payload(decode=True),foremail=True, normalize=normalize_urls))
                if part.get_content_type() == 'text/html':
-                    urllist.extend(get_urls_from_html(part.get_payload(decode=True),foremail=True))
+                    urllist.extend(get_urls_from_html(part.get_payload(decode=True),foremail=True, normalize=normalize_urls))
            except Exception as e:
                logger.error("Failed to read email content: %s"%e,exc_info=True)