Add a configuration option to normalize URLs returned from CLI --imap

This commit is contained in:
Moxie 2024-01-28 12:37:34 -07:00 committed by Jim Miller
parent ae638fd0a1
commit 7a89d03339
3 changed files with 13 additions and 4 deletions

View file

@ -288,11 +288,13 @@ def dispatch(options, urls,
configuration = get_configuration('test1.com',passed_defaultsini,passed_personalini,options)
markread = configuration.getConfig('imap_mark_read') == 'true' or \
(configuration.getConfig('imap_mark_read') == 'downloadonly' and options.downloadimap)
normalize_urls = configuration.getConfig('imap_normalize_urls') == 'true'
retlist = get_urls_from_imap(configuration.getConfig('imap_server'),
configuration.getConfig('imap_username'),
configuration.getConfig('imap_password'),
configuration.getConfig('imap_folder'),
markread)
markread,
normalize_urls)
if options.downloadimap:
urls.extend(retlist)

View file

@ -656,6 +656,13 @@ chapter_title_error_mark:(CHAPTER ERROR)
##
#imap_mark_read:true
## Return normalized URLs instead of possibly returning the URL for a
## specific chapter.
##
## If 'true', URLs returned by the CLI --imap flag will be normalized.
##
#imap_normalize_urls:false
## Some authors use 'Zalgo' text--arbitrary and often excessive
## added/combined unicode markings--to indicate 'noise' of some kind.
## While a critical part of some languages, when over used it can also

View file

@ -182,7 +182,7 @@ def cleanup_url(href,configuration,foremail=False):
logger.warning("Skipping royalroad email URL %s, got HTTP error %s"%(href,e))
return href
def get_urls_from_imap(srv,user,passwd,folder,markread=True):
def get_urls_from_imap(srv,user,passwd,folder,markread=True,normalize_urls=False):
# logger.debug("get_urls_from_imap srv:(%s)"%srv)
mail = imaplib.IMAP4_SSL(srv)
@ -256,9 +256,9 @@ def get_urls_from_imap(srv,user,passwd,folder,markread=True):
try:
# logger.debug("part mime:%s"%part.get_content_type())
if part.get_content_type() == 'text/plain':
urllist.extend(get_urls_from_text(part.get_payload(decode=True),foremail=True))
urllist.extend(get_urls_from_text(part.get_payload(decode=True),foremail=True, normalize=normalize_urls))
if part.get_content_type() == 'text/html':
urllist.extend(get_urls_from_html(part.get_payload(decode=True),foremail=True))
urllist.extend(get_urls_from_html(part.get_payload(decode=True),foremail=True, normalize=normalize_urls))
except Exception as e:
logger.error("Failed to read email content: %s"%e,exc_info=True)