diff --git a/fanficfare/cli.py b/fanficfare/cli.py index aaa1e532..6739dd6a 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -288,11 +288,13 @@ def dispatch(options, urls, configuration = get_configuration('test1.com',passed_defaultsini,passed_personalini,options) markread = configuration.getConfig('imap_mark_read') == 'true' or \ (configuration.getConfig('imap_mark_read') == 'downloadonly' and options.downloadimap) + normalize_urls = configuration.getConfig('imap_normalize_urls') == 'true' retlist = get_urls_from_imap(configuration.getConfig('imap_server'), configuration.getConfig('imap_username'), configuration.getConfig('imap_password'), configuration.getConfig('imap_folder'), - markread) + markread, + normalize_urls) if options.downloadimap: urls.extend(retlist) diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 6af1d207..7bd255cf 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -656,6 +656,13 @@ chapter_title_error_mark:(CHAPTER ERROR) ## #imap_mark_read:true +## Return normalized URLs instead of possibly returning the URL for a +## specific chapter. +## +## If 'true', URLs fetched from email via the CLI --imap flag will be normalized. +## +#imap_normalize_urls:false + ## Some authors use 'Zalgo' text--arbitrary and often excessive ## added/combined unicode markings--to indicate 'noise' of some kind. 
## While a critical part of some languages, when over used it can also diff --git a/fanficfare/geturls.py b/fanficfare/geturls.py index 7c59c99f..a2a46376 100644 --- a/fanficfare/geturls.py +++ b/fanficfare/geturls.py @@ -182,7 +182,7 @@ def cleanup_url(href,configuration,foremail=False): logger.warning("Skipping royalroad email URL %s, got HTTP error %s"%(href,e)) return href -def get_urls_from_imap(srv,user,passwd,folder,markread=True): +def get_urls_from_imap(srv,user,passwd,folder,markread=True,normalize_urls=False): # logger.debug("get_urls_from_imap srv:(%s)"%srv) mail = imaplib.IMAP4_SSL(srv) @@ -256,9 +256,9 @@ def get_urls_from_imap(srv,user,passwd,folder,markread=True): try: # logger.debug("part mime:%s"%part.get_content_type()) if part.get_content_type() == 'text/plain': - urllist.extend(get_urls_from_text(part.get_payload(decode=True),foremail=True)) + urllist.extend(get_urls_from_text(part.get_payload(decode=True),foremail=True, normalize=normalize_urls)) if part.get_content_type() == 'text/html': - urllist.extend(get_urls_from_html(part.get_payload(decode=True),foremail=True)) + urllist.extend(get_urls_from_html(part.get_payload(decode=True),foremail=True, normalize=normalize_urls)) except Exception as e: logger.error("Failed to read email content: %s"%e,exc_info=True)