From 23400f955780c41b4e2f7f9cd813036ea38d2cf5 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 14 Jul 2019 10:28:04 -0500 Subject: [PATCH] Tweak getting URLs from email for SV XF2 changes, don't get post URLs. --- fanficfare/geturls.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fanficfare/geturls.py b/fanficfare/geturls.py index 1acba392..4f531ae5 100644 --- a/fanficfare/geturls.py +++ b/fanficfare/geturls.py @@ -136,7 +136,10 @@ def get_urls_from_text(data,configuration=None,normalize=False,email=False): if not configuration: configuration = Configuration(["test1.com"],"EPUB",lightweight=True) - for href in re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', data): + for href in re.findall('\(?http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\)?', data): + ## detect and remove ()s around URL ala markdown. + if href[0] == '(' and href[-1] == ')': + href = href[1:-1] href = cleanup_url(href,email) try: adapter = adapters.getAdapter(configuration,href) @@ -192,7 +195,10 @@ def cleanup_url(href,email=False): ## xenforo emails, toss unread and page/post urls. Emails are ## only sent for thread updates, I believe. Email only so ## get_urls_from_page can still get post URLs. - href = re.sub(r"/(unread|page-\d+)?(#post-\d+)?",r"/",href) + href = re.sub(r"/(unread|page-\d+)?(#post-\d+)?(\?new=1)?",r"/",href) + elif email and 'sufficientvelocity' in href and '/posts/' in href: + ## SV emails now use /posts/ instead of #post- + href = "" elif 'click' in href and 'royalroad' in href: # they've changed the domain at least once # logger.debug(href) from .six.moves.urllib.request import build_opener