Story URLs in royalroad email notices are clicktracker links that redirect to the actual story URLs. Follow those redirects to obtain the real story URL. Issue #349

This commit is contained in:
Jim Miller 2018-10-19 14:03:05 -05:00
parent 4007f82ce6
commit 1433576b9d

View file

@ -192,6 +192,14 @@ def cleanup_url(href,email=False):
## only sent for thread updates, I believe. Email only so
## get_urls_from_page can still get post URLs.
href = re.sub(r"/(unread|page-\d+)?(#post-\d+)?",r"/",href)
elif 'clicktracker.royalroad' in href:
logger.debug(href)
from .six.moves.urllib.request import build_opener
opener = build_opener()
opener.addheaders = [('User-Agent', '')] ## give 403 Forbidden without a UA.
opened = opener.open(href.replace(' ','%20'))
logger.debug(opened.url)
href = opened.url
href = href.replace('&index=1','')
return href