mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
More direct handling of /../ in Get Story URLs from web page; the previous approach broke other sites. #1160
This commit is contained in:
parent
9b8eb547fc
commit
8e55d1e6f4
1 changed files with 9 additions and 3 deletions
|
|
@ -23,7 +23,7 @@ import re
|
|||
|
||||
# unicode in py2, str in py3
|
||||
from .six.moves.urllib.request import urlopen
|
||||
from .six.moves.urllib.parse import (urlparse, urlunparse, urljoin)
|
||||
from .six.moves.urllib.parse import (urlparse, urlunparse)
|
||||
from .six import text_type as unicode
|
||||
from .six import ensure_str
|
||||
|
||||
|
|
@ -137,7 +137,7 @@ def form_url(parenturl,url):
|
|||
returl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
parsedUrl.netloc,
|
||||
urljoin(url,'.'),
|
||||
url,
|
||||
'','',''))
|
||||
else:
|
||||
toppath=""
|
||||
|
|
@ -148,7 +148,7 @@ def form_url(parenturl,url):
|
|||
returl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
parsedUrl.netloc,
|
||||
urljoin(toppath + '/' + url,'.'),
|
||||
toppath + '/' + url,
|
||||
'','',''))
|
||||
return returl
|
||||
|
||||
|
|
@ -181,6 +181,12 @@ def cleanup_url(href,configuration,foremail=False):
|
|||
href = href.replace('&index=1','')
|
||||
except Exception as e:
|
||||
logger.warning("Skipping royalroad email URL %s, got HTTP error %s"%(href,e))
|
||||
if '/../' in href:
|
||||
## For mcstories.com, see #1160. All my attempts to use
|
||||
## urljoin() got uncomfortably complex in the face of
|
||||
## javascript links and parameter URLs. And normpath() will
|
||||
## give \ on windows.
|
||||
href = re.sub(r'([^/]+/../)',r'',href)
|
||||
return href
|
||||
|
||||
def get_urls_from_imap(srv,user,passwd,folder,markread=True,normalize_urls=False):
|
||||
|
|
|
|||
Loading…
Reference in a new issue