mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-07 17:33:05 +01:00
More direct way to handle /../ in Get Story URLs from web page; the previous approach broke other sites. #1160
This commit is contained in:
parent
9b8eb547fc
commit
8e55d1e6f4
1 changed files with 9 additions and 3 deletions
|
|
@ -23,7 +23,7 @@ import re
|
||||||
|
|
||||||
# unicode in py2, str in py3
|
# unicode in py2, str in py3
|
||||||
from .six.moves.urllib.request import urlopen
|
from .six.moves.urllib.request import urlopen
|
||||||
from .six.moves.urllib.parse import (urlparse, urlunparse, urljoin)
|
from .six.moves.urllib.parse import (urlparse, urlunparse)
|
||||||
from .six import text_type as unicode
|
from .six import text_type as unicode
|
||||||
from .six import ensure_str
|
from .six import ensure_str
|
||||||
|
|
||||||
|
|
@ -137,7 +137,7 @@ def form_url(parenturl,url):
|
||||||
returl = urlunparse(
|
returl = urlunparse(
|
||||||
(parsedUrl.scheme,
|
(parsedUrl.scheme,
|
||||||
parsedUrl.netloc,
|
parsedUrl.netloc,
|
||||||
urljoin(url,'.'),
|
url,
|
||||||
'','',''))
|
'','',''))
|
||||||
else:
|
else:
|
||||||
toppath=""
|
toppath=""
|
||||||
|
|
@ -148,7 +148,7 @@ def form_url(parenturl,url):
|
||||||
returl = urlunparse(
|
returl = urlunparse(
|
||||||
(parsedUrl.scheme,
|
(parsedUrl.scheme,
|
||||||
parsedUrl.netloc,
|
parsedUrl.netloc,
|
||||||
urljoin(toppath + '/' + url,'.'),
|
toppath + '/' + url,
|
||||||
'','',''))
|
'','',''))
|
||||||
return returl
|
return returl
|
||||||
|
|
||||||
|
|
@ -181,6 +181,12 @@ def cleanup_url(href,configuration,foremail=False):
|
||||||
href = href.replace('&index=1','')
|
href = href.replace('&index=1','')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Skipping royalroad email URL %s, got HTTP error %s"%(href,e))
|
logger.warning("Skipping royalroad email URL %s, got HTTP error %s"%(href,e))
|
||||||
|
if '/../' in href:
|
||||||
|
## For mcstories.com, see #1160. All my attempts to use
|
||||||
|
## urljoin() got uncomfortably complex in the face of
|
||||||
|
## javascript links and parameter URLs. And normpath() will
|
||||||
|
## give \ on windows.
|
||||||
|
href = re.sub(r'([^/]+/../)',r'',href)
|
||||||
return href
|
return href
|
||||||
|
|
||||||
def get_urls_from_imap(srv,user,passwd,folder,markread=True,normalize_urls=False):
|
def get_urls_from_imap(srv,user,passwd,folder,markread=True,normalize_urls=False):
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue