mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
adapter_fanfictionnet: Start keeping story title part of storyUrl.
This commit is contained in:
parent
48b8730571
commit
6965a04403
2 changed files with 39 additions and 17 deletions
|
|
@ -1082,6 +1082,14 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
# http, plus many sites are now switching to https.
|
||||
regexp = r'identifiers:"~ur(i|l):~^https?%s$"'%(re.sub(r'^https?','',re.escape(url)))
|
||||
# logger.debug(regexp)
|
||||
## Added Jan 2021, adapter_fanfictionnet is keeping title in
|
||||
## URL now, search with and without url title. 'URL changed'
|
||||
## check will still trigger if existing URL has a *different*
|
||||
## url title.
|
||||
if "\.fanfiction\.net" in regexp:
|
||||
regexp = re.sub(r"^(?P<keep>.*net/s/\d+/\d+/)(?P<urltitle>[^\$]*)?",
|
||||
r"\g<keep>(\g<urltitle>)?",regexp)
|
||||
# logger.debug(regexp)
|
||||
return self.gui.current_db.search_getting_ids(regexp,None,use_virtual_library=False)
|
||||
|
||||
def prep_downloads(self, options, books, merge=False, extrapayload=None):
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ import re
|
|||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
from ..six.moves.urllib.parse import urlparse
|
||||
|
||||
from ..chromagnon.cacheParse import ChromeCache
|
||||
|
||||
|
|
@ -42,20 +43,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','ffnet')
|
||||
|
||||
# get storyId from url--url validation guarantees second part is storyId
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
self.set_story_idurl(url)
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/s/"+self.story.getMetadata('storyId')+"/1/")
|
||||
|
||||
# ffnet update emails have the latest chapter URL.
|
||||
# Frequently, when they arrive, not all the servers have the
|
||||
# latest chapter yet and going back to chapter 1 to pull the
|
||||
# chapter list doesn't get the latest. So save and use the
|
||||
# original URL given to pull chapter list & metadata.
|
||||
# Not used by plugin because URL gets normalized first for
|
||||
# eliminating duplicate story urls.
|
||||
self.origurl = url
|
||||
if "https://m." in self.origurl:
|
||||
## accept m(mobile)url, but use www.
|
||||
|
|
@ -74,6 +63,15 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteExampleURLs(cls):
    """Return a space-separated sample of story URLs this adapter accepts."""
    # Cover both hosts (www/m), both schemes, a multi-chapter URL, and a
    # URL carrying the trailing story-title slug.
    examples = ["https://www.fanfiction.net/s/1234/1/",
                "https://www.fanfiction.net/s/1234/12/",
                "http://www.fanfiction.net/s/1234/1/Story_Title",
                "http://m.fanfiction.net/s/1234/1/"]
    return " ".join(examples)
|
||||
|
||||
def set_story_idurl(self, url):
    """Record the storyId (and any trailing URL title slug) found in *url*,
    then normalize the adapter's story URL back to chapter 1.

    ffnet paths look like /s/<storyId>/<chapter>/<optional-title>; when the
    title segment is present it is kept on the normalized URL.
    """
    # path '/s/<id>/<chap>/<title>' splits to ['', 's', id, chap, title]
    path_segments = urlparse(url).path.split('/',)
    self.story.setMetadata('storyId', path_segments[2])
    # Keep the story-title slug when the URL carries one, '' otherwise.
    if len(path_segments) < 5:
        self.urltitle = ''
    else:
        self.urltitle = path_segments[4]
    # normalized story URL: https scheme, site domain, chapter 1, plus slug.
    self._setURL("https://" + self.getSiteDomain()
                 + "/s/" + self.story.getMetadata('storyId')
                 + "/1/" + self.urltitle)
|
||||
|
||||
def getSiteURLPattern(self):
    """Regex accepting ffnet story URLs: optional www/m host, /s/<storyId>,
    optional chapter number, optional trailing story-title segment."""
    # NOTE(review): '(www|m)?' is optional but the following '\.' is not,
    # so a bare 'fanfiction.net' host would not match -- confirm intended.
    pattern = r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
    return pattern
|
||||
|
||||
|
|
@ -136,6 +134,13 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
'''
|
||||
return True
|
||||
|
||||
## not actually putting urltitle on multi-chapters below, but
## one-shots will have it, so this is still useful. normalized
## chapter URLs do NOT contain the story title.
def normalize_chapterurl(self, url):
    """Canonicalize a chapter URL: force https, force the www host, and
    strip any trailing story-title slug after the chapter number."""
    # URLs that don't match the ffnet chapter shape pass through unchanged
    # (re.sub returns the string as-is when there is no match).
    replacement = r"https://www.\g<keep>"
    return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
                  replacement, url)
|
||||
|
||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
get_cover=False
|
||||
# fetch the chapter. From that we will get almost all the
|
||||
|
|
@ -165,6 +170,10 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
if "Please check to see you are not using an outdated url." in data:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
|
||||
|
||||
# <link rel="canonical" href="//www.fanfiction.net/s/13551154/100/Haze-Gray">
|
||||
canonicalurl = soup.select_one('link[rel=canonical]')['href']
|
||||
self.set_story_idurl(canonicalurl)
|
||||
|
||||
if self.getConfig('check_next_chapter'):
|
||||
try:
|
||||
## ffnet used to have a tendency to send out update
|
||||
|
|
@ -177,9 +186,10 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
# get chapter part of url.
|
||||
except:
|
||||
chapcount = 1
|
||||
tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount+1)
|
||||
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount+1,
|
||||
self.urltitle)
|
||||
logger.debug('=Trying newer chapter: %s' % tryurl)
|
||||
newdata = self._fetchUrl(tryurl)
|
||||
if "not found. Please check to see you are not using an outdated url." not in newdata \
|
||||
|
|
@ -409,7 +419,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
## ffnet(and, I assume, fpcom) tends to fail more if hit too
|
||||
## fast. This is in additional to what ever the
|
||||
## slow_down_sleep_time setting is.
|
||||
data = self._fetchUrl(url,extrasleep=4.0)
|
||||
|
||||
## AND explicitly put title URL back on chapter URL for fetch
|
||||
## *only*--normalized chapter URL does NOT have urltitle
|
||||
data = self._fetchUrl(url+self.urltitle,
|
||||
extrasleep=4.0)
|
||||
|
||||
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
|
||||
|
|
|
|||
Loading…
Reference in a new issue