mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-07 09:23:28 +01:00
adapter_scribblehubcom: Allow for changing title in story URL.
This commit is contained in:
parent
4ea869a764
commit
654619e7e2
2 changed files with 55 additions and 17 deletions
|
|
@@ -1133,7 +1133,7 @@ class FanFicFarePlugin(InterfaceAction):
|
||||||
## Jul 2024, added similar handling for scribblehub
|
## Jul 2024, added similar handling for scribblehub
|
||||||
## https://www.scribblehub.com/series/862913/title
|
## https://www.scribblehub.com/series/862913/title
|
||||||
if r"\.scribblehub\.com" in regexp:
|
if r"\.scribblehub\.com" in regexp:
|
||||||
regexp = re.sub(r"^(?P<keep>.*com/series/\d+/\d+/)(?P<urltitle>[^\$]*)?",
|
regexp = re.sub(r"^(?P<keep>.*com/series/\d+/)(?P<urltitle>[^$]*)?",
|
||||||
r"\g<keep>(.*)",regexp)
|
r"\g<keep>(.*)",regexp)
|
||||||
# logger.debug(regexp)
|
# logger.debug(regexp)
|
||||||
retval = self.gui.current_db.search_getting_ids(regexp,None,use_virtual_library=False)
|
retval = self.gui.current_db.search_getting_ids(regexp,None,use_virtual_library=False)
|
||||||
|
|
|
||||||
|
|
@@ -57,16 +57,9 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||||
self.password = ""
|
self.password = ""
|
||||||
self.is_adult=False
|
self.is_adult=False
|
||||||
|
self.urltitle = "some-title"
|
||||||
|
|
||||||
m = re.match(self.getSiteURLPattern(),url)
|
self.set_story_idurl(url)
|
||||||
# logger.debug("id:%s"%m.group('id'))
|
|
||||||
# logger.debug("title:%s"%m.group('title'))
|
|
||||||
|
|
||||||
# get storyId from url
|
|
||||||
self.story.setMetadata('storyId', m.group('id'))
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL('https://' + self.getSiteDomain() + '/series/' + self.story.getMetadata('storyId') + '/' + m.group('title') + '/')
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
self.story.setMetadata('siteabbrev','scrhub') # XXX
|
self.story.setMetadata('siteabbrev','scrhub') # XXX
|
||||||
|
|
@@ -75,6 +68,19 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||||
self.dateformat = "%b %d, %Y" # XXX
|
self.dateformat = "%b %d, %Y" # XXX
|
||||||
|
|
||||||
|
def set_story_idurl(self,url):
|
||||||
|
m = re.match(self.getSiteURLPattern(),url)
|
||||||
|
# logger.debug("id:%s"%m.group('id'))
|
||||||
|
# logger.debug("urltitle:%s"%m.group('urltitle'))
|
||||||
|
|
||||||
|
# get storyId from url
|
||||||
|
self.story.setMetadata('storyId', m.group('id'))
|
||||||
|
if m.group('urltitle'):
|
||||||
|
self.urltitle = m.group('urltitle')
|
||||||
|
# logger.debug("urltitle:%s"%self.urltitle)
|
||||||
|
|
||||||
|
# normalized story URL.
|
||||||
|
self._setURL('https://' + self.getSiteDomain() + '/series/' + self.story.getMetadata('storyId') + '/' + self.urltitle + '/')
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
@staticmethod # must be @staticmethod, don't remove it.
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
|
|
@@ -88,8 +94,36 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
||||||
return "https://"+cls.getSiteDomain()+"/series/1234/storyname/"
|
return "https://"+cls.getSiteDomain()+"/series/1234/storyname/"
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return re.escape("https://"+self.getSiteDomain())+r"/(series|read)/(?P<id>\d+)[/-](?P<title>[^/]+)?"
|
return self._get_site_url_pattern()
|
||||||
|
|
||||||
|
## here so getSiteURLPattern and get_section_url(class method) can
|
||||||
|
## both use it. Note adapter_fictionpresscom has one too.
|
||||||
|
@classmethod
|
||||||
|
def _get_site_url_pattern(cls):
|
||||||
|
return re.escape("https://"+cls.getSiteDomain())+r"/(series|read)/(?P<id>\d+)([/-](?P<urltitle>[^/]+))?"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_section_url(cls,url):
|
||||||
|
## minimal URL used for section names in INI and reject list
|
||||||
|
## for comparison
|
||||||
|
# logger.debug("pre section--url:%s"%url)
|
||||||
|
m = re.match(cls._get_site_url_pattern(),url)
|
||||||
|
if m:
|
||||||
|
url = "https://"+cls.getSiteDomain()\
|
||||||
|
+"/series/"+m.group('id')+"/a-title/"
|
||||||
|
# logger.debug("post-section url:%s"%url)
|
||||||
|
return url
|
||||||
|
|
||||||
|
## normalized chapter URLs DO contain the story title now, but
|
||||||
|
## normalized to current urltitle in case of title changes.
|
||||||
|
def normalize_chapterurl(self,url):
|
||||||
|
# https://www.scribblehub.com/read/862913-hp-the-arcane-thief-litrpg/chapter/1175961/
|
||||||
|
# logger.debug("pre normal chapter--url:%s"%url)
|
||||||
|
url = re.sub(r"https?://(?P<keep>www\.scribblehub\.com/read/\d+-).*(?P<chapter>/chapter/\d+/)",
|
||||||
|
(r"https://\g<keep>"+self.urltitle+r"\g<chapter>"),url)
|
||||||
|
# logger.debug("post normal chapter-url:%s"%url)
|
||||||
|
return url
|
||||||
|
|
||||||
def post_request(self, url,
|
def post_request(self, url,
|
||||||
parameters=None,
|
parameters=None,
|
||||||
usecache=True):
|
usecache=True):
|
||||||
|
|
@@ -97,8 +131,8 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
||||||
return super(getClass(), self).post_request(url, parameters, usecache)
|
return super(getClass(), self).post_request(url, parameters, usecache)
|
||||||
except exceptions.HTTPErrorFFF as e:
|
except exceptions.HTTPErrorFFF as e:
|
||||||
## this is a fix for the scribblehub ajax request sometimes returning
|
## this is a fix for the scribblehub ajax request sometimes returning
|
||||||
# a 400 but only with flaresolverr. Have not been able to reproduce
|
# a 400 but only with flaresolverr. Have not been able to reproduce
|
||||||
# in curl/firefox. See: https://github.com/JimmXinu/FanFicFare/pull/900
|
# in curl/firefox. See: https://github.com/JimmXinu/FanFicFare/pull/900
|
||||||
logger.debug("HTTPErrorFFF/Scribblehub: " + str(e.status_code))
|
logger.debug("HTTPErrorFFF/Scribblehub: " + str(e.status_code))
|
||||||
if e.status_code == 400 and self.getConfig('use_flaresolverr_proxy'):
|
if e.status_code == 400 and self.getConfig('use_flaresolverr_proxy'):
|
||||||
return self.decode_data(e.data)
|
return self.decode_data(e.data)
|
||||||
|
|
@@ -136,11 +170,15 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
|
||||||
## Title
|
## Title
|
||||||
pagetitle = soup.find('div',{'class':'fic_title'})
|
pagetitle = soup.find('div',{'class':'fic_title'})
|
||||||
self.story.setMetadata('title',stripHTML(pagetitle))
|
self.story.setMetadata('title',stripHTML(pagetitle))
|
||||||
|
|
||||||
|
## <link rel="canonical" href="https://www.scribblehub.com/series/862913/hp-the-arcane-thief-litrpg/" />
|
||||||
|
canonicalurl = soup.select_one('link[rel=canonical]')['href']
|
||||||
|
self.set_story_idurl(canonicalurl)
|
||||||
|
url = canonicalurl
|
||||||
|
|
||||||
# Find authorid and URL from main story page
|
# Find authorid and URL from main story page
|
||||||
self.story.setMetadata('authorId',stripHTML(soup.find('span',{'class':'auth_name_fic'})))
|
self.story.setMetadata('authorId',stripHTML(soup.find('span',{'class':'auth_name_fic'})))
|
||||||
self.story.setMetadata('authorUrl',soup.find('div',{'class':'author'}).find('div',{'property':'author'}).find('span',{'property':'name'}).find('a').get('href'))
|
self.story.setMetadata('authorUrl',soup.find('div',{'class':'author'}).find('div',{'property':'author'}).find('span',{'property':'name'}).find('a').get('href'))
|
||||||
|
|
@@ -155,9 +193,9 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
||||||
"strSID": self.story.getMetadata('storyId'),
|
"strSID": self.story.getMetadata('storyId'),
|
||||||
"strmypostid": 0,
|
"strmypostid": 0,
|
||||||
"strFic": "yes"}
|
"strFic": "yes"}
|
||||||
|
|
||||||
# 14/12/22 - Looks like it should follow this format now (below), but still returns a 400
|
# 14/12/22 - Looks like it should follow this format now (below), but still returns a 400
|
||||||
# but not a 403. tested in browser getting rid of all other cookies to try and get a 400 and nopes.
|
# but not a 403. tested in browser getting rid of all other cookies to try and get a 400 and nopes.
|
||||||
|
|
||||||
# contents_payload = {"action": "wi_getreleases_pagination",
|
# contents_payload = {"action": "wi_getreleases_pagination",
|
||||||
# "pagenum": 1,
|
# "pagenum": 1,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue