Make get_section_url classmethod for performance.

This commit is contained in:
Jim Miller 2021-01-30 16:06:20 -06:00
parent 6695b9a846
commit 586ddce59f
6 changed files with 26 additions and 16 deletions

View file

@ -133,7 +133,7 @@ class RejectUrlEntry:
if addreasontext:
self.note = self.note + ' - ' + addreasontext
if normalize:
if normalize and self.url:
self.url = getNormalStoryURL(self.url)
self.valid = self.url != None

View file

@ -250,13 +250,13 @@ def getNormalStoryURLSite(url):
## Originally defined for INI [storyUrl] sections where story URL
## contains a title that can change, now also used for reject list.
## Much faster as a classmethod: no adapter instance has to be created.
def get_section_url(url):
with lightweight_adapter(url) as adapter:
if adapter:
return adapter.get_section_url(url)
else:
logger.debug("WTF? no adapter?")
return None
cls = _get_class_for(url)[0]
if cls:
return cls.get_section_url(url)
else:
return None
def getAdapter(config,url,anyurl=False):

View file

@ -69,19 +69,26 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
self._setURL("https://"+self.getSiteDomain()\
+"/s/"+self.story.getMetadata('storyId')+"/1/"+self.urltitle)
def get_section_url(self,url):
## here so getSiteURLPattern and get_section_url(class method) can
## both use it. Note adapter_fictionpresscom has one too.
@classmethod
def _get_site_url_pattern(cls):
return r"https?://(www|m)?\.fanfiction\.net/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
@classmethod
def get_section_url(cls,url):
## minimal URL used for section names in INI and reject list
## for comparison
# logger.debug("pre--url:%s"%url)
m = re.match(self.getSiteURLPattern(),url)
m = re.match(cls._get_site_url_pattern(),url)
if m:
url = "https://"+self.getSiteDomain()\
url = "https://"+cls.getSiteDomain()\
+"/s/"+m.group('id')+"/1/"
# logger.debug("post-url:%s"%url)
return url
def getSiteURLPattern(self):
return r"https?://(www|m)?\.fanfiction\.net/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
return self._get_site_url_pattern()
def _fetchUrl(self,url,parameters=None,extrasleep=1.0,usecache=True):
## ffnet(and, I assume, fpcom) tends to fail more if hit too

View file

@ -44,8 +44,9 @@ class FictionPressComSiteAdapter(FanFictionNetSiteAdapter):
def getSiteExampleURLs(cls):
return "https://www.fictionpress.com/s/1234/1/ https://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title http://m.fictionpress.com/s/1234/1/"
def getSiteURLPattern(self):
return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
@classmethod
def _get_site_url_pattern(cls):
    """Return the regex source that matches fictionpress.com story URLs.

    Defined as a classmethod so URL matching can be done without an
    adapter instance. The pattern accepts http or https, a ``www.`` or
    ``m.`` host, and exposes two named groups: ``id`` (the numeric story
    id) and ``title`` (the optional trailing title segment).
    """
    # This adapter handles fictionpress.com, so the pattern must name that
    # domain; reusing the parent's fanfiction.net pattern would reject
    # every fictionpress.com story URL.
    return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
def getClass():
    """Return the FictionPressComSiteAdapter class object (not an instance)."""
    adapter_class = FictionPressComSiteAdapter
    return adapter_class

View file

@ -119,7 +119,8 @@ class BaseSiteAdapter(Configurable):
'''
return False
def get_section_url(self,url):
@classmethod
def get_section_url(cls,url):
'''
For adapters that have story URLs that can change. This is
applied both to the story URL (saved to metadata as

View file

@ -182,12 +182,13 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
return (is_chapter_url,url)
def get_section_url(self,url):
@classmethod
def get_section_url(cls,url):
## domain is checked in configuration loop. Can't check for
## storyId, because this is called before story url has been
## parsed.
# logger.debug("pre--url:%s"%url)
url = re.sub(re.escape(self.getPathPrefix())+r'threads/.*\.(?P<id>[0-9]+)/',self.getPathPrefix()+r'threads/\g<id>/',url)
url = re.sub(re.escape(cls.getPathPrefix())+r'threads/.*\.(?P<id>[0-9]+)/',cls.getPathPrefix()+r'threads/\g<id>/',url)
# logger.debug("post-url:%s"%url)
return url