mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-01 11:24:35 +02:00
Refactor xenforo code, '/' in getPathPrefix()
This commit is contained in:
parent
c68712a577
commit
1d077bda3f
3 changed files with 36 additions and 34 deletions
|
|
@ -38,9 +38,9 @@ class WWWAlternatehistoryComAdapter(BaseXenForo2ForumAdapter):
|
|||
return 'www.alternatehistory.com'
|
||||
|
||||
@classmethod
|
||||
def getURLPrefix(cls):
|
||||
def getPathPrefix(cls):
|
||||
# in case it needs more than just site/
|
||||
return 'https://' + cls.getSiteDomain() + '/forum'
|
||||
return '/forum/'
|
||||
|
||||
def get_threadmarks_top(self,souptag):
|
||||
return souptag.find('div',{'class':'block-outer-opposite--threadmarks'})
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
|
|||
raise exceptions.FailedToLogin(self.url,"No username given. Set in personal.ini or enter when prompted.")
|
||||
|
||||
## need a login token.
|
||||
data = self._fetchUrl(self.getURLPrefix() + '/login',usecache=False)
|
||||
data = self._fetchUrl(self.getURLPrefix() + 'login',usecache=False)
|
||||
# logger.debug(data)
|
||||
# <input type="hidden" name="_xfToken" value="1556822458,710e5bf6fc87c67ea04ab56a910ac3ff" />
|
||||
find_token='<input type="hidden" name="_xfToken" value="'
|
||||
|
|
@ -71,10 +71,10 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
|
|||
xftoken = xftoken[:xftoken.index('"')]
|
||||
params['remember'] = '1'
|
||||
params['_xfToken'] = xftoken
|
||||
params['_xfRedirect'] = self.getURLPrefix() + '/'
|
||||
params['_xfRedirect'] = self.getURLPrefix()
|
||||
|
||||
## https://forum.questionablequesting.com/login/login
|
||||
loginUrl = self.getURLPrefix() + '/login/login'
|
||||
loginUrl = self.getURLPrefix() + 'login/login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['login']))
|
||||
|
||||
|
|
@ -225,7 +225,7 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
|
|||
|
||||
def get_threadmark_range_url(self,tm_item,tmcat_num):
|
||||
fetcher = tm_item.find('div',{'data-xf-click':'threadmark-fetcher'})
|
||||
# logger.debug(fetcher)
|
||||
logger.debug('data-fetchurl:%s'%fetcher)
|
||||
return self.getURLPrefix() + fetcher['data-fetchurl']
|
||||
|
||||
def get_threadmark_date(self,tm_item):
|
||||
|
|
|
|||
|
|
@ -43,21 +43,18 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
#logger.info("init url: "+url)
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
#logger.debug("groupdict:%s"%m.groupdict())
|
||||
if m.group('anchorpost'):
|
||||
self.story.setMetadata('storyId',m.group('anchorpost'))
|
||||
self._setURL(self.getURLPrefix() + '/posts/'+m.group('anchorpost')+'/')
|
||||
self._setURL(self.getURLPrefix() + 'posts/'+m.group('anchorpost')+'/')
|
||||
else:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
# normalized story URL.
|
||||
title = m.group('title') or ""
|
||||
self._setURL(self.getURLPrefix() + '/'+m.group('tp')+'/'+title+self.story.getMetadata('storyId')+'/')
|
||||
self._setURL(self.getURLPrefix() + m.group('tp')+'/'+title+self.story.getMetadata('storyId')+'/')
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
|
|
@ -75,18 +72,23 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
"Only needs to be overriden if has additional ini sections."
|
||||
return ['base_xenforoforum',cls.getConfigSection()]
|
||||
|
||||
@classmethod
|
||||
def getPathPrefix(cls):
|
||||
# The site's fixed path prefix. '/' for most
|
||||
return '/'
|
||||
|
||||
@classmethod
|
||||
def getURLPrefix(cls):
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'https://' + cls.getSiteDomain()
|
||||
return 'https://' + cls.getSiteDomain() + cls.getPathPrefix()
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return cls.getURLPrefix()+"/threads/some-story-name.123456/ "+cls.getURLPrefix()+"/posts/123456/"
|
||||
return cls.getURLPrefix()+"threads/some-story-name.123456/ "+cls.getURLPrefix()+"posts/123456/"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
## need to accept http and https still.
|
||||
return re.escape(self.getURLPrefix()).replace("https","https?")+r"/(?P<tp>threads|posts)/(?P<title>.+\.)?(?P<id>\d+)/?[^#]*?(#?post-(?P<anchorpost>\d+))?$"
|
||||
return re.escape(self.getURLPrefix()).replace("https","https?")+r"(?P<tp>threads|posts)/(?P<title>.+\.)?(?P<id>\d+)/?[^#]*?(#?post-(?P<anchorpost>\d+))?$"
|
||||
|
||||
def _fetchUrlOpened(self, url,
|
||||
parameters=None,
|
||||
|
|
@ -119,12 +121,12 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
|
||||
## moved from extract metadata to share with normalize_chapterurl.
|
||||
if not url.startswith('http'):
|
||||
url = self.getURLPrefix()+'/'+url
|
||||
url = self.getURLPrefix()+url
|
||||
|
||||
if ( url.startswith(self.getURLPrefix()) or
|
||||
url.startswith('http://'+self.getSiteDomain()) or
|
||||
url.startswith('https://'+self.getSiteDomain()) ) and \
|
||||
( '/posts/' in url or '/threads/' in url or 'showpost.php' in url or 'goto/post' in url):
|
||||
( self.getPathPrefix()+'posts/' in url or self.getPathPrefix()+'threads/' in url or 'showpost.php' in url or 'goto/post' in url):
|
||||
## brute force way to deal with SB's http->https change
|
||||
## when hardcoded http urls. Now assumes all
|
||||
## base_xenforoforum sites use https--true as of
|
||||
|
|
@ -132,10 +134,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
url = url.replace('http://','https://')
|
||||
|
||||
# http://forums.spacebattles.com/showpost.php?p=4755532&postcount=9
|
||||
url = re.sub(r'showpost\.php\?p=([0-9]+)(&postcount=[0-9]+)?',r'/posts/\1/',url)
|
||||
url = re.sub(r'showpost\.php\?p=([0-9]+)(&postcount=[0-9]+)?',self.getPathPrefix()+r'posts/\1/',url)
|
||||
|
||||
# http://forums.spacebattles.com/goto/post?id=15222406#post-15222406
|
||||
url = re.sub(r'/goto/post\?id=([0-9]+)(#post-[0-9]+)?',r'/posts/\1/',url)
|
||||
url = re.sub(r'goto/post\?id=([0-9]+)(#post-[0-9]+)?',self.getPathPrefix()+r'posts/\1/',url)
|
||||
|
||||
url = re.sub(r'(^[\'"]+|[\'"]+$)','',url) # strip leading or trailing '" from incorrect quoting.
|
||||
url = re.sub(r'like$','',url) # strip 'like' if incorrect 'like' link instead of proper post URL.
|
||||
|
|
@ -147,24 +149,24 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
## *correct* ones.
|
||||
# https://forums.sufficientvelocity.com/posts/39915/
|
||||
if '#post-' in url:
|
||||
url = self.getURLPrefix()+'/posts/'+url.split('#post-')[1]+'/'
|
||||
url = self.getURLPrefix()+'posts/'+url.split('#post-')[1]+'/'
|
||||
|
||||
## Same as above except for the case where author mistakenly
|
||||
## used the reply link instead of normal link to post.
|
||||
# "http://forums.spacebattles.com/threads/manager-worm-story-thread-iv.301602/reply?quote=15962513"
|
||||
# https://forums.spacebattles.com/posts/
|
||||
if 'reply?quote=' in url:
|
||||
url = self.getURLPrefix()+'/posts/'+url.split('reply?quote=')[1]+'/'
|
||||
url = self.getURLPrefix()+'posts/'+url.split('reply?quote=')[1]+'/'
|
||||
|
||||
## normalize named thread urls, too.
|
||||
# http://forums.sufficientvelocity.com/threads/harry-potter-and-the-not-fatal-at-all-cultural-exchange-program.330/
|
||||
url = re.sub(r'/threads/.*\.([0-9]+)/',r'/threads/\1/',url)
|
||||
url = re.sub(re.escape(self.getPathPrefix())+r'threads/.*\.([0-9]+)/',self.getPathPrefix()+r'threads/\1/',url)
|
||||
|
||||
is_chapter_url = True
|
||||
|
||||
## One person once put a threadmarks URL directly in an
|
||||
## index post and now we have to exclude it.
|
||||
if re.match(r".*/threads/[0-9]+/threadmarks",url):
|
||||
if re.match(r'.*'+re.escape(self.getPathPrefix())+'threads/[0-9]+/threadmarks',url):
|
||||
is_chapter_url = False
|
||||
|
||||
return (is_chapter_url,url)
|
||||
|
|
@ -174,7 +176,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
## storyId, because this is called before story url has been
|
||||
## parsed.
|
||||
# logger.debug("pre--url:%s"%url)
|
||||
url = re.sub(r'/threads/.*\.(?P<id>[0-9]+)/',r'/threads/\g<id>/',url)
|
||||
url = re.sub(re.escape(self.getPathPrefix())+r'threads/.*\.(?P<id>[0-9]+)/',self.getPathPrefix()+r'threads/\g<id>/',url)
|
||||
# logger.debug("post-url:%s"%url)
|
||||
return url
|
||||
|
||||
|
|
@ -206,10 +208,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
params['register'] = '0'
|
||||
params['cookie_check'] = '1'
|
||||
params['_xfToken'] = ''
|
||||
params['redirect'] = self.getURLPrefix() + '/'
|
||||
params['redirect'] = self.getURLPrefix()
|
||||
|
||||
## https://forum.questionablequesting.com/login/login
|
||||
loginUrl = self.getURLPrefix() + '/login/login'
|
||||
loginUrl = self.getURLPrefix() + 'login/login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['login']))
|
||||
|
||||
|
|
@ -294,7 +296,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
elif threadmarksa['href'].startswith('/'):
|
||||
href = 'https://'+self.getSiteDomain()+threadmarksa['href']
|
||||
else:
|
||||
href = self.getURLPrefix()+'/'+threadmarksa['href']
|
||||
href = self.getURLPrefix()+threadmarksa['href']
|
||||
threadmarkgroups[tmcat_name]=self.fetch_threadmarks(href,
|
||||
tmcat_name,
|
||||
tmcat_num)
|
||||
|
|
@ -385,7 +387,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
date = self.get_threadmark_date(tm_item)
|
||||
words,kwords = self.get_threadmark_words(tm_item)
|
||||
if 'http' not in url:
|
||||
url = self.getURLPrefix()+"/"+url
|
||||
url = self.getURLPrefix()+url
|
||||
# logger.debug("%s. %s"%(tmcat_index,name))
|
||||
threadmarks.append({"tmcat_name":tmcat_name,
|
||||
"tmcat_num":tmcat_num,
|
||||
|
|
@ -452,7 +454,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
topsoup = souptag = self.make_soup(data)
|
||||
|
||||
if '#' not in useurl and '/posts/' not in useurl:
|
||||
if '#' not in useurl and self.getPathPrefix()+'posts/' not in useurl:
|
||||
self._setURL(useurl) ## for when threadmarked thread name changes.
|
||||
|
||||
self.parse_title(topsoup)
|
||||
|
|
@ -602,7 +604,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
def parse_author(self,souptag):
|
||||
a = souptag.find('h3',{'class':'userText'}).find('a')
|
||||
self.story.addToList('authorId',a['href'].split('/')[1])
|
||||
authorUrl = self.getURLPrefix()+'/'+a['href']
|
||||
authorUrl = self.getURLPrefix()+a['href']
|
||||
self.story.addToList('authorUrl',authorUrl)
|
||||
self.story.addToList('author',a.text)
|
||||
|
||||
|
|
@ -654,7 +656,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
def get_cache_post(self,postid):
|
||||
## saved using original 'post-99999' id for key.
|
||||
postid=unicode(postid) # thank you, Py3.
|
||||
if '/posts/' in postid:
|
||||
if self.getPathPrefix()+'posts/' in postid:
|
||||
## allows chapter urls to be passed in directly.
|
||||
# assumed normalized to /posts/1234/
|
||||
postid = "post-"+postid.split('/')[-2]
|
||||
|
|
@ -676,7 +678,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
# first post when always_include_first_post.
|
||||
if ( self.reader and
|
||||
self.getConfig("use_reader_mode",True) and
|
||||
'/threads/' not in url and
|
||||
self.getPathPrefix()+'threads/' not in url and
|
||||
(index > 0 or not self.getConfig('always_include_first_post')) ):
|
||||
logger.debug("Using reader mode")
|
||||
# in case it changes:
|
||||
|
|
@ -718,7 +720,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
# page. looking for it in cache reuses code in
|
||||
# cache_posts that finds post tags.
|
||||
souptag = self.get_cache_post(url)
|
||||
if not souptag and '/threads/' in url: # first post uses /thread/ URL.
|
||||
if not souptag and self.getPathPrefix()+'threads/' in url: # first post uses /thread/ URL.
|
||||
souptag = self.get_first_post(topsoup)
|
||||
|
||||
# remove <div class="baseHtml noticeContent"> because it can
|
||||
|
|
@ -729,10 +731,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
postbody = self.get_post_body(souptag)
|
||||
|
||||
# XenForo uses <base href="https://forums.spacebattles.com/" />
|
||||
return self.utf8FromSoup(self.getURLPrefix()+'/',postbody)
|
||||
return self.utf8FromSoup(self.getURLPrefix(),postbody)
|
||||
|
||||
def make_reader_url(self,tmcat_num,reader_page_num):
|
||||
return self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num)
|
||||
return self.getURLPrefix()+'threads/'+self.story.getMetadata('storyId')+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num)
|
||||
|
||||
def get_quote_expand_tag(self,soup):
|
||||
return soup.find_all('div',{'class':'quoteExpand'})
|
||||
|
|
|
|||
Loading…
Reference in a new issue