Add fetch_last_page for base_xenforo -- SB no longer sends notice emails if the user is not up to date on the thread.

This commit is contained in:
Jim Miller 2019-08-17 14:15:40 -05:00
parent 5c6ba3e62d
commit 60763b8156
3 changed files with 65 additions and 7 deletions

View file

@ -37,7 +37,6 @@ logger = logging.getLogger(__name__)
class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
def __init__(self, config, url):
logger.info("init url: "+url)
BaseXenForoForumAdapter.__init__(self, config, url)
@classmethod
@ -45,9 +44,14 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
"Only needs to be overriden if has additional ini sections."
return super(BaseXenForo2ForumAdapter, cls).getConfigSections() + ['base_xenforo2forum']
def performLogin(self):
def performLogin(self,data):
params = {}
if data and "Log in" not in data:
## already logged in.
logger.debug("Already Logged In")
return
if self.password:
params['login'] = self.username
params['password'] = self.password
@ -77,7 +81,7 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
d = self._postUrl(loginUrl, params)# , headers={ 'referer':self.getURLPrefix() + '/login',
# 'origin':self.getURLPrefix() })
if "Log In" in d:
if "Log in" in d:
# logger.debug(d)
logger.info("Failed to login to URL %s as %s" % (self.url,
params['login']))
@ -242,3 +246,12 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
## as XF1.
for tag in soup.find_all('div', class_="bbCodeBlock-expandContent"):
tag.name='blockquote'
def get_last_page_url(self,topsoup):
    """
    Return the href of the last 'page-' link found in the page
    navigation of a XenForo2 thread page.

    topsoup -- parsed soup of the thread page; it is searched for
               <ul class="pageNav-main">.

    Raises ValueError when the page has no pageNav-main list or the
    list holds no 'page-' links, instead of failing on None or on an
    empty list.  (Presumably that is the single-page-thread case --
    confirm against a live page.)
    """
    ## <ul class="pageNav-main">
    ul = topsoup.find('ul',{'class':'pageNav-main'})
    # logger.debug(ul)
    if ul is None:
        raise ValueError("No pageNav-main list found; cannot determine last page")
    pagelinks = ul.find_all('a',href=re.compile(r'page-'))
    if not pagelinks:
        raise ValueError("No page links found in pageNav-main; cannot determine last page")
    # the final 'page-' link in the list is taken as the last page.
    lastpage = pagelinks[-1]
    # logger.debug(lastpage)
    return lastpage['href']

View file

@ -185,9 +185,14 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
'''
return True
def performLogin(self):
def performLogin(self,data):
params = {}
if data and "Log Out" in data:
## already logged in.
logger.debug("Already Logged In")
return
if self.password:
params['login'] = self.username
params['password'] = self.password
@ -211,6 +216,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
d = self._fetchUrl(loginUrl, params)
if "Log Out" not in d:
logger.debug(d)
logger.info("Failed to login to URL %s as %s" % (self.url,
params['login']))
raise exceptions.FailedToLogin(self.url,params['login'])
@ -381,9 +387,35 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
tmcat_index += 1
return threadmarks
def get_last_page_url(self,topsoup):
    """
    Return the href of the last-page link from the page navigation
    of a XenForo thread page.

    topsoup -- parsed soup of the thread page; it is searched for
               <span class="pageNavHeader"> and the first <nav> that
               follows it.

    Raises ValueError when the navigation is missing or has fewer
    than two 'page-' links, instead of failing on None or on a short
    list.
    """
    span = topsoup.find('span',{'class':'pageNavHeader'})
    # logger.debug(span)
    # span class="pageNavHeader" - not present if no pages
    if span is None:
        raise ValueError("No pageNavHeader found; cannot determine last page")
    # first <nav>?
    # last not class=text?
    nav = span.find_next('nav')
    # logger.debug(nav)
    if nav is None:
        raise ValueError("No nav found after pageNavHeader; cannot determine last page")
    pagelinks = nav.find_all('a',href=re.compile(r'page-'))
    if len(pagelinks) < 2:
        raise ValueError("Fewer than two page links found; cannot determine last page")
    # second-to-last 'page-' link is used; presumably the final one
    # is a 'Next' style link -- confirm against a live page.
    lastpage = pagelinks[-2]
    # logger.debug(lastpage)
    return lastpage['href']
## Aug 2019 - SB doesn't send update emails for threads it doesn't
## think you've seen all of since the last email anymore.  Fetch
## the last page of the thread to reset it.  This requires login
## to already have been done.
def fetch_last_page(self,topsoup):
    """
    Best-effort fetch of the thread's last page.

    topsoup -- parsed soup of the thread page, handed to
               get_last_page_url() to locate the last page link.

    Returns nothing.  Any ordinary failure (no page navigation,
    fetch error, etc) is logged and ignored so the download itself
    carries on.
    """
    logger.debug("Perform fetch_last_page")
    try:
        lastpageurl = self.getURLPrefix()+'/'+self.get_last_page_url(topsoup)
        # doing make_soup will also cache posts from that last page.
        self.make_soup(self._fetchUrl(lastpageurl))
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt
        # and SystemExit still propagate.
        logger.info("fetch_last_page failed, continuing")
        # keep the reason available for troubleshooting.
        logger.debug("fetch_last_page failure detail:", exc_info=True)
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
data = topsoup = souptag = None
useurl = self.url
logger.info("url: "+useurl)
@ -391,10 +423,16 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
(data,opened) = self._fetchUrlOpened(useurl)
useurl = opened.geturl()
logger.info("use useurl: "+useurl)
# can't login before initial fetch--need a cookie.
if self.getConfig('always_login',False) or self.getConfig('fetch_last_page',False):
self.performLogin(data)
(data,opened) = self._fetchUrlOpened(useurl)
useurl = opened.geturl()
logger.info("use useurl: "+useurl)
except HTTPError as e:
# QQ gives 403, SV at least gives 404. Which unfortunately
if e.code == 403 or self.getConfig('always_login',False):
self.performLogin()
self.performLogin(data)
(data,opened) = self._fetchUrlOpened(useurl)
useurl = opened.geturl()
logger.info("use useurl: "+useurl)
@ -402,12 +440,17 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
raise exceptions.StoryDoesNotExist(self.url)
else:
raise
if '#' not in useurl and '/posts/' not in useurl:
self._setURL(useurl) ## for when threadmarked thread name changes.
# use BeautifulSoup HTML parser to make everything easier to find.
topsoup = souptag = self.make_soup(data)
if '#' not in useurl and '/posts/' not in useurl:
self._setURL(useurl) ## for when threadmarked thread name changes.
# only apply fetch_last_page when not a post url.
if self.getConfig('fetch_last_page',False):
self.fetch_last_page(topsoup)
self.parse_title(topsoup)
first_post_title = self.getConfig('first_post_title','First Post')

View file

@ -282,6 +282,7 @@ def get_valid_set_options():
'replace_failed_smilies_with_alt_text':(base_xenforo_list,None,boollist),
'use_threadmark_wordcounts':(base_xenforo_list,None,boollist),
'always_include_first_post_chapters':(base_xenforo_list,None,boollist),
'fetch_last_page':(base_xenforo_list,None,boollist),
'use_threadmarks_description':(base_xenforo2_list,None,boollist),
'use_threadmarks_status':(base_xenforo2_list,None,boollist),
'use_threadmarks_cover':(base_xenforo2_list,None,boollist),
@ -498,6 +499,7 @@ def get_valid_keywords():
'use_threadmarks_status',
'use_threadmarks_cover',
'datethreadmark_format',
'fetch_last_page',
'fix_pseudo_html',
'fix_excess_space',
'ignore_chapter_url_list',