Fix XF2 quote handling and accept the new /post-999 form of story URL.

This commit is contained in:
Jim Miller 2019-04-30 17:22:33 -05:00
parent 936a2409b4
commit a6ec8fd1d8
2 changed files with 25 additions and 11 deletions

View file

@ -138,5 +138,14 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
# https://xf2test.sufficientvelocity.com/threads/mauling-snarks-worm.41471/reader/page-4?threadmark_category=4
return self.story.getMetadata('storyUrl')+'reader/page-'+unicode(reader_page_num)+'?threadmark_category='+tmcat_num
def get_quote_expand_tag(self,soup):
    '''
    Return every <div> in soup whose class matches
    bbCodeBlock-expandLink or bbCodeBlock-shrinkLink.
    '''
    link_class = re.compile(r'bbCodeBlock-(expand|shrink)Link')
    return soup.find_all('div',{'class':link_class})
def get_spoiler_tags(self,topsoup):
    '''
    Return every <div> in topsoup carrying the bbCodeSpoiler class.
    '''
    spoiler_divs = topsoup.find_all('div',class_='bbCodeSpoiler')
    return spoiler_divs
def convert_quotes(self,soup):
    '''
    Rename XF2 quote divs (class bbCodeBlock-expandContent) to
    blockquote, in place, so the spacing is the same as XF1.
    '''
    quote_divs = soup.find_all('div', class_="bbCodeBlock-expandContent")
    for quote_div in quote_divs:
        quote_div.name='blockquote'

View file

@ -86,7 +86,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
    '''
    Return the regex (as a string) that story URLs for this site
    must match.

    Named groups: tp (threads|posts), title (optional, includes the
    trailing '.'), id (numeric thread/post id) and anchorpost (the
    number from an optional trailing post-NNN part).

    The '#' before post-NNN is optional so that both the
    .../#post-999 and the newer .../post-999 URL forms populate
    anchorpost.  Previously a stale return with the '#'-required
    pattern preceded this one and made it unreachable.
    '''
    ## need to accept http and https still.
    prefix = re.escape(self.getURLPrefix()).replace("https","https?")
    return prefix+r"/(?P<tp>threads|posts)/(?P<title>.+\.)?(?P<id>\d+)/?[^#]*?(#?post-(?P<anchorpost>\d+))?$"
def _fetchUrlOpened(self, url,
parameters=None,
@ -232,6 +232,11 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
for noscript in soup.find_all('noscript'):
noscript.extract()
for qdiv in self.get_quote_expand_tag(soup):
qdiv.extract() # Remove <div class="...">click to expand</div>
self.convert_quotes(soup)
self.handle_spoilers(soup)
## cache posts on page.
@ -497,9 +502,11 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
if self.num_chapters() < 1:
self.add_chapter(first_post_title,useurl)
for (url,name) in [ (x['href'],stripHTML(x)) for x in first_post.find_all('a') ]:
# logger.debug(first_post)
for (url,name,tag) in [ (x['href'],stripHTML(x),x) for x in first_post.find_all('a') ]:
(is_chapter_url,url) = self._is_normalize_chapterurl(url)
if is_chapter_url and name != u"\u2191": # skip quote links as indicated by up arrow character.
# skip quote links as indicated by up arrow character or data-xf-click=attribution
if is_chapter_url and name != u"\u2191" and tag.get("data-xf-click",None)!="attribution":
self.add_chapter(name,url)
if url == useurl and first_post_title == self.get_chapter(0,'url') \
and not self.getConfig('always_include_first_post',False):
@ -654,28 +661,26 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
for notice in souptag.find_all('div',{'class':'noticeContent'}):
notice.extract()
# bq = souptag.find('blockquote')
# if not bq:
# bq = souptag.find('div',{'class':'messageText'}) # cached gets if it was already used before
# bq.name='div'
postbody = self.get_post_body(souptag)
for iframe in postbody.find_all('iframe'):
iframe.extract() # calibre book reader & editor don't like iframes to youtube.
for qdiv in postbody.find_all('div',{'class':'quoteExpand'}):
qdiv.extract() # Remove <div class="quoteExpand">click to expand</div>
# XenForo uses <base href="https://forums.spacebattles.com/" />
return self.utf8FromSoup(self.getURLPrefix()+'/',postbody)
def make_reader_url(self,tmcat_num,reader_page_num):
    '''
    Build the reader-mode URL for one page of a threadmark category:
    <prefix>/threads/<storyId>/<tmcat_num>/reader?page=<reader_page_num>

    tmcat_num is concatenated as-is, so it must already be a string;
    reader_page_num is converted with unicode().
    '''
    thread_url = self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')
    return thread_url+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num)
def get_quote_expand_tag(self,soup):
    '''
    Return every <div class="quoteExpand"> found in soup.
    '''
    expand_attrs = {'class':'quoteExpand'}
    return soup.find_all('div',expand_attrs)
def get_spoiler_tags(self,topsoup):
    '''
    Return every <div> in topsoup carrying the
    bbCodeSpoilerContainer class.
    '''
    spoiler_divs = topsoup.find_all('div',class_='bbCodeSpoilerContainer')
    return spoiler_divs
def convert_quotes(self,soup):
    '''
    Quote-conversion hook; this implementation deliberately does
    nothing and leaves soup untouched.
    '''
    return None
def handle_spoilers(self,topsoup):
'''
Modifies tag given as required to do spoiler changes.