From 3443304ab10692670beb6457919948590b1f3b19 Mon Sep 17 00:00:00 2001 From: David Lynch Date: Sat, 13 Jul 2019 11:42:22 -0500 Subject: [PATCH] XenForo: handle SV's XenForo2 changes --- sites/xenforo.py | 69 ++++++++++++++++++++++++++++------------------- sites/xenforo2.py | 63 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 27 deletions(-) create mode 100644 sites/xenforo2.py diff --git a/sites/xenforo.py b/sites/xenforo.py index 5f405ca..e9bd30e 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -63,22 +63,19 @@ class XenForo(Site): def extract(self, url): soup = self._soup(url) - base = soup.head.base.get('href') + base = soup.head.base and soup.head.base.get('href') or url - title = soup.select('div.titleBar > h1')[0] - # clean out informational bits from the title - for tag in title.find_all(class_='prefix'): - tag.decompose() - story = Section( - title=title.get_text().strip(), - author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(), - url=url - ) + story = self._base_story(soup) if url.endswith('/reader'): reader_url = url elif soup.find('a', class_='readerToggle'): reader_url = soup.find('a', class_='readerToggle').get('href') + elif soup.find('div', class_='threadmarks-reader'): + # Technically this is the xenforo2 bit, but :shrug: + reader_url = soup.find('div', class_='threadmarks-reader').find('a').get('href') + else: + reader_url = False if reader_url: idx = 0 @@ -86,7 +83,7 @@ class XenForo(Site): reader_url = self._join_url(base, reader_url) logger.info("Fetching chapters @ %s", reader_url) reader_soup = self._soup(reader_url) - posts = reader_soup.select('#messageList > li.hasThreadmark') + posts = self._posts_from_page(reader_soup) for post in posts: idx = idx + 1 @@ -94,8 +91,7 @@ class XenForo(Site): continue if self.options['limit'] and idx >= self.options['limit']: continue - # Get the title, removing "Threadmark:" which precedes it - title = ''.join(post.select('div.threadmarker > span.label')[0].findAll(text=True, recursive=False)).strip() + title = self._threadmark_title(post) logger.info("Extracting chapter \"%s\"", title) story.add(Chapter( @@ -105,11 +101,8 @@ class XenForo(Site): )) reader_url = False - page_nav = reader_soup.find('div', class_='PageNav') - if page_nav: - # e.g.