mirror of
https://github.com/kemayo/leech
synced 2025-12-06 16:33:16 +01:00
Xenforo: cope with ThreadmarksPro's fetchers
This commit is contained in:
parent
9b484a429a
commit
18c9d68617
1 changed files with 22 additions and 1 deletions
|
|
@ -3,6 +3,8 @@
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from . import register, Site, SiteException, SiteSpecificOption, Section, Chapter
|
from . import register, Site, SiteException, SiteSpecificOption, Section, Chapter
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -115,7 +117,26 @@ class XenForo(Site):
|
||||||
base = soup.head.base.get('href')
|
base = soup.head.base.get('href')
|
||||||
soup = self._soup(base + href)
|
soup = self._soup(base + href)
|
||||||
|
|
||||||
marks = soup.select('.threadmarks li.primaryContent.threadmarkListItem a, .threadmarks li.primaryContent.threadmarkItem a')
|
fetcher = soup.find(class_='ThreadmarkFetcher')
|
||||||
|
while fetcher:
|
||||||
|
# ThreadmarksPro, hiding some threadmarks. Means the API is available to do this.
|
||||||
|
# Note: the fetched threadmarks can contain more placeholder elements to fetch. Ergo, loop.
|
||||||
|
# Good test case: https://forums.sufficientvelocity.com/threads/ignition-mtg-multicross-planeswalker-pc.26099/threadmarks
|
||||||
|
# e.g.: <li class="primaryContent threadmarkListItem ThreadmarkFetcher _depth0 filler" data-range-min="0" data-range-max="306" data-thread-id="26099" data-category-id="1" title="305 hidden">
|
||||||
|
response = self.session.post('https://{}/index.php?threads/threadmarks/load-range'.format(self.domain), data={
|
||||||
|
# I did try a fetch on min/data-min+data-max, but there seems
|
||||||
|
# to be an absolute limit which the API fetch won't override
|
||||||
|
'min': fetcher.get('data-range-min'),
|
||||||
|
'max': fetcher.get('data-range-max'),
|
||||||
|
'thread_id': fetcher.get('data-thread-id'),
|
||||||
|
'category_id': fetcher.get('data-category-id'),
|
||||||
|
'_xfResponseType': 'json',
|
||||||
|
}).json()
|
||||||
|
responseSoup = BeautifulSoup(response['templateHtml'], 'html5lib')
|
||||||
|
fetcher.replace_with(responseSoup)
|
||||||
|
fetcher = soup.find(class_='ThreadmarkFetcher')
|
||||||
|
|
||||||
|
marks = soup.find(class_='threadmarks').select('li.primaryContent.threadmarkListItem a, li.primaryContent.threadmarkItem a')
|
||||||
if not marks:
|
if not marks:
|
||||||
raise SiteException("No marks on threadmarks page")
|
raise SiteException("No marks on threadmarks page")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue