From a5fb14d61a3ad5d2b31c6009777ecaab32157d4d Mon Sep 17 00:00:00 2001 From: David Lynch Date: Wed, 9 Dec 2015 01:57:55 -0600 Subject: [PATCH] Xenforo: canonicalize post URLs from threadmarks --- sites/xenforo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sites/xenforo.py b/sites/xenforo.py index fa47352..84f08ec 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -102,6 +102,10 @@ class XenForo(Site): match = re.match(r'.+#post-(\d+)$', url) # could still be nothing here postid = match and match.group(1) + if postid: + # create a proper post-url, because threadmarks can sometimes + # mess up page-wise with anchors + url = 'https://%s/posts/%s/' % (self.domain, postid) soup = self._soup(url, 'html5lib') if postid: