From e53e2bfbe5db1634df0ce9596ce5128b9553a9d6 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Mon, 22 Apr 2019 14:57:55 -0500
Subject: [PATCH 01/17] Incomplete test version for xenforo2 on xf2test.sufficientvelocity.com.

---
 fanficfare/adapters/__init__.py                    |   1 +
 .../adapter_xf2testsufficientvelocitycom.py        |  49 +++++
 .../adapters/base_xenforo2forum_adapter.py         | 200 ++++++++++++++++++
 .../adapters/base_xenforoforum_adapter.py          |  59 +++---
 fanficfare/configurable.py                         |   5 +
 5 files changed, 289 insertions(+), 25 deletions(-)
 create mode 100644 fanficfare/adapters/adapter_xf2testsufficientvelocitycom.py
 create mode 100644 fanficfare/adapters/base_xenforo2forum_adapter.py

diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py
index ac80450e..74315c59 100644
--- a/fanficfare/adapters/__init__.py
+++ b/fanficfare/adapters/__init__.py
@@ -110,6 +110,7 @@ from . import adapter_tgstorytimecom
 from . import adapter_itcouldhappennet
 from . import adapter_forumsspacebattlescom
 from . import adapter_forumssufficientvelocitycom
+from . import adapter_xf2testsufficientvelocitycom
 from . import adapter_forumquestionablequestingcom
 from . import adapter_ninelivesarchivecom
 from . import adapter_masseffect2in
diff --git a/fanficfare/adapters/adapter_xf2testsufficientvelocitycom.py b/fanficfare/adapters/adapter_xf2testsufficientvelocitycom.py
new file mode 100644
index 00000000..6610843f
--- /dev/null
+++ b/fanficfare/adapters/adapter_xf2testsufficientvelocitycom.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import absolute_import
+import re
+
+from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
+
+def getClass():  # returns the adapter class defined in this module
+    return XF2TestSufficientVelocityComAdapter
+
+class XF2TestSufficientVelocityComAdapter(BaseXenForo2ForumAdapter):  # adapter for xf2test.sufficientvelocity.com
+
+    def __init__(self, config, url):
+        BaseXenForo2ForumAdapter.__init__(self, config, url)
+
+        # Each adapter needs to have a unique site abbreviation.
+        self.story.setMetadata('siteabbrev','fsv2')
+
+    @staticmethod # must be @staticmethod, don't remove it.
+    def getSiteDomain():
+        # The site domain. Does have www here, if it uses it.
+        return 'xf2test.sufficientvelocity.com'
+
+    # @classmethod
+    # def getAcceptDomains(cls):
+    #     return [cls.getSiteDomain(),
+    #             cls.getSiteDomain().replace('forums.','forum.'),
+    #             cls.getSiteDomain().replace('forums.','')]
+
+    def getSiteURLPattern(self):
+        ## SV accepts forums.sufficientvelocity.com, forum.sufficientvelocity.com and sufficientvelocity.com
+        ## all of which redirect to forums. We will use forums. as canonical for all.
+        ## NOTE(review): getSiteDomain() here returns 'xf2test.sufficientvelocity.com' (no 'forums.'), so this replace() presumably matches nothing -- confirm against the base getSiteURLPattern().
+        return super(XF2TestSufficientVelocityComAdapter, self).getSiteURLPattern().replace(re.escape("forums."),r"(forums?\.)?")
diff --git a/fanficfare/adapters/base_xenforo2forum_adapter.py b/fanficfare/adapters/base_xenforo2forum_adapter.py
new file mode 100644
index 00000000..083f50d4
--- /dev/null
+++ b/fanficfare/adapters/base_xenforo2forum_adapter.py
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import absolute_import
+import logging
+from datetime import datetime
+logger = logging.getLogger(__name__)
+import re
+from xml.dom.minidom import parseString
+
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+# py2 vs py3 transition
+from ..six import text_type as unicode
+from ..six.moves.urllib.error import HTTPError
+
+from .base_adapter import makeDate
+from .base_xenforoforum_adapter import BaseXenForoForumAdapter
+
+logger = logging.getLogger(__name__)  # NOTE(review): logger is already assigned above by the same call
+
+class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):  # presumably overrides parsing hooks of the base adapter for XenForo 2 markup -- confirm against base class
+
+    def __init__(self, config, url):
+        logger.info("init url: "+url)
+        BaseXenForoForumAdapter.__init__(self, config, url)
+
+    def parse_title(self,souptag):  # sets 'title' from h1.p-title-value; label spans go to 'genre'
+        h1 = souptag.find('h1',{'class':'p-title-value'})
+        logger.debug(h1)
+        ## SV has started putting 'Crossover', 'Sci-Fi' etc spans in the title h1.
+        for tag in h1.find_all('span',{'class':'label'}):
+            ## stick them into genre.
+            self.story.addToList('genre',stripHTML(tag))
+            logger.debug(stripHTML(tag))
+            tag.extract()  # take the label span out of h1 before the title is read below
+        self.story.setMetadata('title',stripHTML(h1))
+        logger.debug(stripHTML(h1))
+
+    def parse_author(self,souptag):  # adds authorId, authorUrl and author from the first link in section.message-user
+        a = souptag.find('section',{'class':'message-user'}).find('a')
+        logger.debug(a)
+        self.story.addToList('authorId',a['href'].split('/')[-2])  # second-to-last '/'-separated piece; assumes href ends with '/' -- TODO confirm
+        authorUrl = a['href'] # self.getURLPrefix()+'/'+a['href']
+        self.story.addToList('authorUrl',authorUrl)
+        self.story.addToList('author',a.text)
+
+    def cache_posts(self,topsoup):  # stores every article.message--post in self.post_cache keyed by its data-content attribute
+        for post in topsoup.find_all('article',{'class':'message--post'}):
+            logger.debug("Caching %s"%post['data-content'])
+            self.post_cache[post['data-content']] = post
+
+    def get_first_post(self,topsoup):  # first article.message--post found in topsoup
+        return topsoup.find('article',{'class':'message--post'})
+
+    def get_first_post_body(self,topsoup):  # div.bbWrapper inside the first post's article.message-body
+        return self.get_first_post(topsoup).find('article',{'class':'message-body'}).find('div',{'class':'bbWrapper'})
+
+    def extract_threadmarks(self,souptag):
+        threadmarks=[]
+        # try threadmarks if no '#' in url
+        navdiv = souptag.find('div',{'class':'buttonGroup'})
+        if not navdiv:
+            return threadmarks
+        # was class=threadmarksTrigger. thread cats are currently
+        # only OverlayTrigger s in threadmarkMenus, but I wouldn't
+        # be surprised if that changed. Don't want to use just
+        # href=re because there's more than one copy on the page; plus
+        # could be included in a post. Would be easier if