# -*- coding: utf-8 -*-

# Copyright 2013 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import datetime
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from base_adapter import BaseSiteAdapter


# This function is called by the downloader in all adapter_*.py files
# in this dir to register the adapter class.  So it needs to be
# updated to reflect the class below it.  That, plus getSiteDomain()
# take care of 'Registering'.
def getClass():
    """Return the adapter class defined in this module."""
    return PotterFicsComAdapter


# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class PotterFicsComAdapter(BaseSiteAdapter):
    """Site adapter for stories hosted on www.potterfics.com."""

    def __init__(self, config, url):
        """Validate *url* and record the story id and normalized story URL.

        config -- passed straight through to BaseSiteAdapter.__init__.
        url    -- story or chapter URL; must match getSiteURLPattern().

        Raises exceptions.InvalidStoryURL when *url* does not match the
        pattern returned by getSiteURLPattern().
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252", "utf8"]

        # if left empty, site doesn't return any message at all.
        self.username = "NoneGiven"
        self.password = ""
        self.is_adult = False

        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(), url)
        if m:
            # The 'id' named group is defined in getSiteURLPattern().
            self.story.setMetadata('storyId', m.group('id'))

            # normalized story URL.  gets rid of chapter if there, left
            # with chapter index URL
            nurl = "http://"+self.getSiteDomain()+"/historias/"+self.story.getMetadata('storyId')
            self._setURL(nurl)
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'potficscom')

    @staticmethod  # must be @staticmethod, don't remove it.
    def getSiteDomain():
        """Return the site's domain name, including the 'www.' prefix."""
        # The site domain.  Does have www here, if it uses it.
        return 'www.potterfics.com'

    @classmethod
    def getSiteExampleURLs(cls):
        """Return a space-separated string of example story URLs."""
        return "http://www.potterfics.com/historias/12345 http://www.potterfics.com/historias/12345/capitulo-1 "

    def getSiteURLPattern(self):
        """Return the regex source string that story URLs must match.

        The pattern accepts a story index URL or a chapter URL, with an
        optional trailing slash.  It captures the numeric story id in the
        named group 'id', which __init__ reads back via m.group('id').
        """
        #http://www.potterfics.com/historias/127583
        #http://www.potterfics.com/historias/127583/capitulo-1
        #http://www.potterfics.com/historias/127583/capitulo-4
        #http://www.potterfics.com/historias/92810 -> Complete story
        #http://www.potterfics.com/historias/111194 -> Complete, single chap
        #
        # Both groups need an explicit <name>: a bare "(?P" followed by
        # "\d" is rejected by the re module as an unknown extension.
        # NOTE(review): nothing in the visible code reads the chapter
        # group, so its name ('ch') is an assumption -- confirm against
        # any caller that uses it.
        p = re.escape("http://"+self.getSiteDomain()+"/historias/")+\
            r"(?P<id>\d+)(/capitulo-(?P<ch>\d+))?/?$"
        return p

    # NOTE(review): the method below is cut off in the reviewed text (it
    # ends inside a string literal).  Its visible part is preserved here
    # verbatim as a comment rather than guessed at; restore the complete
    # definition from version control.
    #
    # def needToLoginCheck(self, data):
    #     # partials used to avoid having to figure out what was wrong
    #     # with included utf8 higher chars.
    #     if 'Para ver esta historia, por favor inicia tu sesi' in data \
    #        or '