diff --git a/app.yaml b/app.yaml index 492bbf07..7650006d 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanfictiondownloader application: fanfictiondownloader -version: 4-4-17 +version: 4-4-18 runtime: python27 api_version: 1 threadsafe: true diff --git a/defaults.ini b/defaults.ini index 9bfb10cd..07c5873c 100644 --- a/defaults.ini +++ b/defaults.ini @@ -313,6 +313,19 @@ extratags: FanFiction,Testing,HTML [archive.skyehawke.com] +[archiveofourown.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + [ashwinder.sycophanthex.com] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -470,12 +483,6 @@ cover_exclusion_regexp:/images/.*?ribbon.gif [www.thealphagate.com] -[www.archiveofourown.org] -## Some sites do not require a login, but do require the user to -## confirm they are adult for adult content. In commandline version, -## this should go in your personal.ini, not defaults.ini. -#is_adult:true - [www.checkmated.com] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -498,6 +505,12 @@ cover_exclusion_regexp:/images/.*?ribbon.gif never_make_cover: true [www.fanfiktion.de] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. 
+#username:YourName +#password:yourpassword [www.ficbook.net] diff --git a/fanficdownloader/adapters/adapter_archiveofourownorg.py b/fanficdownloader/adapters/adapter_archiveofourownorg.py index 958a1b18..8de8009b 100644 --- a/fanficdownloader/adapters/adapter_archiveofourownorg.py +++ b/fanficdownloader/adapters/adapter_archiveofourownorg.py @@ -42,6 +42,9 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) @@ -60,13 +63,54 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): @staticmethod # must be @staticmethod, don't remove it. def getSiteDomain(): # The site domain. Does have www here, if it uses it. - return 'www.archiveofourown.org' + return 'archiveofourown.org' + + @classmethod + def getAcceptDomains(cls): + return ['www.archiveofourown.org','archiveofourown.org'] def getSiteExampleURLs(self): return "http://"+self.getSiteDomain()+"/works/123456" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/works/")+r"\d+(/chapters/\d+)?/?$" + return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/works/")+r"\d+(/chapters/\d+)?/?$" + + ## Login + def needToLoginCheck(self, data): + if 'This work is only available to registered users of the Archive.' 
in data \ + or "The password or user name you entered doesn't match our records" in data: + return True + else: + return False + + def performLogin(self, url, data): + + params = {} + if self.password: + params['user_session[login]'] = self.username + params['user_session[password]'] = self.password + else: + params['user_session[login]'] = self.getConfig("username") + params['user_session[password]'] = self.getConfig("password") + params['user_session[remember_me]'] = '1' + params['commit'] = 'Log in' + #params['utf8'] = u'✓'#u'\x2713' # gets along with out it, and it confuses the encoder. + params['authenticity_token'] = data.split('input name="authenticity_token" type="hidden" value="')[1].split('" />')[0] + + loginUrl = 'http://' + self.getSiteDomain() + '/user_sessions' + logging.info("Will now login to URL (%s) as (%s)" % (loginUrl, + params['user_session[login]'])) + + d = self._postUrl(loginUrl, params) + #logging.info(d) + + if "Successfully logged in" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['user_session[login]'])) + raise exceptions.FailedToLogin(url,params['user_session[login]']) + return False + else: + return True ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -76,13 +120,14 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): else: addurl="" - meta = self.url+addurl + metaurl = self.url+addurl url = self.url+'/navigate'+addurl - logging.debug("URL: "+meta) + logging.info("url: "+url) + logging.info("metaurl: "+metaurl) try: data = self._fetchUrl(url) - meta = self._fetchUrl(meta) + meta = self._fetchUrl(metaurl) if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." 
in meta: raise exceptions.AdultCheckRequired(self.url) @@ -92,11 +137,16 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): raise exceptions.StoryDoesNotExist(self.meta) else: raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url,data) + data = self._fetchUrl(url) + meta = self._fetchUrl(metaurl) # use BeautifulSoup HTML parser to make everything easier to find. soup = bs.BeautifulSoup(data) metasoup = bs.BeautifulSoup(meta) - # print data # Now go hunting for all the meta data and the chapter list. diff --git a/index.html b/index.html index 8c694718..751bddfb 100644 --- a/index.html +++ b/index.html @@ -60,6 +60,21 @@ New sites ncisfic.com and national-library.net added.
Thanks, Ida! +

+

New Fixes

+

+ + Fixes to support username and password login for www.fanfiktion.de and archiveofourown.org. + +

+

+ + Note: As part of the archiveofourown.org fix, the + configuration section for AO3 is now + [archiveofourown.org], not [www.archiveofourown.org]. + Please change your personal configuration accordingly, if + you have customized your settings for archiveofourown.org. +

Questions? Check out our diff --git a/plugin-defaults.ini b/plugin-defaults.ini index df34dd5c..6b3678b8 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -299,6 +299,19 @@ extratags: FanFiction,Testing,HTML [archive.skyehawke.com] +[archiveofourown.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + [ashwinder.sycophanthex.com] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -456,12 +469,6 @@ cover_exclusion_regexp:/images/.*?ribbon.gif [www.thealphagate.com] -[www.archiveofourown.org] -## Some sites do not require a login, but do require the user to -## confirm they are adult for adult content. In commandline version, -## this should go in your personal.ini, not defaults.ini. -#is_adult:true - [www.checkmated.com] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -484,6 +491,12 @@ cover_exclusion_regexp:/images/.*?ribbon.gif never_make_cover: true [www.fanfiktion.de] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword [www.ficbook.net]