diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 7e82732f..e7e2915e 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -74,7 +74,7 @@ rating_label:Rating warnings_label:Warnings numChapters_label:Chapters numWords_label:Words -## www.fanfiction.net, fictionalley.com, etc. +## www.fanfiction.net, fictionalley-archive.com, etc. site_label:Publisher ## ffnet, fpcom, etc. siteabbrev_label:Site Abbrev @@ -2980,33 +2980,46 @@ use_basic_cache:true ## for examples of how to use them. extra_valid_entries:native_status -[www.fictionalley.org] -## Some sites do not require a login, but do require the user to -## confirm they are adult for adult content. In commandline version, -## this should go in your personal.ini, not defaults.ini. -#is_adult:true +[www.fictionalley-archive.org] +## also accepts fictionalley.org story URLs +use_basic_cache:true ## Site dedicated to these categories/characters/ships extracategories:Harry Potter -## fictionally.org storyIds are not unique. Combine with authorId. +## fictionalley-archive.org storyIds are not unique. Combine with authorId. output_filename: ${title}-${siteabbrev}_${authorId}_${storyId}${formatext} -## fictionalley.org doesn't have a status metadatum. If uncommented, +## fictionalley-archive.org doesn't have a status metadatum. If uncommented, ## this will be used for status. #default_value_status:Unknown -website_encodings:Windows-1252,utf8 -slow_down_sleep_time:10 +## Extra metadata that this adapter knows about. See [archiveofourown.org] +## for examples of how to use them. +extra_valid_entries:house,era,spoilers,hits -## sites are sensitive to too many hits. Users are sensitive to long -## waits during the initial metadata collection in the foreground. -## When used, these settings will speed up metadata downloads in the -## foreground linearly. 
-tweak_fg_sleep:true -min_fg_sleep:2.0 -max_fg_sleep:5.0 -max_fg_sleep_at_downloads:4 +## fictionalley-archive chapters have 'date', 'words', 'hits' and +## 'summary' available for each chapter. These can be used with +## custom output (see +## https://github.com/JimmXinu/FanFicFare/wiki/CustomOutput ) or with +## chapter_title_*_pattern settings. +## Examples for html & epub: +#[html] +#tocpage_entry: +# ${chapter} ${date} ${words}
+#[epub] +#tocpage_entry: +# ${chapter} ${date} ${words}

+ +## The 'date' value for chapters mentioned above can be formatted with +## datechapter_format. Otherwise it will default to +## datePublished_format +#datechapter_format:%%Y-%%m-%%d + +## fictionalley-archive.org chapters can have author notes attached to +## them. Setting include_author_notes:true will include them with the +## chapter text. Includes both leading and trailing notes. +#include_author_notes:false [www.fictionpress.com] ## Using cloudscraper can satisfy the first couple levels of diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py index d24d0e94..5c0c0e3c 100644 --- a/fanficfare/adapters/__init__.py +++ b/fanficfare/adapters/__init__.py @@ -33,7 +33,7 @@ from .. import configurable as configurable from . import base_efiction_adapter from . import adapter_test1 from . import adapter_fanfictionnet -from . import adapter_fictionalleyorg +from . import adapter_fictionalleyarchiveorg from . import adapter_fictionpresscom from . import adapter_ficwadcom from . import adapter_fimfictionnet diff --git a/fanficfare/adapters/adapter_fictionalleyarchiveorg.py b/fanficfare/adapters/adapter_fictionalleyarchiveorg.py new file mode 100644 index 00000000..f8b04a10 --- /dev/null +++ b/fanficfare/adapters/adapter_fictionalleyarchiveorg.py @@ -0,0 +1,225 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team, 2021 FanFicFare team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import absolute_import +import logging +logger = logging.getLogger(__name__) +import re +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from .base_adapter import BaseSiteAdapter, makeDate + +class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fa') + self.is_adult=False + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + # normalized story URL. + url = "https://"+self.getSiteDomain()+"/authors/"+m.group('auth')+"/"+m.group('id')+".html" + self._setURL(url) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + def _setURL(self,url): + # logger.debug("set URL:%s"%url) + super(FictionAlleyArchiveOrgSiteAdapter, self)._setURL(url) + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('authorId',m.group('auth')) + self.story.setMetadata('storyId',m.group('id')) + + @staticmethod + def getSiteDomain(): + return 'www.fictionalley-archive.org' + + @classmethod + def getAcceptDomains(cls): + return ['www.fictionalley-archive.org', + 'www.fictionalley.org'] + + @classmethod + def getSiteExampleURLs(cls): + return "https://"+cls.getSiteDomain()+"/authors/drt/DA.html https://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html" + + @classmethod + def getURLDomain(cls): + return 'https://' + cls.getSiteDomain() + + def getSiteURLPattern(self): + # Named groups 'auth' and 'id' are read by __init__ and _setURL, e.g. http://www.fictionalley-archive.org/authors/drt/DA.html + # http://www.fictionalley-archive.org/authors/drt/JOTP01a.html + return r"https?://www\.fictionalley(-archive)?\.org/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html" + + def 
extractChapterUrlsAndMetadata(self): + + ## could be either chapter list page or one-shot text page. + logger.debug("URL: "+self.url) + + (data,rurl) = self.get_request_redirected(self.url) + if rurl != self.url: + self._setURL(rurl) + logger.debug("set to redirected url:%s"%self.url) + soup = self.make_soup(data) + + # If chapter list page, get the first chapter to look for adult check + chapterlinklist = soup.select('h5.mb-1 > a') + # logger.debug(chapterlinklist) + + if not chapterlinklist: + # no chapter list, it's either a chapter URL or a single chapter story + #