diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index b80a623d..c8161ee9 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -1222,6 +1222,28 @@ extracategories:Lord of the Rings ## Site dedicated to these categories/characters/ships extracategories:Harry Potter +[www.masseffect2.in] +## Site dedicated to this fandom. +extracategories: Mass Effect + +## The Ucoz.com engine, upon which MassEffect2.in is based, imposes an unspecified limit on request frequency. +## Reports vary from `5 requests per second' to `2 requests per second for more than 10 per minute'. +## With the default delay, an IP ban lasting several hours may follow, so a higher value is set here. +slow_down_sleep_time: 2 + +## Whether to exclude the editor signature from the bottom of the chapter text. +exclude_editor_signature: false + +## Stories on the site almost never have a cover image; for the stories which do, +## this may be adjusted in `personal.ini' before downloading. +never_make_cover: true + +## Titles for ratings identified by the 1- or 2-letter codes of the `ERATING' system +## (`система Р.Е.Й.Т.И.Н.Г.'). MassEffect2.in and some other sites adopted it, +## but changed the titles and update them occasionally. +rating_titles: R=RESTRICTED (16+), E=EXEMPT (18+), I=ART HOUSE, T=To every, A=IN=Иной мир, Nn=Новый мир, G=О\, Господи! +adult_ratings: E,R + [merlinfic.dtwins.co.uk] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py index 3865a973..6c4e1c88 100644 --- a/fanficfare/adapters/__init__.py +++ b/fanficfare/adapters/__init__.py @@ -138,6 +138,7 @@ import adapter_itcouldhappennet import adapter_forumsspacebattlescom import adapter_forumssufficientvelocitycom import adapter_ninelivesdarksolaceorg +import adapter_masseffect2in ## This bit of complexity allows adapters to be added by just adding ## importing. It eliminates the long if/else clauses we used to need diff --git a/fanficfare/adapters/adapter_masseffect2in.py b/fanficfare/adapters/adapter_masseffect2in.py new file mode 100644 index 00000000..e2b77cb3 --- /dev/null +++ b/fanficfare/adapters/adapter_masseffect2in.py @@ -0,0 +1,719 @@ +# -*- coding: utf-8 -*- + +# Copyright 2015 FanFicFare team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import bs4 +import datetime +import logging +import re +import urllib2 + +from ..htmlcleanup import removeEntities, stripHTML +from .. 
import exceptions as exceptions +from base_adapter import BaseSiteAdapter, makeDate + + +_logger = logging.getLogger(__name__) + + +def getClass(): + """Returns the adapter class defined in this module.""" + return MassEffect2InAdapter + + +class ParsingError(Exception): + """Indicates an error while parsing web page content.""" + def __init__(self, message): + Exception.__init__(self) + self.message = message + + def __str__(self): + return self.message + + +class MassEffect2InAdapter(BaseSiteAdapter): + """ + Provides support for the MassEffect2.in site as a story source. + Can be used as a template for sites built upon the Ucoz.com engine (until a base class is extracted). + Specializations: + 1) Russian content (date format, genre names, etc.); + 2) original `E.R.A.T.I.N.G.' rating scale, used by masseffect2.in + and some affiliated sites, denoted with images; + 3) editor signatures and an option to remove them. + """ + + WORD_PATTERN = re.compile(u'\w+', re.UNICODE) + DOCUMENT_ID_PATTERN = re.compile(u'\d+-\d+-\d+-\d+') + SITE_LANGUAGE = u'Russian' + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["utf8"] + + self.story.setMetadata('siteabbrev', 'me2in') + self.story.setMetadata('storyId', self._getDocumentId(self.url)) + + self._setURL(self._makeDocumentUrl(self.story.getMetadata('storyId'))) + + self._chapters = {} + self._parsingConfiguration = None + + # Must be @staticmethod, not @classmethod! + @staticmethod + def getSiteDomain(): + return 'www.masseffect2.in' + + @classmethod + def getSiteExampleURLs(cls): + return u' '.join([cls._makeDocumentUrl('19-1-0-1234'), + cls._makeDocumentUrl('24-1-0-4321')]) + + def getSiteURLPattern(self): + return re.escape(self._makeDocumentUrl('')) + self.DOCUMENT_ID_PATTERN.pattern + + def use_pagecache(self): + """Allows use of the downloaded page cache. It is essential for this + adapter because the site does not offer a chapter URL list, and many + pages have to be fetched and parsed repeatedly.""" + return True + + def extractChapterUrlsAndMetadata(self): + """Extracts chapter URLs and story metadata. Actually downloads all + chapters, which is not exactly right, but necessary due to technical + limitations of the site.""" + + def followChapters(starting, forward=True): + if forward: + url = starting.getNextChapterUrl() + else: + url = starting.getPreviousChapterUrl() + if url: + url = self._makeDocumentUrl(self._getDocumentId(url)) + following = self._makeChapter(url) + # Do not follow links to related but different stories (prequels or sequels). + if not following.isFromStory(starting.getHeading()): + return + if forward: + yield following + for chapter in followChapters(following, forward): + yield chapter + if not forward: + yield following + + try: + startingChapter = self._makeChapter(self.url) + except urllib2.HTTPError, error: + if error.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + raise + + # We only have one date for each chapter and assume the oldest one + # to be the publication date and the most recent one to be the update date. + datePublished = datetime.datetime.max + dateUpdated = datetime.datetime.min + wordCount = 0 + # We aim at counting chapters, not chapter parts. 
+ chapterCount = 0 + storyInProgress = False + + chapters = \ + list(followChapters(startingChapter, forward=False)) + \ + [startingChapter] + \ + list(followChapters(startingChapter, forward=True)) + + headings = [chapter.getHeading() for chapter in chapters] + largestCommonPrefix = _getLargestCommonPrefix(*headings) + prefixLength = len(largestCommonPrefix) + storyTitleEnd, chapterTitleStart = prefixLength, prefixLength + match = re.search(u'[:\.\s]*(?P<chapter>глава\s+)?$', largestCommonPrefix, re.IGNORECASE | re.UNICODE) + if match: + storyTitleEnd -= len(match.group()) + label = match.group('chapter') + if label: + chapterTitleStart -= len(label) + storyTitle = largestCommonPrefix[:storyTitleEnd] + self.story.setMetadata('title', storyTitle) + + garbagePattern = re.compile(u'(?P<start>^)?[:\.\s]*(?(start)|$)', re.UNICODE) + indexPattern = re.compile(u'(?:глава\s)?(?:(?P<index>\d{1,3})(?=\D|$))', re.IGNORECASE | re.UNICODE) + + for chapter in chapters: + url = chapter.getUrl() + self._chapters[url] = chapter + _logger.debug(u"Processing chapter `%s'.", url) + + try: + authorName = chapter.getAuthorName() + if authorName: + self.story.extendList('author', [authorName]) + authorId = chapter.getAuthorId() + if authorId: + authorUrl = 'http://%s/index/%s' % (self.getSiteDomain(), authorId) + else: + authorId = u'' + authorUrl = u'' + self.story.extendList('authorId', [authorId]) + self.story.extendList('authorUrl', [authorUrl]) + + if not self.story.getMetadata('rating'): + ratingTitle = chapter.getRatingTitle() + if ratingTitle: + self.story.setMetadata('rating', ratingTitle) + + if not self.story.getMetadata('description'): + summary = chapter.getSummary() + if summary: + self.story.setMetadata('description', summary) + + datePublished = min(datePublished, chapter.getDate()) + dateUpdated = max(dateUpdated, chapter.getDate()) + + self.story.extendList('genre', chapter.getGenres()) + self.story.extendList('characters', chapter.getCharacters()) + self.story.extendList('ships', chapter.getPairings()) + + wordCount += self._getWordCount(chapter.getTextElement()) + + # Chapter status usually represents the story status, so we want the last chapter status. + # Some chapters may have no status attribute. + chapterInProgress = chapter.isInProgress() + if chapterInProgress is not None: + storyInProgress = chapterInProgress + + # If any chapter is adult, consider the whole story adult. + if chapter.isAdult(): + self.story.setMetadata('is_adult', True) + warning = chapter.getWarning() + if warning: + self.story.extendList('warnings', [warning]) + + chapterTitle = re.sub(garbagePattern, u'', chapter.getHeading()[chapterTitleStart:]) + + match = re.search(indexPattern, chapterTitle) + if match: + index = int(match.group('index')) + chapterCount = max(chapterCount, index) + else: + chapterCount += 1 + + self.chapterUrls.append((chapterTitle, url)) + except ParsingError, error: + raise exceptions.FailedToDownload(u"Failed to download chapter `%s': %s" % (url, error)) + + # Some metadata are handled separately due to format conversions. + self.story.setMetadata('status', 'In Progress' if storyInProgress else 'Completed') + self.story.setMetadata('datePublished', datePublished) + self.story.setMetadata('dateUpdated', dateUpdated) + self.story.setMetadata('numWords', str(wordCount)) + self.story.setMetadata('numChapters', chapterCount) + + # Site-specific metadata. 
+ self.story.setMetadata('language', self.SITE_LANGUAGE) + + def getChapterText(self, url): + """Grabs the text for an individual chapter.""" + if url not in self._chapters: + raise exceptions.FailedToDownload(u"No chapter `%s' present!" % url) + chapter = self._chapters[url] + return self.utf8FromSoup(url, chapter.getTextElement()) + + def _makeChapter(self, url): + """Creates a chapter object given a URL.""" + document = self.make_soup(self._fetchUrl(url)) + chapter = Chapter(self._getParsingConfiguration(), url, document) + return chapter + + def _getWordCount(self, element): + """Returns the word count of the plain text extracted from the chapter body.""" + text = stripHTML(element) + count = len(re.findall(self.WORD_PATTERN, text)) + return count + + def _getParsingConfiguration(self): + if not self._parsingConfiguration: + self._parsingConfiguration = {} + + adultRatings = self.getConfigList('adult_ratings') + if not adultRatings: + raise exceptions.PersonalIniFailed( + u"Missing `adult_ratings' setting", u"MassEffect2.in", u"?") + adultRatings = set(adultRatings) + self._parsingConfiguration['adultRatings'] = adultRatings + + ratingTitleDescriptions = self.getConfigList('rating_titles') + if ratingTitleDescriptions: + ratingTitles = {} + for ratingDescription in ratingTitleDescriptions: + parts = ratingDescription.split(u'=') + if len(parts) < 2: + _logger.warning( + u"Invalid `rating_titles' setting, missing `=' in `%s'." + % ratingDescription) + continue + labels = parts[:-1] + title = parts[-1] + for label in labels: + ratingTitles[label] = title + # If one aliased label is adult, mark all its aliases as adult. + if label in adultRatings: + adultRatings.update(labels) + self._parsingConfiguration['adultRatings'] = list(adultRatings) + self._parsingConfiguration['ratingTitles'] = ratingTitles + else: + raise exceptions.PersonalIniFailed( + u"Missing `rating_titles' setting", u"MassEffect2.in", u"?") + + self._parsingConfiguration['excludeEditorSignature'] = \ + self.getConfig('exclude_editor_signature', False) + + return self._parsingConfiguration + + def _getDocumentId(self, url): + """Extract the document ID from a MassEffect2.in URL.""" + match = re.search(self.DOCUMENT_ID_PATTERN, url) + if not match: + raise ValueError(u"Failed to extract document ID from `%s'." % url) + documentId = url[match.start():match.end()] + return documentId + + @classmethod + def _makeDocumentUrl(cls, documentId): + """Make a chapter URL given a document ID.""" + return 'http://%s/publ/%s' % (cls.getSiteDomain(), documentId) + + +class Chapter(object): + """Represents a lazily-parsed chapter of a story.""" + def __init__(self, configuration, url, document): + self._configuration = configuration + self._url = url + self._document = document + # Lazy-loaded: + self._heading = None + self._date = None + self._author = None + self._attributes = None + self._textElement = None + self._infoBar = None + + def getHeading(self): + return self._extractHeading() + + def getSummary(self): + attributes = self.__getAttributes() + if 'summary' in attributes: + return attributes['summary'] + + def getAuthorId(self): + author = self._getAuthor() + if author: + return author['id'] + + def getAuthorName(self): + author = self._getAuthor() + if author: + return author['name'] + + def getDate(self): + return self.__getDate() + + def getRatingTitle(self): + attributes = self.__getAttributes() + if 'rating' in attributes: + return attributes['rating']['title'] + + def isAdult(self): + attributes = self.__getAttributes() + if 'rating' in attributes and 
attributes['rating']['isAdult']: + return True + if 'warning' in attributes: + return True + return False + + def getWarning(self): + attributes = self.__getAttributes() + if 'warning' in attributes: + return attributes['warning'] + + def getCharacters(self): + return self._getListAttribute('characters') + + def getPairings(self): + return self._getListAttribute('pairings') + + def getGenres(self): + return self._getListAttribute('genres') + + def isInProgress(self): + attributes = self.__getAttributes() + if 'isInProgress' in attributes: + return attributes['isInProgress'] + + def getUrl(self): + return self._url + + def getTextElement(self): + return self._getTextElement() + + def getPreviousChapterUrl(self): + """Return the URL of the previous chapter, if a `Previous chapter' link is present.""" + return self._getSiblingChapterUrl({'class': 'fl tal'}) + + def getNextChapterUrl(self): + """Return the URL of the next chapter, if a `Next chapter' link is present.""" + return self._getSiblingChapterUrl({'class': 'tar fr'}) + + def isFromStory(self, storyTitle, prefixThreshold=-1): + """Check whether this chapter belongs to the story with the given title. + The prefix threshold specifies how long the common story title prefix must be + for chapters of one story: a negative value means an implementation-defined + optimum, zero disables the prefix check (exact title comparison is used instead), + and a positive value adjusts the threshold.""" + + def getFirstWord(string): + match = re.search(u'^\s*\w+', string, re.UNICODE) + return string[match.start():match.end()] + + thisStoryTitle = self.getHeading() + if prefixThreshold != 0: + if prefixThreshold < 0: + prefixThreshold = min( + len(getFirstWord(storyTitle)), len(getFirstWord(thisStoryTitle))) + else: + prefixThreshold = min( + prefixThreshold, len(storyTitle), len(thisStoryTitle)) + result = len(_getLargestCommonPrefix(storyTitle, thisStoryTitle)) >= prefixThreshold + return result + else: + return storyTitle == thisStoryTitle + + def _getListAttribute(self, name): + """Return an attribute value as a list or an empty list if the attribute is absent.""" + attributes = self.__getAttributes() + if name in attributes: + return attributes[name] + return [] + + def _extractHeading(self): + """Extract the heading text from the document.""" + return stripHTML( + self._document.find('div', {'class': 'eTitle'}).string) + + def __getHeading(self): + """Lazily parse and return heading.""" + if not self._heading: + self._heading = self._extractHeading() + return self._heading + + def _getAuthor(self): + """Lazily parse and return author's information.""" + if not self._author: + self._author = self._parseAuthor() + return self._author + + def _parseAuthor(self): + """Locate and parse chapter author's information into a dictionary with the author's `id' and `name'.""" + try: + authorLink = self._getInfoBarElement() \ + .find('i', {'class': 'icon-user'}) \ + .findNextSibling('a') + except AttributeError: + raise ParsingError(u'Failed to locate author link.') + match = re.search(u'(8-\d+)', authorLink['onclick']) + if not match: + raise ParsingError(u'Failed to extract author ID.') + authorId = match.group(0) + authorName = stripHTML(authorLink.text) + return { + 'id': authorId, + 'name': authorName + } + + def __getDate(self): + """Lazily parse chapter date.""" + if not self._date: + self._date = self._parseDate() + return self._date + + def _parseDate(self): + """Locate and parse chapter date.""" + try: + dateText = self._getInfoBarElement() \ + .find('i', {'class': 'icon-eye'}) \ + 
.findPreviousSibling(text=True) \ + .strip(u'| \n') + except AttributeError: + raise ParsingError(u'Failed to locate date.') + date = makeDate(dateText, '%d.%m.%Y') + return date + + def _getInfoBarElement(self): + """Locate the informational bar element, containing the chapter date and author, on the page.""" + if not self._infoBar: + self._infoBar = self._document.find('td', {'class': 'eDetails2'}) + if not self._infoBar: + raise ParsingError(u'No informational bar found.') + return self._infoBar + + def __getAttributes(self): + """Lazily parse attributes.""" + if not self._attributes: + self._attributes = self._parseAttributes() + return self._attributes + + def _parseAttributes(self): + """Parse the chapter attribute block and return it as a dictionary with standard entries.""" + + attributes = {} + attributesText = u'' + try: + starter = self._document \ + .find('div', {'class': 'comm-div'}) \ + .findNextSibling('div', {'class': 'cb'}) + bound = starter.findNextSibling('div', {'class': 'cb'}) + + def processElement(element): + """Return the textual representation of an *inline* element of the chapter attribute block.""" + result = u'' + if isinstance(element, bs4.Tag): + if element.name in ('b', 'strong', 'font', 'br'): + result += u"\n" + if element.name == 's': + result += u"%s" % stripHTML(element) + else: + result += stripHTML(element) + else: + result += removeEntities(element) + return result + + elements = starter.nextSiblingGenerator() + for element in elements: + if isinstance(element, bs4.Tag): + if element == bound: + break + else: + if element.name in ('div', 'p'): + attributesText += u"\n" + for child in element.childGenerator(): + attributesText += processElement(child) + continue + attributesText += processElement(element) + + elements = starter.nextGenerator() + for element in elements: + if isinstance(element, bs4.Tag): + if element == bound: + break + elif element.name == 'img': + rating = self._parseRatingFromImage(element) + if rating: + attributes['rating'] = rating + break + except (AttributeError, TypeError): + raise ParsingError(u'Failed to locate and collect attributes.') + + separators = u"\r\n :;." + freestandingText = u'' + for line in attributesText.split(u'\n'): + if line.count(u':') != 1: + freestandingText += line + continue + key, value = line.split(u':', 1) + key = key.strip(separators).lower() + value = value.strip().strip(separators) + parsed = self._parseAttribute(key, value) + for parsedKey, parsedValue in parsed.iteritems(): + attributes[parsedKey] = parsedValue + + freestandingText = freestandingText.strip() + if 'summary' not in attributes and freestandingText: + attributes['summary'] = freestandingText + + if 'rating' not in attributes: + _logger.warning(u"Failed to locate or recognize rating for `%s'!", self.getUrl()) + + return attributes + + # Most, but not all, URLs of rating icons match this. + RATING_LABEL_PATTERN = re.compile(u'/(?P<rating>[ERATINnG]+)\.png$') + + def _parseRatingFromImage(self, element): + """Given an image element, try to parse the story rating from it.""" + # Skip images that have no `src' attribute. 
+ if not element.has_attr('src'): + return + source = element['src'] + if 'REITiNG' in source: + match = re.search(self.RATING_LABEL_PATTERN, source) + if not match: + return + label = match.group('rating') + if label in self._configuration['ratingTitles']: + return { + 'label': label, + 'title': self._configuration['ratingTitles'][label], + 'isAdult': label in self._configuration['adultRatings'] + } + else: + _logger.warning(u"No title found for rating label `%s'!" % label) + # TODO: conduct research on such abnormal URLs. + elif source == 'http://www.masseffect2.in/_fr/10/1360399.png': + label = 'Nn' + return { + 'label': 'Nn', + 'title': self._configuration['ratingTitles'][label], + 'isAdult': label in self._configuration['adultRatings'] + } + + # Various `et cetera' and `et al' forms in Russian texts. + # Intended to be used with whole strings! + ETC_PATTERN = re.compile( + u'''[и&]\s(?: + (?:т\.?\s?[пд]?\.?)| + (?:др(?:угие|\.)?)| + (?:пр(?:очие|\.)?)| + # Note: the identical-looking letters `K' and `o' + # below are from the Latin and Cyrillic alphabets. + (?:ко(?:мпания)?|[KК][oо°]) + )$ + ''', + re.IGNORECASE + re.UNICODE + re.VERBOSE) + + # `Author's Notes' and its variants in Russian. + ANNOTATION_PATTERN = re.compile(u'аннотация|описание|(?:(?:за|при)мечание\s)?(?:от\s)?автора', re.UNICODE) + + def _parseAttribute(self, key, value): + """ + Parse a single record in the chapter attributes for chapter metadata. + Return a dictionary of canonical attributes and values (i.e. multiple attributes may be discovered). + """ + + def refineCharacter(name): + """Refine a character name by stripping stop-words and distortions.""" + strippedName = name.strip() + nameOnly = re.sub(self.ETC_PATTERN, u'', strippedName) + # TODO: extract canonical name (even ME-specific?). + canonicalName = nameOnly + return canonicalName + + if re.match(u'жанры?', key, re.UNICODE): + genres = filter(bool, map(unicode.strip, re.split(u'[,;/]', value))) + return {'genres': genres} + elif key == u'статус': + isInProgress = value == u'в процессе' + return {'isInProgress': isInProgress} + elif key == u'персонажи': + participants = map(refineCharacter, re.split(u'[,;]', value)) + characters = [] + pairings = [] + for participant in participants: + if u'/' in participant: + pairings.append(participant) + else: + characters.append(participant) + return { + 'characters': characters, + 'pairings': pairings + } + elif key == u'предупреждение': + return {'warning': value} + elif re.match(self.ANNOTATION_PATTERN, key): + if not value.endswith(u'.'): + value += u'.' + # `capitalize()' would make value[1:] lowercase, which we don't want. + value = value[:1].upper() + value[1:] + return {'summary': value} + else: + _logger.info(u"Unrecognized attribute `%s' ignored.", key) + return {} + + def _getTextElement(self): + """Locate the chapter body text element on the page.""" + if not self._textElement: + self._textElement = self.__collectTextElements() + return self._textElement + + def __collectTextElements(self): + """Return all elements containing parts of chapter text (which may be +

paragraphs, divisions or plain text nodes) under a single root.""" + starter = self._document.find('div', {'id': u'article'}) + if starter is None: + # FIXME: This will occur if the method is called more than once. + # The reason is that elements appended to `root' are removed from the document. + # BS 4.4 implements cloning via `copy.copy()', but supporting it for BS 4.3 + # would be error-prone (due to relying on BS internals) and is not needed. + if self._textElement: + _logger.debug(u"You may not call this function more than once!") + raise ParsingError(u'Failed to locate text.') + collection = [starter] + for element in starter.nextSiblingGenerator(): + if element is None: + break + if isinstance(element, bs4.Tag) and element.name == 'tr': + break + collection.append(element) + root = bs4.Tag(name='td') + for element in collection: + root.append(element) + + if self._configuration['excludeEditorSignature']: + root = self._excludeEditorSignature(root) + + return root + + def _getSiblingChapterUrl(self, selector): + """Locate a link to a sibling chapter, either the previous or the next one, and return its URL.""" + block = self._document \ + .find('td', {'class': 'eDetails1'}) \ + .find('div', selector) + if not block: + return + link = block.find('a') + if not link: + return + return link['href'] + + # The editor signature always starts with something like this. + SIGNED_PATTERN = re.compile(u'отредактирова(?:но|ла?)[:.\s]', re.IGNORECASE + re.UNICODE) + + def _excludeEditorSignature(self, root): + """Exclude the editor signature from within the `root' element.""" + for textNode in root.findAll(text=True): + if re.match(self.SIGNED_PATTERN, textNode.string): + editorLink = textNode.findNext('a') + if editorLink: + editorLink.extract() + # Occasionally the editor link has inner formatting, which is a sibling DOM-wise. + editorName = textNode.findNext('i') + if editorName: + editorName.extract() + textNode.extract() + # We could try removing the container element, but there is a risk + # of removing text ending with it. Better to play safe here. + break + return root + + +def _getLargestCommonPrefix(*args): + """Returns the largest common prefix of all unicode arguments, ignoring case. + :rtype : unicode + """ + from itertools import takewhile, izip + toLower = lambda xs: map(lambda x: x.lower(), xs) + allSame = lambda xs: len(set(toLower(xs))) == 1 + return u''.join([i[0] for i in takewhile(allSame, izip(*args))]) diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index faf73613..d7168aa2 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -206,6 +206,7 @@ def get_valid_keywords(): return list(['(in|ex)clude_metadata_(pre|post)', 'add_chapter_numbers', 'add_genre_when_multi_category', + 'adult_ratings', 'allow_unsafe_filename', 'always_overwrite', 'anthology_tags', @@ -235,6 +236,7 @@ def get_valid_keywords(): 'description_limit', 'do_update_hook', 'exclude_notes', + 'exclude_editor_signature', 'extra_logpage_entries', 'extra_subject_tags', 'extra_titlepage_entries', @@ -286,6 +288,7 @@ def get_valid_keywords(): 'output_filename_safepattern', 'password', 'post_process_cmd', + 'rating_titles', 'remove_transparency', 'replace_br_with_p', 'replace_hr', diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 7906121b..335d0462 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -1823,6 +1823,28 @@ extraships:InuYasha/Kagome ## Site dedicated to these categories/characters/ships extracategories:Lord of the Rings +[www.masseffect2.in] +## Site dedicated to this fandom. 
+extracategories: Mass Effect + +## The Ucoz.com engine, upon which MassEffect2.in is based, imposes an unspecified limit on request frequency. +## Reports vary from `5 requests per second' to `2 requests per second for more than 10 per minute'. +## With the default delay, an IP ban lasting several hours may follow, so a higher value is set here. +slow_down_sleep_time: 2 + +## Whether to exclude the editor signature from the bottom of the chapter text. +exclude_editor_signature: false + +## Stories on the site almost never have a cover image; for the stories which do, +## this may be adjusted in `personal.ini' before downloading. +never_make_cover: true + +## Titles for ratings identified by the 1- or 2-letter codes of the `ERATING' system +## (`система Р.Е.Й.Т.И.Н.Г.'). MassEffect2.in and some other sites adopted it, +## but changed the titles and update them occasionally. +rating_titles: R=RESTRICTED (16+), E=EXEMPT (18+), I=ART HOUSE, T=To every, A=IN=Иной мир, Nn=Новый мир, G=О\, Господи! +adult_ratings: E,R + [www.mediaminer.org] [www.midnightwhispers.ca]
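The `rating_titles' value added above packs two subtleties: one entry may alias several labels to a single title (`A=IN=Иной мир'), and a comma inside a title has to be escaped (`G=О\, Господи!'). Below is a minimal sketch, not part of the patch, of how such a value is expected to decompose into the structures built by `_getParsingConfiguration()'; the literal `entries' list is an assumption standing in for the value after the config layer has split it on commas and unescaped `\,'.

# -*- coding: utf-8 -*-
# Sketch only: `entries' is assumed to be the already-split `rating_titles' value.
entries = [u'R=RESTRICTED (16+)', u'E=EXEMPT (18+)', u'I=ART HOUSE',
           u'T=To every', u'A=IN=Иной мир', u'Nn=Новый мир', u'G=О, Господи!']
adult_ratings = {u'E', u'R'}           # from the `adult_ratings' setting

rating_titles = {}
for entry in entries:
    parts = entry.split(u'=')
    if len(parts) < 2:                 # malformed entry without any `='
        continue
    labels, title = parts[:-1], parts[-1]
    for label in labels:               # `A=IN=Иной мир' maps both `A' and `IN' to one title
        rating_titles[label] = title
    if adult_ratings & set(labels):    # if one alias is adult, all aliases are adult
        adult_ratings.update(labels)

assert rating_titles[u'A'] == rating_titles[u'IN'] == u'Иной мир'
assert rating_titles[u'G'] == u'О, Господи!'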