mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-30 04:36:11 +01:00
Merge pull request #103 from PlushBeaver/masseffect2in
Add MassEffect2.in Adapter
This commit is contained in:
commit
457b0f6fc3
5 changed files with 767 additions and 0 deletions
|
|
@ -1222,6 +1222,28 @@ extracategories:Lord of the Rings
|
|||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
|
||||
[www.masseffect2.in]
|
||||
## Site dedicated to this fandom.
|
||||
extracategories: Mass Effect
|
||||
|
||||
## Ucoz.com engine, upon which MassEffect2.in is based, imposes an unspecified limit on request frequency.
|
||||
## Reports vary from `5 requests per second' to `2 requests per second for more than 10 per minute'.
|
||||
## With default settings, a several-hours IP ban may follow, so set slow_down_sleep_time higher.
|
||||
slow_down_sleep_time: 2
|
||||
|
||||
## Whether to exclude editor signature from the bottom of chapter text.
|
||||
exclude_editor_signature: false
|
||||
|
||||
## Stories on the site almost never have cover image, and for the stories which do,
|
||||
## this may be adjusted in `personal.ini' before downloading.
|
||||
never_make_cover: true
|
||||
|
||||
## Titles for ratings identified by 1- or 2-letter codes from `ERATING system'
|
||||
## (`система Р.Е.Й.Т.И.Н.Г.'). MassEffect2.in and some other sites adopted it,
|
||||
## but changed titles and update them occasionally.
|
||||
rating_titles: R=RESTRICTED (16+), E=EXEMPT (18+), I=ART HOUSE, T=To every, A=IN=Иной мир, Nn=Новый мир, G=О\, Господи!
|
||||
adult_ratings: E,R
|
||||
|
||||
[merlinfic.dtwins.co.uk]
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
|
|
|
|||
|
|
@ -138,6 +138,7 @@ import adapter_itcouldhappennet
|
|||
import adapter_forumsspacebattlescom
|
||||
import adapter_forumssufficientvelocitycom
|
||||
import adapter_ninelivesdarksolaceorg
|
||||
import adapter_masseffect2in
|
||||
|
||||
## This bit of complexity allows adapters to be added by just adding
|
||||
## importing. It eliminates the long if/else clauses we used to need
|
||||
|
|
|
|||
719
fanficfare/adapters/adapter_masseffect2in.py
Normal file
719
fanficfare/adapters/adapter_masseffect2in.py
Normal file
|
|
@ -0,0 +1,719 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import bs4
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
from ..htmlcleanup import removeEntities, stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def getClass():
    """Return the adapter class this module defines."""
    return MassEffect2InAdapter
|
||||
|
||||
|
||||
class ParsingError(Exception):
    """Raised when web page content cannot be parsed as expected."""

    def __init__(self, message):
        Exception.__init__(self)
        # Keep the message on the instance so __str__ can report it.
        self.message = message

    def __str__(self):
        return self.message
|
||||
|
||||
|
||||
class MassEffect2InAdapter(BaseSiteAdapter):
    """
    Provides support for MassEffect2.in site as story source.
    Can be used as a template for sites built upon the Ucoz.com engine
    (until a base class is extracted).
    Specializations:
    1) Russian content (date format, genre names, etc.);
    2) original `E.R.A.T.I.N.G.' rating scale, used by masseffect2.in
    and some affiliated sites, denoted with images;
    3) editor signatures and an option to remove them.
    """

    # Unicode-aware single-word matcher; used for word counting.
    WORD_PATTERN = re.compile(u'\w+', re.UNICODE)
    # Ucoz document IDs look like `19-1-0-1234'.
    DOCUMENT_ID_PATTERN = re.compile(u'\d+-\d+-\d+-\d+')
    SITE_LANGUAGE = u'Russian'

    def __init__(self, config, url):
        """Initialize the adapter and normalize `url' to canonical document form."""
        BaseSiteAdapter.__init__(self, config, url)

        # Candidate encodings used when decoding fetched pages.
        self.decode = ["utf8"]

        self.story.setMetadata('siteabbrev', 'me2in')
        self.story.setMetadata('storyId', self._getDocumentId(self.url))

        # Rebuild the URL from the document ID so equivalent URLs compare equal.
        self._setURL(self._makeDocumentUrl(self.story.getMetadata('storyId')))

        # Chapter objects keyed by URL; filled by extractChapterUrlsAndMetadata().
        self._chapters = {}
        # Parsed settings cache; see _getParsingConfiguration().
        self._parsingConfiguration = None
|
||||
|
||||
    # Must be @staticmethod, not @classmethod!
    @staticmethod
    def getSiteDomain():
        """Return the domain this adapter handles."""
        return 'www.masseffect2.in'
|
||||
|
||||
    @classmethod
    def getSiteExampleURLs(cls):
        """Return a space-separated string of example story URLs."""
        return u' '.join([cls._makeDocumentUrl('19-1-0-1234'),
                          cls._makeDocumentUrl('24-1-0-4321')])
|
||||
|
||||
    def getSiteURLPattern(self):
        """Return a regular expression string matching URLs this adapter accepts."""
        return re.escape(self._makeDocumentUrl('')) + self.DOCUMENT_ID_PATTERN.pattern
|
||||
|
||||
    def use_pagecache(self):
        """Allows use of downloaded page cache. It is essential for this
        adapter, because the site does not offer a chapter URL list, and many
        pages have to be fetched and parsed repeatedly."""
        return True
|
||||
|
||||
    def extractChapterUrlsAndMetadata(self):
        """Extracts chapter URLs and story metadata. Actually downloads all
        chapters, which is not exactly right, but necessary due to technical
        limitations of the site."""

        def followChapters(starting, forward=True):
            # Recursively walk `Previous'/`Next' chapter links, yielding
            # chapters in document order relative to `starting'.
            if forward:
                url = starting.getNextChapterUrl()
            else:
                url = starting.getPreviousChapterUrl()
            if url:
                url = self._makeDocumentUrl(self._getDocumentId(url))
                following = self._makeChapter(url)
                # Do not follow links to related, but different stories (prequels or sequels).
                if not following.isFromStory(starting.getHeading()):
                    return
                if forward:
                    yield following
                for chapter in followChapters(following, forward):
                    yield chapter
                # When walking backwards, yield after recursion so chapters
                # still come out in ascending order.
                if not forward:
                    yield following

        try:
            startingChapter = self._makeChapter(self.url)
        except urllib2.HTTPError, error:
            if error.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            raise

        # We only have one date for each chapter and assume the oldest one
        # to be publication date and the most recent one to be update date.
        datePublished = datetime.datetime.max
        dateUpdated = datetime.datetime.min
        wordCount = 0
        # We aim at counting chapters, not chapter parts.
        chapterCount = 0
        storyInProgress = False

        chapters = \
            list(followChapters(startingChapter, forward=False)) + \
            [startingChapter] + \
            list(followChapters(startingChapter, forward=True))

        # Headings share a common prefix: the story title, optionally followed
        # by a separator and a `Глава' (`Chapter') label.  Split the prefix
        # into story title and the start of per-chapter titles.
        headings = [chapter.getHeading() for chapter in chapters]
        largestCommonPrefix = _getLargestCommonPrefix(*headings)
        prefixLength = len(largestCommonPrefix)
        storyTitleEnd, chapterTitleStart = prefixLength, prefixLength
        match = re.search(u'[:\.\s]*(?P<chapter>глава\s+)?$', largestCommonPrefix, re.IGNORECASE | re.UNICODE)
        if match:
            storyTitleEnd -= len(match.group())
            label = match.group('chapter')
            if label:
                chapterTitleStart -= len(label)
        storyTitle = largestCommonPrefix[:storyTitleEnd]
        self.story.setMetadata('title', storyTitle)

        # Leftover separators at either end of a chapter title.
        garbagePattern = re.compile(u'(?P<start>^)?[:\.\s]*(?(start)|$)', re.UNICODE)
        # A 1-3 digit chapter number, optionally preceded by `глава' (`chapter').
        indexPattern = re.compile(u'(?:глава\s)?(?:(?<!\d)(?P<index>\d{1,3})(?=\D|$))', re.IGNORECASE | re.UNICODE)

        for chapter in chapters:
            url = chapter.getUrl()
            self._chapters[url] = chapter
            _logger.debug(u"Processing chapter `%s'.", url)

            try:
                authorName = chapter.getAuthorName()
                if authorName:
                    self.story.extendList('author', [authorName])
                authorId = chapter.getAuthorId()
                if authorId:
                    authorUrl = 'http://%s/index/%s' % (self.getSiteDomain(), authorId)
                else:
                    authorId = u''
                    authorUrl = u''
                self.story.extendList('authorId', [authorId])
                self.story.extendList('authorUrl', [authorUrl])

                # First chapter that carries a rating/summary wins.
                if not self.story.getMetadata('rating'):
                    ratingTitle = chapter.getRatingTitle()
                    if ratingTitle:
                        self.story.setMetadata('rating', ratingTitle)

                if not self.story.getMetadata('description'):
                    summary = chapter.getSummary()
                    if summary:
                        self.story.setMetadata('description', summary)

                datePublished = min(datePublished, chapter.getDate())
                dateUpdated = max(dateUpdated, chapter.getDate())

                self.story.extendList('genre', chapter.getGenres())
                self.story.extendList('characters', chapter.getCharacters())
                self.story.extendList('ships', chapter.getPairings())

                wordCount += self._getWordCount(chapter.getTextElement())

                # Chapter status usually represents the story status, so we want the last chapter status.
                # Some chapters may have no status attribute.
                chapterInProgress = chapter.isInProgress()
                if chapterInProgress is not None:
                    storyInProgress = chapterInProgress

                # If any chapter is adult, consider the whole story adult.
                if chapter.isAdult():
                    self.story.setMetadata('is_adult', True)
                warning = chapter.getWarning()
                if warning:
                    self.story.extendList('warnings', [warning])

                chapterTitle = re.sub(garbagePattern, u'', chapter.getHeading()[chapterTitleStart:])

                # Prefer the explicit chapter number from the title; fall back
                # to simple counting for unnumbered chapters.
                match = re.search(indexPattern, chapterTitle)
                if match:
                    index = int(match.group('index'))
                    chapterCount = max(chapterCount, index)
                else:
                    chapterCount += 1

                self.chapterUrls.append((chapterTitle, url))
            except ParsingError, error:
                raise exceptions.FailedToDownload(u"Failed to download chapter `%s': %s" % (url, error))

        # Some metadata are handled separately due to format conversions.
        self.story.setMetadata('status', 'In Progress' if storyInProgress else 'Completed')
        self.story.setMetadata('datePublished', datePublished)
        self.story.setMetadata('dateUpdated', dateUpdated)
        self.story.setMetadata('numWords', str(wordCount))
        self.story.setMetadata('numChapters', chapterCount)

        # Site-specific metadata.
        self.story.setMetadata('language', self.SITE_LANGUAGE)
|
||||
|
||||
def getChapterText(self, url):
|
||||
"""Grabs the text for an individual chapter."""
|
||||
if url not in self._chapters:
|
||||
raise exceptions.FailedToDownload(u"No chapter `%s' present!" % url)
|
||||
chapter = self._chapters[url]
|
||||
return self.utf8FromSoup(url, chapter.getTextElement())
|
||||
|
||||
    def _makeChapter(self, url):
        """Creates a chapter object given a URL.

        Fetches the page (possibly from the page cache) and wraps the parsed
        document in a lazily-evaluated Chapter."""
        document = self.make_soup(self._fetchUrl(url))
        chapter = Chapter(self._getParsingConfiguration(), url, document)
        return chapter
|
||||
|
||||
def _getWordCount(self, element):
|
||||
"""Returns word count in plain text extracted from chapter body."""
|
||||
text = stripHTML(element)
|
||||
count = len(re.findall(self.WORD_PATTERN, text))
|
||||
return count
|
||||
|
||||
def _getParsingConfiguration(self):
|
||||
if not self._parsingConfiguration:
|
||||
self._parsingConfiguration = {}
|
||||
|
||||
adultRatings = self.getConfigList('adult_ratings')
|
||||
if not adultRatings:
|
||||
raise exceptions.PersonalIniFailed(
|
||||
u"Missing `adult_ratings' setting", u"MassEffect2.in", u"?")
|
||||
adultRatings = set(adultRatings)
|
||||
self._parsingConfiguration['adultRatings'] = adultRatings
|
||||
|
||||
ratingTitleDescriptions = self.getConfigList('rating_titles')
|
||||
if ratingTitleDescriptions:
|
||||
ratingTitles = {}
|
||||
for ratingDescription in ratingTitleDescriptions:
|
||||
parts = ratingDescription.split(u'=')
|
||||
if len(parts) < 2:
|
||||
_logger.warning(
|
||||
u"Invalid `rating_titles' setting, missing `=' in `%s'."
|
||||
% ratingDescription)
|
||||
continue
|
||||
labels = parts[:-1]
|
||||
title = parts[-1]
|
||||
for label in labels:
|
||||
ratingTitles[label] = title
|
||||
# Duplicate label aliasing in adult rating set.
|
||||
if label in adultRatings:
|
||||
adultRatings.add(*labels)
|
||||
self._parsingConfiguration['adultRatings'] = list(adultRatings)
|
||||
self._parsingConfiguration['ratingTitles'] = ratingTitles
|
||||
else:
|
||||
raise exceptions.PersonalIniFailed(
|
||||
u"Missing `rating_titles' setting", u"MassEffect2.in", u"?")
|
||||
|
||||
self._parsingConfiguration['excludeEditorSignature'] = \
|
||||
self.getConfig('exclude_editor_signature', False)
|
||||
|
||||
return self._parsingConfiguration
|
||||
|
||||
def _getDocumentId(self, url):
|
||||
"""Extract document ID from MassEffect2.in URL."""
|
||||
match = re.search(self.DOCUMENT_ID_PATTERN, url)
|
||||
if not match:
|
||||
raise ValueError(u"Failed to extract document ID from `'" % url)
|
||||
documentId = url[match.start():match.end()]
|
||||
return documentId
|
||||
|
||||
    @classmethod
    def _makeDocumentUrl(cls, documentId):
        """Make a chapter URL given a document ID (inverse of _getDocumentId)."""
        return 'http://%s/publ/%s' % (cls.getSiteDomain(), documentId)
|
||||
|
||||
|
||||
class Chapter(object):
    """Represents a lazily-parsed chapter of a story."""

    def __init__(self, configuration, url, document):
        """Store the parsing configuration, chapter URL and parsed document.

        `configuration' is the dictionary produced by
        MassEffect2InAdapter._getParsingConfiguration(); `document' is the
        parsed soup of the chapter page."""
        self._configuration = configuration
        self._url = url
        self._document = document
        # Lazy-loaded:
        self._heading = None
        self._date = None
        self._author = None
        self._attributes = None
        self._textElement = None
        self._infoBar = None
|
||||
|
||||
def getHeading(self):
|
||||
return self._extractHeading()
|
||||
|
||||
def getSummary(self):
|
||||
attributes = self.__getAttributes()
|
||||
if 'summary' in attributes:
|
||||
return attributes['summary']
|
||||
|
||||
def getAuthorId(self):
|
||||
author = self._getAuthor()
|
||||
if author:
|
||||
return author['id']
|
||||
|
||||
def getAuthorName(self):
|
||||
author = self._getAuthor()
|
||||
if author:
|
||||
return author['name']
|
||||
|
||||
def getDate(self):
|
||||
return self.__getDate()
|
||||
|
||||
def getRatingTitle(self):
|
||||
attributes = self.__getAttributes()
|
||||
if 'rating' in attributes:
|
||||
return attributes['rating']['title']
|
||||
|
||||
def isAdult(self):
|
||||
attributes = self.__getAttributes()
|
||||
if 'rating' in attributes and attributes['rating']['isAdult']:
|
||||
return True
|
||||
if 'warning' in attributes:
|
||||
return True
|
||||
return False
|
||||
|
||||
def getWarning(self):
|
||||
attributes = self.__getAttributes()
|
||||
if 'warning' in attributes:
|
||||
return attributes['warning']
|
||||
|
||||
def getCharacters(self):
|
||||
return self._getListAttribute('characters')
|
||||
|
||||
def getPairings(self):
|
||||
return self._getListAttribute('pairings')
|
||||
|
||||
def getGenres(self):
|
||||
return self._getListAttribute('genres')
|
||||
|
||||
def isInProgress(self):
|
||||
attributes = self.__getAttributes()
|
||||
if 'isInProgress' in attributes:
|
||||
return attributes['isInProgress']
|
||||
|
||||
def getUrl(self):
|
||||
return self._url
|
||||
|
||||
def getTextElement(self):
|
||||
return self._getTextElement()
|
||||
|
||||
def getPreviousChapterUrl(self):
|
||||
"""Download chapters following `Previous chapter' links.
|
||||
Returns a list of chapters' URLs."""
|
||||
return self._getSiblingChapterUrl({'class': 'fl tal'})
|
||||
|
||||
def getNextChapterUrl(self):
|
||||
"""Download chapters following `Next chapter' links.
|
||||
Returns a list of chapters' URLs."""
|
||||
return self._getSiblingChapterUrl({'class': 'tar fr'})
|
||||
|
||||
    def isFromStory(self, storyTitle, prefixThreshold=-1):
        """Check if this chapter is from a story different from the given one.
        Prefix threshold specifies how long common story title prefix shall be
        for chapters from one story: negative value means implementation-defined
        optimum, zero inhibits the check, and positive value adjusts threshold."""

        def getFirstWord(string):
            # Leading whitespace plus the first word.  Assumes `string'
            # begins with a word: `match' is not checked for None here.
            match = re.search(u'^\s*\w+', string, re.UNICODE)
            return string[match.start():match.end()]

        thisStoryTitle = self.getHeading()
        if prefixThreshold != 0:
            if prefixThreshold < 0:
                # Implementation-defined optimum: length of the shorter of
                # the two titles' leading words.
                prefixThreshold = min(
                    len(getFirstWord(storyTitle)), len(getFirstWord(thisStoryTitle)))
            else:
                prefixThreshold = min(
                    prefixThreshold, len(storyTitle), len(thisStoryTitle))
            # Truthy when the common prefix is long enough, i.e. the titles
            # look like they belong to the same story.
            result = len(_getLargestCommonPrefix(storyTitle, thisStoryTitle)) >= prefixThreshold
            return result
        else:
            # NOTE(review): with prefixThreshold == 0 this returns True when
            # the titles DIFFER, which is the opposite polarity of the branch
            # above and of the docstring's `inhibits the check' wording --
            # confirm the intended semantics.
            return storyTitle != thisStoryTitle
|
||||
|
||||
def _getListAttribute(self, name):
|
||||
"""Return an attribute value as a list or an empty list if the attribute is absent."""
|
||||
attributes = self.__getAttributes()
|
||||
if name in attributes:
|
||||
return attributes[name]
|
||||
return []
|
||||
|
||||
    def _extractHeading(self):
        """Extract header text from the document (the `eTitle' block)."""
        return stripHTML(
            self._document.find('div', {'class': 'eTitle'}).string)
|
||||
|
||||
    def __getHeading(self):
        """Lazily parse and return heading."""
        # Cached after the first extraction; the document never changes.
        if not self._heading:
            self._heading = self._extractHeading()
        return self._heading
|
||||
|
||||
    def _getAuthor(self):
        """Lazily parse and return author's information (see _parseAuthor)."""
        if not self._author:
            self._author = self._parseAuthor()
        return self._author
|
||||
|
||||
    def _parseAuthor(self):
        """Locate and parse chapter author's information to a dictionary with author's `id' and `name'.

        Raises ParsingError when the author link or ID cannot be found."""
        try:
            authorLink = self._getInfoBarElement() \
                .find('i', {'class': 'icon-user'}) \
                .findNextSibling('a')
        except AttributeError:
            raise ParsingError(u'Failed to locate author link.')
        # The profile opens via an onclick handler; the `8-<number>' token in
        # it is treated as the user ID (presumably a Ucoz convention -- TODO
        # confirm).
        match = re.search(u'(8-\d+)', authorLink['onclick'])
        if not match:
            raise ParsingError(u'Failed to extract author ID.')
        authorId = match.group(0)
        authorName = stripHTML(authorLink.text)
        return {
            'id': authorId,
            'name': authorName
        }
|
||||
|
||||
    def __getDate(self):
        """Lazily parse chapter date."""
        if not self._date:
            self._date = self._parseDate()
        return self._date
|
||||
|
||||
    def _parseDate(self):
        """Locate and parse chapter date (DD.MM.YYYY format).

        The date is the text node just before the views counter icon.
        Raises ParsingError when it cannot be located."""
        try:
            dateText = self._getInfoBarElement() \
                .find('i', {'class': 'icon-eye'}) \
                .findPreviousSibling(text=True) \
                .strip(u'| \n')
        except AttributeError:
            raise ParsingError(u'Failed to locate date.')
        date = makeDate(dateText, '%d.%m.%Y')
        return date
|
||||
|
||||
    def _getInfoBarElement(self):
        """Locate informational bar element, containing chapter date and author, on the page.

        The element is cached after the first lookup.
        Raises ParsingError when the bar is missing."""
        if not self._infoBar:
            self._infoBar = self._document.find('td', {'class': 'eDetails2'})
            if not self._infoBar:
                raise ParsingError(u'No informational bar found.')
        return self._infoBar
|
||||
|
||||
    def __getAttributes(self):
        """Lazily parse attributes."""
        if not self._attributes:
            self._attributes = self._parseAttributes()
        return self._attributes
|
||||
|
||||
def _parseAttributes(self):
|
||||
"""Parse chapter attribute block and return it as a dictionary with standard entries."""
|
||||
|
||||
attributes = {}
|
||||
attributesText = u''
|
||||
try:
|
||||
starter = self._document \
|
||||
.find('div', {'class': 'comm-div'}) \
|
||||
.findNextSibling('div', {'class': 'cb'})
|
||||
bound = starter.findNextSibling('div', {'class': 'cb'})
|
||||
|
||||
def processElement(element):
|
||||
"""Return textual representation an *inline* element of chapter attribute block."""
|
||||
result = u''
|
||||
if isinstance(element, bs4.Tag):
|
||||
if element.name in ('b', 'strong', 'font', 'br'):
|
||||
result += u"\n"
|
||||
if element.name == 's':
|
||||
result += u"<s>%s</s>" % stripHTML(element)
|
||||
else:
|
||||
result += stripHTML(element)
|
||||
else:
|
||||
result += removeEntities(element)
|
||||
return result
|
||||
|
||||
elements = starter.nextSiblingGenerator()
|
||||
for element in elements:
|
||||
if isinstance(element, bs4.Tag):
|
||||
if element == bound:
|
||||
break
|
||||
else:
|
||||
if element.name in ('div', 'p'):
|
||||
attributesText += u"\n"
|
||||
for child in element.childGenerator():
|
||||
attributesText += processElement(child)
|
||||
continue
|
||||
attributesText += processElement(element)
|
||||
|
||||
elements = starter.nextGenerator()
|
||||
for element in elements:
|
||||
if isinstance(element, bs4.Tag):
|
||||
if element == bound:
|
||||
break
|
||||
elif element.name == 'img':
|
||||
rating = self._parseRatingFromImage(element)
|
||||
if rating:
|
||||
attributes['rating'] = rating
|
||||
break
|
||||
except AttributeError or TypeError:
|
||||
raise ParsingError(u'Failed to locate and collect attributes.')
|
||||
|
||||
separators = u"\r\n :;."
|
||||
freestandingText = u''
|
||||
for line in attributesText.split(u'\n'):
|
||||
if line.count(u':') != 1:
|
||||
freestandingText += line
|
||||
continue
|
||||
key, value = line.split(u':', 1)
|
||||
key = key.strip(separators).lower()
|
||||
value = value.strip().strip(separators)
|
||||
parsed = self._parseAttribute(key, value)
|
||||
for parsedKey, parsedValue in parsed.iteritems():
|
||||
attributes[parsedKey] = parsedValue
|
||||
|
||||
freestandingText = freestandingText.strip()
|
||||
if 'summary' not in attributes and freestandingText:
|
||||
attributes['summary'] = freestandingText
|
||||
|
||||
if 'rating' not in attributes:
|
||||
_logger.warning(u"Failed to locate or recognize rating for `%s'!", self.getUrl())
|
||||
|
||||
return attributes
|
||||
|
||||
# Most, but not all, URLs of rating icons match this.
|
||||
RATING_LABEL_PATTERN = re.compile(u'/(?P<rating>[ERATINnG]+)\.png$')
|
||||
|
||||
def _parseRatingFromImage(self, element):
|
||||
"""Given an image element, try to parse story rating from it."""
|
||||
# Although deprecated, `has_key()' is required here.
|
||||
if not element.has_attr('src'):
|
||||
return
|
||||
source = element['src']
|
||||
if 'REITiNG' in source:
|
||||
match = re.search(self.RATING_LABEL_PATTERN, source)
|
||||
if not match:
|
||||
return
|
||||
label = match.group('rating')
|
||||
if label in self._configuration['ratingTitles']:
|
||||
return {
|
||||
'label': label,
|
||||
'title': self._configuration['ratingTitles'][label],
|
||||
'isAdult': label in self._configuration['adultRatings']
|
||||
}
|
||||
else:
|
||||
_logger.warning(u"No title found for rating label `%s'!" % label)
|
||||
# TODO: conduct a research on such abnormal URLs.
|
||||
elif source == 'http://www.masseffect2.in/_fr/10/1360399.png':
|
||||
label = 'Nn'
|
||||
return {
|
||||
'label': 'Nn',
|
||||
'title': self._configuration['ratingTitles'][label],
|
||||
'isAdult': label in self._configuration['adultRatings']
|
||||
}
|
||||
|
||||
# Various `et cetera' and `et al' forms in Russian texts.
|
||||
# Intended to be used with whole strings!
|
||||
    # Various `et cetera' and `et al' forms in Russian texts.
    # Intended to be used with whole strings!
    ETC_PATTERN = re.compile(
        u'''[и&]\s(?:
        (?:т\.?\s?[пд]?\.?)|
        (?:др(?:угие|\.)?)|
        (?:пр(?:очие|\.)?)|
        # Note: identically looking letters `K' and `o'
        # below are from Latin and Cyrillic alphabets.
        (?:ко(?:мпания)?|[KК][oо°])
        )$
        ''',
        re.IGNORECASE + re.UNICODE + re.VERBOSE)

    # `Author's Notes' and its variants in Russian.
    ANNOTATION_PATTERN = re.compile(u'аннотация|описание|(?:(?:за|при)мечание\s)?(?:от\s)?автора', re.UNICODE)

    def _parseAttribute(self, key, value):
        """
        Parse a single record in chapter attributes for chapter metadata.
        Return a dictionary of canonical attributes and values (i. e. multiple attributes may be discovered).

        `key' is expected lowercased and stripped (see _parseAttributes).
        """

        def refineCharacter(name):
            """Refines character name from stop-words and distortions."""
            strippedName = name.strip()
            nameOnly = re.sub(self.ETC_PATTERN, u'', strippedName)
            # TODO: extract canonical name (even ME-specific?).
            canonicalName = nameOnly
            return canonicalName

        # `жанры' = genres; entries are comma/semicolon/slash-separated.
        if re.match(u'жанры?', key, re.UNICODE):
            genres = filter(bool, map(unicode.strip, re.split(u'[,;/]', value)))
            return {'genres': genres}
        # `статус' = status; `в процессе' = in progress.
        elif key == u'статус':
            isInProgress = value == u'в процессе'
            return {'isInProgress': isInProgress}
        # `персонажи' = characters; entries containing `/' are pairings.
        elif key == u'персонажи':
            participants = map(refineCharacter, re.split(u'[,;]', value))
            characters = []
            pairings = []
            for participant in participants:
                if u'/' in participant:
                    pairings.append(participant)
                else:
                    characters.append(participant)
            return {
                'characters': characters,
                'pairings': pairings
            }
        # `предупреждение' = warning.
        elif key == u'предупреждение':
            return {'warning': value}
        elif re.match(self.ANNOTATION_PATTERN, key):
            if not value.endswith(u'.'):
                value += u'.'
            # Capitalize would make value[1:] lowercase, which we don't want.
            value = value[:1].upper() + value[1:]
            return {'summary': value}
        else:
            _logger.info(u"Unrecognized attribute `%s' ignored.", key)
            return {}
|
||||
|
||||
    def _getTextElement(self):
        """Locate chapter body text element on the page.

        Cached: __collectTextElements() must not run twice (see its FIXME)."""
        if not self._textElement:
            self._textElement = self.__collectTextElements()
        return self._textElement
|
||||
|
||||
    def __collectTextElements(self):
        """Return all elements containing parts of chapter text (which may be
        <p>aragraphs, <div>isions or plain text nodes) under a single root."""
        starter = self._document.find('div', {'id': u'article'})
        if starter is None:
            # FIXME: This will occur if the method is called more than once.
            # The reason is elements appended to `root' are removed from the document.
            # BS 4.4 implements cloning via `copy.copy()', but supporting it for BS 4.3
            # would be error-prone (due to relying on BS internals) and is not needed.
            if self._textElement:
                _logger.debug(u"You may not call this function more than once!")
            raise ParsingError(u'Failed to locate text.')
        collection = [starter]
        # Chapter text continues through sibling nodes up to the next table row.
        for element in starter.nextSiblingGenerator():
            if element is None:
                break
            if isinstance(element, bs4.Tag) and element.name == 'tr':
                break
            collection.append(element)
        # Re-parent everything under a synthetic <td>; note this MOVES the
        # nodes out of the original document (see the FIXME above).
        root = bs4.Tag(name='td')
        for element in collection:
            root.append(element)

        if self._configuration['excludeEditorSignature']:
            root = self._excludeEditorSignature(root)

        return root
|
||||
|
||||
    def _getSiblingChapterUrl(self, selector):
        """Locate a link to a sibling chapter, either previous or next one, and return its URL.

        Returns None when there is no such link.  Note: raises AttributeError
        when the `eDetails1' cell itself is missing from the page (the first
        `find' returns None)."""
        block = self._document \
            .find('td', {'class': 'eDetails1'}) \
            .find('div', selector)
        if not block:
            return
        link = block.find('a')
        if not link:
            return
        return link['href']
|
||||
|
||||
    # Editor signature always starts with something like this.
    SIGNED_PATTERN = re.compile(u'отредактирова(?:но|ла?)[:.\s]', re.IGNORECASE + re.UNICODE)

    def _excludeEditorSignature(self, root):
        """Exclude editor signature from within `root' element."""
        for textNode in root.findAll(text=True):
            if re.match(self.SIGNED_PATTERN, textNode.string):
                # Drop the editor's profile link following the signature text.
                editorLink = textNode.findNext('a')
                if editorLink:
                    editorLink.extract()
                # Seldom editor link has inner formatting, which is sibling DOM-wise.
                editorName = textNode.findNext('i')
                if editorName:
                    editorName.extract()
                textNode.extract()
                # We could try removing container element, but there is a risk
                # of removing text ending with it. Better play safe here.
                break
        return root
|
||||
|
||||
|
||||
def _getLargestCommonPrefix(*args):
|
||||
"""Returns largest common prefix of all unicode arguments, ignoring case.
|
||||
:rtype : unicode
|
||||
"""
|
||||
from itertools import takewhile, izip
|
||||
toLower = lambda xs: map(lambda x: x.lower(), xs)
|
||||
allSame = lambda xs: len(set(toLower(xs))) == 1
|
||||
return u''.join([i[0] for i in takewhile(allSame, izip(*args))])
|
||||
|
|
@ -206,6 +206,7 @@ def get_valid_keywords():
|
|||
return list(['(in|ex)clude_metadata_(pre|post)',
|
||||
'add_chapter_numbers',
|
||||
'add_genre_when_multi_category',
|
||||
'adult_ratings',
|
||||
'allow_unsafe_filename',
|
||||
'always_overwrite',
|
||||
'anthology_tags',
|
||||
|
|
@ -235,6 +236,7 @@ def get_valid_keywords():
|
|||
'description_limit',
|
||||
'do_update_hook',
|
||||
'exclude_notes',
|
||||
'exclude_editor_signature',
|
||||
'extra_logpage_entries',
|
||||
'extra_subject_tags',
|
||||
'extra_titlepage_entries',
|
||||
|
|
@ -286,6 +288,7 @@ def get_valid_keywords():
|
|||
'output_filename_safepattern',
|
||||
'password',
|
||||
'post_process_cmd',
|
||||
'rating_titles',
|
||||
'remove_transparency',
|
||||
'replace_br_with_p',
|
||||
'replace_hr',
|
||||
|
|
|
|||
|
|
@ -1823,6 +1823,28 @@ extraships:InuYasha/Kagome
|
|||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Lord of the Rings
|
||||
|
||||
[www.masseffect2.in]
|
||||
## Site dedicated to this fandom.
|
||||
extracategories: Mass Effect
|
||||
|
||||
## Ucoz.com engine, upon which MassEffect2.in is based, imposes an unspecified limit on request frequency.
|
||||
## Reports vary from `5 requests per second' to `2 requests per second for more than 10 per minute'.
|
||||
## With default settings, a several-hours IP ban may follow, so set slow_down_sleep_time higher.
|
||||
slow_down_sleep_time: 2
|
||||
|
||||
## Whether to exclude editor signature from the bottom of chapter text.
|
||||
exclude_editor_signature: false
|
||||
|
||||
## Stories on the site almost never have cover image, and for the stories which do,
|
||||
## this may be adjusted in `personal.ini' before downloading.
|
||||
never_make_cover: true
|
||||
|
||||
## Titles for ratings identified by 1- or 2-letter codes from `ERATING system'
|
||||
## (`система Р.Е.Й.Т.И.Н.Г.'). MassEffect2.in and some other sites adopted it,
|
||||
## but changed titles and update them occasionally.
|
||||
rating_titles: R=RESTRICTED (16+), E=EXEMPT (18+), I=ART HOUSE, T=To every, A=IN=Иной мир, Nn=Новый мир, G=О\, Господи!
|
||||
adult_ratings: E,R
|
||||
|
||||
[www.mediaminer.org]
|
||||
|
||||
[www.midnightwhispers.ca]
|
||||
|
|
|
|||
Loading…
Reference in a new issue