Add editor signature removal capability.

All chapters have an editor signature at the end.  Users wishing to remove
it can enable the `exclude_editor_signature' option in `personal.ini'.
This commit is contained in:
Dmitry Kozliuk 2015-07-23 02:44:50 +03:00
parent 79b56c872f
commit a8ce9d5711
4 changed files with 31 additions and 0 deletions

View file

@ -1226,6 +1226,9 @@ extracategories:Harry Potter
## Site dedicated to this fandom.
extracategories: Mass Effect
## Whether to exclude the editor signature from the bottom of chapter text.
exclude_editor_signature: false
## Stories on the site almost never have cover image.
## May be adjusted in `personal.ini' on per-story basis.
never_make_cover: true

View file

@ -266,6 +266,8 @@ class MassEffect2InAdapter(BaseSiteAdapter):
self.getConfig('strip_chapter_numbers', False) \
and not self.getConfig('add_chapter_numbers', False)
self._parsingConfiguration['excludeEditorSignature'] = \
self.getConfig('exclude_editor_signature', False)
return self._parsingConfiguration
@ -709,6 +711,10 @@ class Chapter(object):
root = bs.Tag(self._document, 'td')
for element in collection:
root.append(element)
if self._configuration['excludeEditorSignature']:
root = self._excludeEditorSignature(root)
return root
def _getSiblingChapterUrl(self, selector):
@ -725,6 +731,24 @@ class Chapter(object):
return
return link['href']
# Matches the beginning of an editor signature: one of the Russian word forms
# "отредактировано" / "отредактировал" / "отредактировала", in any letter
# case, immediately followed by a colon, a period or a whitespace character.
# The backslash is doubled because the literal is not a raw string: a bare
# `\s' is an invalid string escape that newer Python versions warn about,
# while `\\s' yields the very same regex on both Python 2 and Python 3.
# Flags are bit masks, so they are combined with `|' rather than `+'.
SIGNED_PATTERN = re.compile(
    u'отредактирова(?:но|ла?)[:.\\s]', re.IGNORECASE | re.UNICODE)
def _excludeEditorSignature(self, root):
    """Strip the editor signature from `root` and return `root`.

    The first text node under `root` whose beginning matches
    `SIGNED_PATTERN` is treated as the signature.  That node is removed
    together with the nearest following `a` element and then the nearest
    following `i` element, when such elements are found.  At most one
    signature is removed; if nothing matches, `root` is returned untouched.
    """
    signature = next(
        (node for node in root.findAll(text=True)
         if self.SIGNED_PATTERN.match(node.string)),
        None)
    if signature is None:
        return root

    # The link is looked up and removed first; only then is the `i' element
    # searched for.  Seldom the editor link has inner formatting, which is
    # a sibling DOM-wise, so it has to be removed separately.
    for tagName in ('a', 'i'):
        follower = signature.findNext(tagName)
        if follower:
            follower.extract()
    signature.extract()

    # The container element is deliberately left in place: removing it
    # risks dropping chapter text that ends with the signature.
    return root
def _getLargestCommonPrefix(*args):
"""Returns largest common prefix of all unicode(!) arguments.

View file

@ -235,6 +235,7 @@ def get_valid_keywords():
'description_limit',
'do_update_hook',
'exclude_notes',
'exclude_editor_signature',
'extra_logpage_entries',
'extra_subject_tags',
'extra_titlepage_entries',

View file

@ -1827,6 +1827,9 @@ extracategories:Lord of the Rings
## Site dedicated to this fandom.
extracategories: Mass Effect
## Whether to exclude the editor signature from the bottom of chapter text.
exclude_editor_signature: false
## Stories on the site almost never have cover image.
## May be adjusted in `personal.ini' on per-story basis.
never_make_cover: true