# Start of an editor signature: the Russian verb forms "отредактировано",
# "отредактировал" or "отредактировала" ("edited" / "edited by"), in any
# letter case, immediately followed by a colon, a period or whitespace.
# The backslash is doubled so the literal is a valid (non-raw) unicode
# string on both Python 2 and Python 3; the regex itself still sees `\s'.
SIGNED_PATTERN = re.compile(
    u'отредактирова(?:но|ла?)[:.\\s]',
    re.IGNORECASE | re.UNICODE)

def _excludeEditorSignature(self, root):
    """Removes the first editor signature found under `root'.

    Text nodes of `root' are scanned in the order `findAll' returns them.
    The first one whose text begins with SIGNED_PATTERN is treated as the
    signature label: the nearest following `a' element, the nearest
    following `i' element and the label itself are detached from the tree.
    Later matches are left untouched.

    `root' is modified in place and also returned, so the call may be used
    as an expression.  When nothing matches, `root' is returned unchanged.
    """
    for textNode in root.findAll(text=True):
        # NOTE(review): `match' is anchored at the very first character, so
        # a label preceded by whitespace in the same text node is skipped --
        # confirm this is what the site markup requires.
        if not self.SIGNED_PATTERN.match(textNode.string):
            continue

        # NOTE(review): `findNext' is not limited to the signature's own
        # container; it takes the next matching element that follows the
        # label, wherever that element is.
        editorLink = textNode.findNext('a')
        if editorLink:
            editorLink.extract()
        # Seldom editor link has inner formatting, which is sibling
        # DOM-wise, so it has to be looked up and removed separately.
        editorName = textNode.findNext('i')
        if editorName:
            editorName.extract()
        # The label goes last: it is the anchor for both lookups above.
        textNode.extract()
        # We could try removing container element, but there is a risk
        # of removing text ending with it.  Better play safe here.
        break
    return root