From 4242aa6f633754267fb31e67dcfc08f32e5344e2 Mon Sep 17 00:00:00 2001 From: David Lynch Date: Sun, 7 Nov 2021 11:16:26 -0600 Subject: [PATCH] Strip colors on all sites, not just xenforo --- sites/__init__.py | 15 ++++++++++++++- sites/royalroad.py | 2 +- sites/xenforo.py | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/sites/__init__.py b/sites/__init__.py index 5170b28..a08c25b 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -7,6 +7,7 @@ import uuid import time import logging import urllib +import re import attr from bs4 import BeautifulSoup @@ -93,7 +94,14 @@ class Site: same name, but pains should be taken to ensure they remain semantically similar in meaning. """ - return [] + return [ + SiteSpecificOption( + 'strip_colors', + '--strip-colors/--no-strip-colors', + default=True, + help="If true, colors will be stripped from the text." + ), + ] @classmethod def get_default_options(cls): @@ -209,6 +217,11 @@ class Site: email = bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8') a.insert_before(email) a.decompose() + # strip colors + if self.options['strip_colors']: + for tag in contents.find_all(style=re.compile(r'(?:color|background)\s*:')): + tag['style'] = re.sub(r'(?:color|background)\s*:[^;]+;?', '', tag['style']) + return contents diff --git a/sites/royalroad.py b/sites/royalroad.py index f6794bb..2e22c3f 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -15,7 +15,7 @@ class RoyalRoad(Site): @staticmethod def get_site_specific_option_defs(): - return [ + return Site.get_site_specific_option_defs() + [ SiteSpecificOption( 'skip_spoilers', '--skip-spoilers/--include-spoilers', diff --git a/sites/xenforo.py b/sites/xenforo.py index 4c8b50b..ed44b51 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -17,7 +17,7 @@ class XenForo(Site): @staticmethod def get_site_specific_option_defs(): - return [ + return Site.get_site_specific_option_defs() + [ SiteSpecificOption( 'include_index', '--include-index/--no-include-index',