From 6d52c72c991e7055b8505857122fc1eb5203905b Mon Sep 17 00:00:00 2001 From: David Lynch Date: Sat, 4 Nov 2017 00:09:09 -0500 Subject: [PATCH] Use logging instead of print Fixes #10 --- leech.py | 20 ++++++++++++++++---- sites/__init__.py | 5 ++++- sites/ao3.py | 5 ++++- sites/arbitrary.py | 5 ++++- sites/deviantart.py | 5 ++++- sites/fanfictionnet.py | 7 +++++-- sites/fictionlive.py | 5 ++++- sites/stash.py | 7 +++++-- sites/xenforo.py | 9 ++++++--- 9 files changed, 52 insertions(+), 16 deletions(-) diff --git a/leech.py b/leech.py index b73c26e..9cc1be5 100755 --- a/leech.py +++ b/leech.py @@ -4,6 +4,7 @@ import argparse import sys import json import http.cookiejar +import logging import sites import ebook @@ -14,6 +15,8 @@ import requests_cache __version__ = 1 USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__ +logger = logging.getLogger(__name__) + def leech(url, session, filename=None, args=None): # we have: a page, which could be absolutely any part of a story, or not a story at all @@ -22,7 +25,7 @@ def leech(url, session, filename=None, args=None): if not site: raise Exception("No site handler found") - print("Handler", site, url) + logger.info("Handler: %s (%s)", site, url) handler = site(session, args=args) @@ -48,13 +51,22 @@ if __name__ == '__main__': parser.add_argument('--filename', help="output filename (the title is used if this isn't provided)") parser.add_argument('--no-cache', dest='cache', action='store_false') parser.add_argument('--flush', dest='flush', action='store_true') - parser.set_defaults(cache=True, flush=False) + parser.add_argument('-v', '--verbose', help="verbose output", action='store_true', dest='verbose') + parser.set_defaults(cache=True, flush=False, verbose=False) args, extra_args = parser.parse_known_args() + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig( + level=logging.INFO, + format="[%(name)s] %(message)s" + ) + if args.flush: requests_cache.install_cache('leech') 
requests_cache.clear() - print("Flushed cache") + logger.info("Flushed cache") sys.exit() if not args.url: @@ -76,4 +88,4 @@ if __name__ == '__main__': }) filename = leech(args.url, filename=args.filename, session=session, args=extra_args) - print("File created:", filename) + logger.info("File created: %s", filename) diff --git a/sites/__init__.py b/sites/__init__.py index 24161be..9a91f4f 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -4,9 +4,12 @@ import os import argparse import uuid import time +import logging import attr from bs4 import BeautifulSoup +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) _sites = [] @@ -104,7 +107,7 @@ class Site: delay = retry_delay if page.headers['Retry-After']: delay = int(page.headers['Retry-After']) - print("Load failed: waiting {}s to retry ({})".format(delay, page)) + logger.warning("Load failed: waiting %ss to retry (%s)", delay, page) time.sleep(delay) return self._soup(url, method=method, retry=retry - 1, retry_delay=retry_delay, **kw) raise SiteException("Couldn't fetch", url) diff --git a/sites/ao3.py b/sites/ao3.py index 4523ae6..bce4e61 100644 --- a/sites/ao3.py +++ b/sites/ao3.py @@ -1,9 +1,12 @@ #!/usr/bin/python +import logging import datetime import re from . import register, Site, Section, Chapter +logger = logging.getLogger(__name__) + @register class ArchiveOfOurOwn(Site): @@ -46,7 +49,7 @@ class ArchiveOfOurOwn(Site): return story def _chapter(self, url): - print("Extracting chapter from", url) + logger.info("Extracting chapter @ %s", url) soup = self._soup(url) content = soup.find('div', role='article') diff --git a/sites/arbitrary.py b/sites/arbitrary.py index 195faee..1989bc6 100644 --- a/sites/arbitrary.py +++ b/sites/arbitrary.py @@ -1,5 +1,6 @@ #!/usr/bin/python +import logging import attr import datetime import json @@ -7,6 +8,8 @@ import os.path import urllib from . 
import register, Site, Section, Chapter +logger = logging.getLogger(__name__) + """ Example JSON: { @@ -75,7 +78,7 @@ class Arbitrary(Site): def _chapter(self, url, definition): # TODO: refactor so this can meaningfully handle multiple matches on content_selector. # Probably by changing it so that this returns a Chapter / Section. - print("Extracting chapter from", url) + logger.info("Extracting chapter @ %s", url) soup = self._soup(url) content = soup.select(definition.content_selector)[0] diff --git a/sites/deviantart.py b/sites/deviantart.py index bb2775a..df30e92 100644 --- a/sites/deviantart.py +++ b/sites/deviantart.py @@ -1,10 +1,13 @@ #!/usr/bin/python +import logging import re from . import register, Section from .stash import Stash +logger = logging.getLogger(__name__) + @register class DeviantArt(Stash): @@ -41,6 +44,6 @@ class DeviantArt(Stash): if thumb['href'] is not '#': story.add(self._chapter(thumb['href'])) except Exception as e: - print(e) + logger.exception("Couldn't extract chapters from thumbs") return story diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py index c3a6792..0da64ae 100644 --- a/sites/fanfictionnet.py +++ b/sites/fanfictionnet.py @@ -1,9 +1,12 @@ #!/usr/bin/python +import logging import datetime import re from . 
import register, Site, SiteException, Section, Chapter +logger = logging.getLogger(__name__) + @register class FanFictionNet(Site): @@ -59,7 +62,7 @@ class FanFictionNet(Site): return story def _chapter(self, url): - print("Extracting chapter from", url) + logger.info("Fetching chapter @ %s", url) soup = self._soup(url) content = soup.find(id="content_wrapper_inner") @@ -74,7 +77,7 @@ class FanFictionNet(Site): for tag in text.find_all(True): tag.attrs = None except Exception as e: - print("Trouble cleaning attributes", e) + logger.exception("Trouble cleaning attributes") return text.prettify() diff --git a/sites/fictionlive.py b/sites/fictionlive.py index 6d588ba..a7d8fae 100644 --- a/sites/fictionlive.py +++ b/sites/fictionlive.py @@ -1,10 +1,13 @@ #!/usr/bin/python +import logging import itertools import datetime import re from . import register, Site, Section, Chapter +logger = logging.getLogger(__name__) + @register class FictionLive(Site): @@ -40,7 +43,7 @@ class FictionLive(Site): # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1502823848216/9999999999999998 # i.e. format is [current timestamp] / [next timestamp - 1] chapter_url = 'https://fiction.live/api/anonkun/chapters/{}/{}/{}'.format(workid, currc['ct'], nextc['ct'] - 1) - print("Extracting chapter from", chapter_url) + logger.info("Extracting chapter \"%s\" @ %s", currc['title'], chapter_url) data = self.session.get(chapter_url).json() html = [] diff --git a/sites/stash.py b/sites/stash.py index e7487b6..9c77b83 100644 --- a/sites/stash.py +++ b/sites/stash.py @@ -1,9 +1,12 @@ #!/usr/bin/python +import logging import datetime import re from . 
import register, Site, SiteException, Section, Chapter +logger = logging.getLogger(__name__) + @register class Stash(Site): @@ -35,12 +38,12 @@ class Stash(Site): if thumb['href'] is not '#': story.add(self._chapter(thumb['href'])) except Exception as e: - print(e) + logger.exception("Couldn't extract chapters from thumbs") return story def _chapter(self, url): - print("Extracting chapter from", url) + logger.info("Fetching chapter @ %s", url) soup = self._soup(url) content = soup.find(class_="journal-wrapper") diff --git a/sites/xenforo.py b/sites/xenforo.py index 9c94401..aa530eb 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -2,8 +2,11 @@ import datetime import re +import logging from . import register, Site, SiteException, Section, Chapter +logger = logging.getLogger(__name__) + class XenForo(Site): """XenForo is forum software that powers a number of fiction-related forums.""" @@ -23,7 +26,7 @@ class XenForo(Site): 'password': login_details[1], } self.session.post('https://%s/login/login' % self.domain, data=post) - print("Logged in as", login_details[0]) + logger.info("Logged in as %s", login_details[0]) def extract(self, url): soup = self._soup(url) @@ -47,7 +50,7 @@ class XenForo(Site): if not href.startswith('http'): href = base + href title = str(mark.string).strip() - print("Fetching chapter", title, href) + logger.info("Fetching chapter \"%s\" @ %s", title, href) chapter = Chapter(title=title, contents="") contents, post_date = self._chapter(href, idx) chapter.contents = contents @@ -63,7 +66,7 @@ class XenForo(Site): try: return self._chapter_list_threadmarks(url) except SiteException as e: - print("Tried threadmarks", e.args) + logger.debug("Tried threadmarks (%r)", e.args) return self._chapter_list_index(url) def _chapter_list_threadmarks(self, url):