From 6d52c72c991e7055b8505857122fc1eb5203905b Mon Sep 17 00:00:00 2001 From: David Lynch Date: Sat, 4 Nov 2017 00:09:09 -0500 Subject: [PATCH] Use logging instead of print Fixes #10 --- leech.py | 20 ++++++++++++++++---- sites/__init__.py | 5 ++++- sites/ao3.py | 5 ++++- sites/arbitrary.py | 5 ++++- sites/deviantart.py | 5 ++++- sites/fanfictionnet.py | 7 +++++-- sites/fictionlive.py | 5 ++++- sites/stash.py | 7 +++++-- sites/xenforo.py | 9 ++++++--- 9 files changed, 52 insertions(+), 16 deletions(-) diff --git a/leech.py b/leech.py index b73c26e..9cc1be5 100755 --- a/leech.py +++ b/leech.py @@ -4,6 +4,7 @@ import argparse import sys import json import http.cookiejar +import logging import sites import ebook @@ -14,6 +15,8 @@ import requests_cache __version__ = 1 USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__ +logger = logging.getLogger(__name__) + def leech(url, session, filename=None, args=None): # we have: a page, which could be absolutely any part of a story, or not a story at all @@ -22,7 +25,7 @@ def leech(url, session, filename=None, args=None): if not site: raise Exception("No site handler found") - print("Handler", site, url) + logger.info("Handler: %s (%s)", site, url) handler = site(session, args=args) @@ -48,13 +51,22 @@ if __name__ == '__main__': parser.add_argument('--filename', help="output filename (the title is used if this isn't provided)") parser.add_argument('--no-cache', dest='cache', action='store_false') parser.add_argument('--flush', dest='flush', action='store_true') - parser.set_defaults(cache=True, flush=False) + parser.add_argument('-v', '--verbose', help="verbose output", action='store_true', dest='verbose') + parser.set_defaults(cache=True, flush=False, verbose=False) args, extra_args = parser.parse_known_args() + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig( + level=logging.INFO, + format="[%(name)s] %(message)s" + ) + if args.flush: requests_cache.install_cache('leech') 
requests_cache.clear() - print("Flushed cache") + logger.info("Flushed cache") sys.exit() if not args.url: @@ -76,4 +88,4 @@ if __name__ == '__main__': }) filename = leech(args.url, filename=args.filename, session=session, args=extra_args) - print("File created:", filename) + logger.info("File created: %s", filename) diff --git a/sites/__init__.py b/sites/__init__.py index 24161be..9a91f4f 100644 --- a/sites/__init__.py +++ b/sites/__init__.py @@ -4,9 +4,12 @@ import os import argparse import uuid import time +import logging import attr from bs4 import BeautifulSoup +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) _sites = [] @@ -104,7 +107,7 @@ class Site: delay = retry_delay if page.headers['Retry-After']: delay = int(page.headers['Retry-After']) - print("Load failed: waiting {}s to retry ({})".format(delay, page)) + logger.warning("Load failed: waiting %ss to retry (%s)", delay, page) time.sleep(delay) return self._soup(url, method=method, retry=retry - 1, retry_delay=retry_delay, **kw) raise SiteException("Couldn't fetch", url) diff --git a/sites/ao3.py b/sites/ao3.py index 4523ae6..bce4e61 100644 --- a/sites/ao3.py +++ b/sites/ao3.py @@ -1,9 +1,12 @@ #!/usr/bin/python +import logging import datetime import re from . import register, Site, Section, Chapter +logger = logging.getLogger(__name__) + @register class ArchiveOfOurOwn(Site): @@ -46,7 +49,7 @@ class ArchiveOfOurOwn(Site): return story def _chapter(self, url): - print("Extracting chapter from", url) + logger.info("Extracting chapter @ %s", url) soup = self._soup(url) content = soup.find('div', role='article') diff --git a/sites/arbitrary.py b/sites/arbitrary.py index 195faee..1989bc6 100644 --- a/sites/arbitrary.py +++ b/sites/arbitrary.py @@ -1,5 +1,6 @@ #!/usr/bin/python +import logging import attr import datetime import json @@ -7,6 +8,8 @@ import os.path import urllib from . 
import register, Site, Section, Chapter +logger = logging.getLogger(__name__) + """ Example JSON: { @@ -75,7 +78,7 @@ class Arbitrary(Site): def _chapter(self, url, definition): # TODO: refactor so this can meaningfully handle multiple matches on content_selector. # Probably by changing it so that this returns a Chapter / Section. - print("Extracting chapter from", url) + logger.info("Extracting chapter @ %s", url) soup = self._soup(url) content = soup.select(definition.content_selector)[0] diff --git a/sites/deviantart.py b/sites/deviantart.py index bb2775a..df30e92 100644 --- a/sites/deviantart.py +++ b/sites/deviantart.py @@ -1,10 +1,13 @@ #!/usr/bin/python +import logging import re from . import register, Section from .stash import Stash +logger = logging.getLogger(__name__) + @register class DeviantArt(Stash): @@ -41,6 +44,6 @@ class DeviantArt(Stash): if thumb['href'] is not '#': story.add(self._chapter(thumb['href'])) except Exception as e: - print(e) + logger.exception("Couldn't extract chapters from thumbs") return story diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py index c3a6792..0da64ae 100644 --- a/sites/fanfictionnet.py +++ b/sites/fanfictionnet.py @@ -1,9 +1,12 @@ #!/usr/bin/python +import logging import datetime import re from . 
import register, Site, SiteException, Section, Chapter +logger = logging.getLogger(__name__) + @register class FanFictionNet(Site): @@ -59,7 +62,7 @@ class FanFictionNet(Site): return story def _chapter(self, url): - print("Extracting chapter from", url) + logger.info("Fetching chapter @ %s", url) soup = self._soup(url) content = soup.find(id="content_wrapper_inner") @@ -74,7 +77,7 @@ class FanFictionNet(Site): for tag in text.find_all(True): tag.attrs = None except Exception as e: - print("Trouble cleaning attributes", e) + logger.exception("Trouble cleaning attributes") return text.prettify() diff --git a/sites/fictionlive.py b/sites/fictionlive.py index 6d588ba..a7d8fae 100644 --- a/sites/fictionlive.py +++ b/sites/fictionlive.py @@ -1,10 +1,13 @@ #!/usr/bin/python +import logging import itertools import datetime import re from . import register, Site, Section, Chapter +logger = logging.getLogger(__name__) + @register class FictionLive(Site): @@ -40,7 +43,7 @@ class FictionLive(Site): # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1502823848216/9999999999999998 # i.e. format is [current timestamp] / [next timestamp - 1] chapter_url = 'https://fiction.live/api/anonkun/chapters/{}/{}/{}'.format(workid, currc['ct'], nextc['ct'] - 1) - print("Extracting chapter from", chapter_url) + logger.info("Extracting chapter \"%s\" @ %s", currc['title'], chapter_url) data = self.session.get(chapter_url).json() html = [] diff --git a/sites/stash.py b/sites/stash.py index e7487b6..9c77b83 100644 --- a/sites/stash.py +++ b/sites/stash.py @@ -1,9 +1,12 @@ #!/usr/bin/python +import logging import datetime import re from . 
import register, Site, SiteException, Section, Chapter +logger = logging.getLogger(__name__) + @register class Stash(Site): @@ -35,12 +38,12 @@ class Stash(Site): if thumb['href'] is not '#': story.add(self._chapter(thumb['href'])) except Exception as e: - print(e) + logger.exception("Couldn't extract chapters from thumbs") return story def _chapter(self, url): - print("Extracting chapter from", url) + logger.info("Fetching chapter @ %s", url) soup = self._soup(url) content = soup.find(class_="journal-wrapper") diff --git a/sites/xenforo.py b/sites/xenforo.py index 9c94401..aa530eb 100644 --- a/sites/xenforo.py +++ b/sites/xenforo.py @@ -2,8 +2,11 @@ import datetime import re +import logging from . import register, Site, SiteException, Section, Chapter +logger = logging.getLogger(__name__) + class XenForo(Site): """XenForo is forum software that powers a number of fiction-related forums.""" @@ -23,7 +26,7 @@ class XenForo(Site): 'password': login_details[1], } self.session.post('https://%s/login/login' % self.domain, data=post) - print("Logged in as", login_details[0]) + logger.info("Logged in as %s", login_details[0]) def extract(self, url): soup = self._soup(url) @@ -47,7 +50,7 @@ class XenForo(Site): if not href.startswith('http'): href = base + href title = str(mark.string).strip() - print("Fetching chapter", title, href) + logger.info("Fetching chapter \"%s\" @ %s", title, href) chapter = Chapter(title=title, contents="") contents, post_date = self._chapter(href, idx) chapter.contents = contents @@ -63,7 +66,7 @@ class XenForo(Site): try: return self._chapter_list_threadmarks(url) except SiteException as e: - print("Tried threadmarks", e.args) + logger.debug("Tried threadmarks (%r)", e.args) return self._chapter_list_index(url) def _chapter_list_threadmarks(self, url):