#!/usr/bin/python
# Provenance: added as sites/deviantart.py by commit
# d532962696597ff16129012c2641a703f350e6ae (David Lynch,
# Mon, 28 Apr 2014 19:10:04 -0500): "Support deviantart favourites / galleries".
"""Turn a deviantart gallery or favourites collection page into a story dict."""

import re
from bs4 import BeautifulSoup

from .stash import _extract_chapter

# <user>.deviantart.com/gallery/<id> or <user>.deviantart.com/favourites/<id>
_COLLECTION_RE = re.compile(
    r'^https?://[^.]+\.deviantart\.com/(?:gallery|favourites)/\d+/?')

# Anchored on the path segment so that a user name which happens to contain
# "gallery" (e.g. mygallery.deviantart.com/favourites/1) is not mistaken for
# a gallery page.
_GALLERY_RE = re.compile(r'^https?://[^.]+\.deviantart\.com/gallery/')


def match(url):
    """Return a regex match object if *url* is a collection page, else None.

    A collection page is a gallery or favourites folder URL on a
    ``<user>.deviantart.com`` host, identified by a numeric folder id.
    """
    # Need a collection page
    return _COLLECTION_RE.match(url)


def extract(url, fetch):
    """Build a story dict from the collection page at *url*.

    ``fetch`` is called with a URL and must return the page markup; it is
    also handed on to ``_extract_chapter`` for every thumbnail link.

    Returns a dict with the keys ``'author'``, ``'title'`` and ``'chapters'``,
    or None when the page does not contain the expected ``#output``
    container, gallery author link, folder title, or any thumbnails.
    """
    page = fetch(url)
    soup = BeautifulSoup(page, 'html5lib')
    content = soup.find(id="output")
    if not content:
        return

    story = {}

    if _GALLERY_RE.match(url):
        # A gallery belongs to a single user, named in the page heading.
        author_links = content.select('h1 a.u')
        if not author_links:
            return
        story['author'] = str(author_links[0].string)
    else:
        # Favourites can mix works by several users; de-duplicate them and
        # sort so the joined string does not depend on set iteration order.
        authors = set(
            str(author.string)
            for author in content.select('.stream .details a.u'))
        story['author'] = ', '.join(sorted(authors))

    title_node = content.find(class_="folder-title")
    if title_node is None:
        return
    story['title'] = str(title_node.string)

    thumbs = content.select(".stream a.thumb")
    if not thumbs:
        return

    chapters = []
    for thumb in thumbs:
        try:
            # Compare by value: identity comparison against a string literal
            # is implementation-dependent. Bare '#' links are skipped
            # (presumably placeholder thumbnails -- TODO confirm).
            if thumb['href'] != '#':
                chapters.append(_extract_chapter(thumb['href'], fetch))
        except Exception as e:
            # Best effort: a single chapter that fails to extract is
            # reported and skipped rather than aborting the collection.
            print(e)

    story['chapters'] = chapters

    return story