leech/sites/deviantart.py

#!/usr/bin/python

import re
from bs4 import BeautifulSoup

from .stash import _extract_chapter


def match(url):
    """Return a regex match if *url* is a DeviantArt collection page.

    Only gallery or favourites URLs that carry a numeric folder id are
    accepted; any other URL yields ``None``.
    """
    collection_url = r'^https?://[^.]+\.deviantart\.com/(?:gallery|favourites)/\d+/?'
    return re.match(collection_url, url)


def extract(url, fetch):
    """Scrape a DeviantArt collection page into a story dict.

    Args:
        url: Address of the collection page. If it contains the substring
            "gallery" the page is treated as a single-author gallery;
            otherwise authors are collected from the individual entries.
        fetch: Callable that takes a URL and returns the page markup. It is
            also handed on to ``_extract_chapter`` for each entry.

    Returns:
        A dict with the keys ``'author'``, ``'title'`` and ``'chapters'``,
        or ``None`` when the page has no element with id ``output`` or no
        ``.stream a.thumb`` links.
    """
    page = fetch(url)
    soup = BeautifulSoup(page, 'html5lib')
    content = soup.find(id="output")
    if not content:
        return

    story = {}
    chapters = []

    if "gallery" in url:
        story['author'] = str(content.select('h1 a.u')[0].string)
    else:
        # De-duplicate the per-entry authors; sort so the joined string is
        # the same on every run instead of depending on set ordering.
        authors = {str(author.string) for author in content.select('.stream .details a.u')}
        story['author'] = ', '.join(sorted(authors))

    story['title'] = str(content.find(class_="folder-title").string)

    thumbs = content.select(".stream a.thumb")
    if not thumbs:
        return
    for thumb in thumbs:
        try:
            href = thumb['href']
            # Compare by value, not identity: a string parsed out of the page
            # is never the same object as the literal '#', so an `is not`
            # check would let placeholder links through.
            if href != '#':
                chapters.append(_extract_chapter(href, fetch))
        except Exception as e:
            # Best-effort: one entry that fails to extract is reported and
            # skipped so the remaining entries are still collected.
            print(e)

    story['chapters'] = chapters

    return story