Mirror of https://github.com/kemayo/leech

A PEP8 run-through

David Lynch 2014-04-28 15:15:53 -05:00
parent 64f23080a3
commit 37f20415ec
4 changed files with 16 additions and 7 deletions
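
The diffs below are mechanical fixes for the pep8 checker's warnings: no spaces around a keyword-argument equals sign (E251), spaces around arithmetic operators (E226), no trailing semicolons (E703), and two blank lines before top-level definitions (E302). A minimal before/after sketch of those rules, with made-up names (nothing in this snippet is from the repo):

# Before: def chapter_name(i, title = None): name = 'chapter%d' % (i+1);
# After, per E251, E226, E703, and E302:


def chapter_name(i, title=None):
    # keyword default without spaces, operator with spaces, no semicolon
    name = 'chapter%d' % (i + 1)
    return title or name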

epub.py

@@ -12,6 +12,7 @@ a bit of metadata thrown in for good measure.
 This totally started from http://www.manuel-strehl.de/dev/simple_epub_ebooks_with_python.en.html
 """
 
+
 def make_epub(filename, html_files, meta):
     unique_id = meta.get('unique_id', False)
     if not unique_id:
@@ -25,13 +26,13 @@ def make_epub(filename, html_files, meta):
     # We need an index file, that lists all other HTML files
     # This index file itself is referenced in the META_INF/container.xml
     # file
-    container = etree.Element('container', version = "1.0", xmlns="urn:oasis:names:tc:opendocument:xmlns:container")
+    container = etree.Element('container', version="1.0", xmlns="urn:oasis:names:tc:opendocument:xmlns:container")
     rootfiles = etree.SubElement(container, 'rootfiles')
     etree.SubElement(rootfiles, 'rootfile', {
         'full-path': "OEBPS/Content.opf",
         'media-type': "application/oebps-package+xml",
     })
-    epub.writestr("META-INF/container.xml", etree.tostring(container));
+    epub.writestr("META-INF/container.xml", etree.tostring(container))
 
     package = etree.Element('package', {
         'version': "2.0",
@@ -70,7 +71,7 @@ def make_epub(filename, html_files, meta):
     # Write each HTML file to the ebook, collect information for the index
     for i, html in enumerate(html_files):
         basename = os.path.basename(html[1])
-        file_id = 'file_%d' % (i+1)
+        file_id = 'file_%d' % (i + 1)
         etree.SubElement(manifest, 'item', {
             'id': file_id,
             'href': basename,
@@ -86,9 +87,9 @@ def make_epub(filename, html_files, meta):
 
         # and add the actual html to the zip
         if html[2]:
-            epub.writestr('OEBPS/'+basename, html[2])
+            epub.writestr('OEBPS/' + basename, html[2])
         else:
-            epub.write(html[1], 'OEBPS/'+basename)
+            epub.write(html[1], 'OEBPS/' + basename)
 
     # ...and add the ncx to the manifest
     etree.SubElement(manifest, 'item', {
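
For context on what make_epub is assembling: an EPUB is a zip archive whose META-INF/container.xml points at the OPF package file. A minimal, self-contained sketch of that skeleton in the same style (stdlib ElementTree stands in for whichever etree the module actually imports; the output filename is illustrative):

import zipfile
import xml.etree.ElementTree as etree


def write_epub_skeleton(filename='example.epub'):
    with zipfile.ZipFile(filename, 'w') as epub:
        # Per the EPUB spec, mimetype must come first in the archive, uncompressed
        epub.writestr('mimetype', 'application/epub+zip', zipfile.ZIP_STORED)
        container = etree.Element('container', version="1.0",
                                  xmlns="urn:oasis:names:tc:opendocument:xmlns:container")
        rootfiles = etree.SubElement(container, 'rootfiles')
        etree.SubElement(rootfiles, 'rootfile', {
            'full-path': "OEBPS/Content.opf",
            'media-type': "application/oebps-package+xml",
        })
        epub.writestr("META-INF/container.xml", etree.tostring(container))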


@@ -9,10 +9,11 @@ from urllib.request import Request, urlopen
 __version__ = 1
 
 USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__
 
+
 class Fetch:
     """A store for values by date, sqlite-backed"""
-    def __init__(self, storepath, cachetime = "+1 day"):
+    def __init__(self, storepath, cachetime="+1 day"):
         """Initializes the store; creates tables if required
 
         storepath is the path to a sqlite database, and will be created
@@ -58,6 +59,7 @@
         self.store.commit()
         c.close()
 
+
 def _fetch(url, data=None, ungzip=True):
     """A generic URL-fetcher, which handles gzipped content, returns a string"""
     request = Request(url)
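
The _fetch docstring above promises gzip handling; a hedged sketch of the usual urllib approach (the name fetch_url and the details are illustrative, not necessarily this repo's implementation):

import gzip
from urllib.request import Request, urlopen


def fetch_url(url, data=None, ungzip=True):
    # Advertise gzip support, then transparently decompress the response body.
    request = Request(url, data=data, headers={'Accept-Encoding': 'gzip'})
    response = urlopen(request)
    body = response.read()
    if ungzip and response.headers.get('Content-Encoding') == 'gzip':
        body = gzip.decompress(body)
    return body.decode('utf-8')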


@@ -21,6 +21,7 @@ html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
 </html>
 '''
 
+
 def leech(url, filename=None):
     # we have: a page, which could be absolutely any part of a story, or not a story at all
     # check a bunch of things which are completely ff.n specific, to get text from it
@@ -39,7 +40,7 @@
     }
 
     html = []
     for i, chapter in enumerate(story['chapters']):
-        html.append((chapter[0], 'chapter%d.html' % (i+1), html_template.format(title=chapter[0], text=chapter[1])))
+        html.append((chapter[0], 'chapter%d.html' % (i + 1), html_template.format(title=chapter[0], text=chapter[1])))
 
     filename = filename or story['title'] + '.epub'
@@ -49,11 +50,13 @@
 
 _sites = []
 
+
 def _get_site(url):
     for site in _sites:
         if site.match(url):
             return site
 
+
 def _load_sites():
     dirname = os.path.join(os.path.dirname(__file__), 'sites')
     for f in os.listdir(dirname):
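
_load_sites is only partly visible in this hunk; a hedged sketch of the directory-scan plugin pattern it starts (the importlib mechanics and the match/extract check are assumptions about code not shown here):

import importlib
import os

_sites = []


def load_sites(package='sites'):
    # Import each module in the sites/ directory and register the ones
    # exposing the match()/extract() interface used by _get_site.
    dirname = os.path.join(os.path.dirname(__file__), package)
    for f in os.listdir(dirname):
        name, ext = os.path.splitext(f)
        if ext != '.py' or name.startswith('_'):
            continue
        module = importlib.import_module(package + '.' + name)
        if hasattr(module, 'match') and hasattr(module, 'extract'):
            _sites.append(module)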


@@ -3,10 +3,12 @@
 import re
 from bs4 import BeautifulSoup
 
+
 def match(url):
     ## e.g. https://www.fanfiction.net/s/4109686/3/Taking-Sights
     return re.match(r'^https?://www\.fanfiction\.net/s/\d+/?.*', url)
 
+
 def extract(url, fetch):
     page = fetch(url)
     soup = BeautifulSoup(page, 'html5lib')
@@ -39,6 +41,7 @@ def extract(url, fetch):
     return story
 
+
 def _extract_chapter(url, title, fetch):
     print("Extracting chapter from", url)
     page = fetch(url)
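
Taken together, each site module exposes match(url) and extract(url, fetch), and leech.py's _get_site picks the first module whose match() succeeds. A small sketch of that contract from the caller's side (get_story is a made-up name for illustration):

def get_story(url, sites, fetch):
    # The first site module that recognizes the URL handles the extraction.
    for site in sites:
        if site.match(url):
            return site.extract(url, fetch)
    raise ValueError('no site handler matches ' + url)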