mirror of
https://github.com/kemayo/leech
synced 2026-04-20 11:30:48 +02:00
A PEP8 run-through
This commit is contained in:
parent
64f23080a3
commit
37f20415ec
4 changed files with 16 additions and 7 deletions
11
epub.py
11
epub.py
|
|
@ -12,6 +12,7 @@ a bit of metadata thrown in for good measure.
|
|||
This totally started from http://www.manuel-strehl.de/dev/simple_epub_ebooks_with_python.en.html
|
||||
"""
|
||||
|
||||
|
||||
def make_epub(filename, html_files, meta):
|
||||
unique_id = meta.get('unique_id', False)
|
||||
if not unique_id:
|
||||
|
|
@ -25,13 +26,13 @@ def make_epub(filename, html_files, meta):
|
|||
# We need an index file that lists all other HTML files
|
||||
# This index file itself is referenced in the META-INF/container.xml
|
||||
# file
|
||||
container = etree.Element('container', version = "1.0", xmlns="urn:oasis:names:tc:opendocument:xmlns:container")
|
||||
container = etree.Element('container', version="1.0", xmlns="urn:oasis:names:tc:opendocument:xmlns:container")
|
||||
rootfiles = etree.SubElement(container, 'rootfiles')
|
||||
etree.SubElement(rootfiles, 'rootfile', {
|
||||
'full-path': "OEBPS/Content.opf",
|
||||
'media-type': "application/oebps-package+xml",
|
||||
})
|
||||
epub.writestr("META-INF/container.xml", etree.tostring(container));
|
||||
epub.writestr("META-INF/container.xml", etree.tostring(container))
|
||||
|
||||
package = etree.Element('package', {
|
||||
'version': "2.0",
|
||||
|
|
@ -70,7 +71,7 @@ def make_epub(filename, html_files, meta):
|
|||
# Write each HTML file to the ebook, collect information for the index
|
||||
for i, html in enumerate(html_files):
|
||||
basename = os.path.basename(html[1])
|
||||
file_id = 'file_%d' % (i+1)
|
||||
file_id = 'file_%d' % (i + 1)
|
||||
etree.SubElement(manifest, 'item', {
|
||||
'id': file_id,
|
||||
'href': basename,
|
||||
|
|
@ -86,9 +87,9 @@ def make_epub(filename, html_files, meta):
|
|||
|
||||
# and add the actual html to the zip
|
||||
if html[2]:
|
||||
epub.writestr('OEBPS/'+basename, html[2])
|
||||
epub.writestr('OEBPS/' + basename, html[2])
|
||||
else:
|
||||
epub.write(html[1], 'OEBPS/'+basename)
|
||||
epub.write(html[1], 'OEBPS/' + basename)
|
||||
|
||||
# ...and add the ncx to the manifest
|
||||
etree.SubElement(manifest, 'item', {
|
||||
|
|
|
|||
4
fetch.py
4
fetch.py
|
|
@ -9,10 +9,11 @@ from urllib.request import Request, urlopen
|
|||
__version__ = 1
|
||||
USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__
|
||||
|
||||
|
||||
class Fetch:
|
||||
"""A store for values by date, sqlite-backed"""
|
||||
|
||||
def __init__(self, storepath, cachetime = "+1 day"):
|
||||
def __init__(self, storepath, cachetime="+1 day"):
|
||||
"""Initializes the store; creates tables if required
|
||||
|
||||
storepath is the path to a sqlite database, and will be created
|
||||
|
|
@ -58,6 +59,7 @@ class Fetch:
|
|||
self.store.commit()
|
||||
c.close()
|
||||
|
||||
|
||||
def _fetch(url, data=None, ungzip=True):
|
||||
"""A generic URL-fetcher, which handles gzipped content, returns a string"""
|
||||
request = Request(url)
|
||||
|
|
|
|||
5
leech.py
5
leech.py
|
|
@ -21,6 +21,7 @@ html_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|||
</html>
|
||||
'''
|
||||
|
||||
|
||||
def leech(url, filename=None):
|
||||
# we have: a page, which could be absolutely any part of a story, or not a story at all
|
||||
# check a bunch of things which are completely ff.n specific, to get text from it
|
||||
|
|
@ -39,7 +40,7 @@ def leech(url, filename=None):
|
|||
}
|
||||
html = []
|
||||
for i, chapter in enumerate(story['chapters']):
|
||||
html.append((chapter[0], 'chapter%d.html' % (i+1), html_template.format(title=chapter[0], text=chapter[1])))
|
||||
html.append((chapter[0], 'chapter%d.html' % (i + 1), html_template.format(title=chapter[0], text=chapter[1])))
|
||||
|
||||
filename = filename or story['title'] + '.epub'
|
||||
|
||||
|
|
@ -49,11 +50,13 @@ def leech(url, filename=None):
|
|||
|
||||
_sites = []
|
||||
|
||||
|
||||
def _get_site(url):
|
||||
for site in _sites:
|
||||
if site.match(url):
|
||||
return site
|
||||
|
||||
|
||||
def _load_sites():
|
||||
dirname = os.path.join(os.path.dirname(__file__), 'sites')
|
||||
for f in os.listdir(dirname):
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@
|
|||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def match(url):
|
||||
## e.g. https://www.fanfiction.net/s/4109686/3/Taking-Sights
|
||||
return re.match(r'^https?://www\.fanfiction\.net/s/\d+/?.*', url)
|
||||
|
||||
|
||||
def extract(url, fetch):
|
||||
page = fetch(url)
|
||||
soup = BeautifulSoup(page, 'html5lib')
|
||||
|
|
@ -39,6 +41,7 @@ def extract(url, fetch):
|
|||
|
||||
return story
|
||||
|
||||
|
||||
def _extract_chapter(url, title, fetch):
|
||||
print("Extracting chapter from", url)
|
||||
page = fetch(url)
|
||||
|
|
|
|||
Loading…
Reference in a new issue