diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index adb1f031..f5f2de8e 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 32) + version = (1, 5, 33) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/dialogs.py b/calibre-plugin/dialogs.py index f2b1b9a9..977ba26c 100644 --- a/calibre-plugin/dialogs.py +++ b/calibre-plugin/dialogs.py @@ -177,6 +177,43 @@ class FakeLineEdit(): def text(self): pass +class CollectURLDialog(QDialog): + ''' + Collect single url for get urls. + ''' + def __init__(self, gui, title): + QDialog.__init__(self, gui) + self.gui = gui + self.status=False + + self.l = QGridLayout() + self.setLayout(self.l) + + self.setWindowTitle(title) + self.l.addWidget(QLabel(title),0,0,1,2) + + self.l.addWidget(QLabel("URL:"),1,0) + self.url = QLineEdit(self) + self.l.addWidget(self.url,1,1) + + self.ok_button = QPushButton('OK', self) + self.ok_button.clicked.connect(self.ok) + self.l.addWidget(self.ok_button,2,0) + + self.cancel_button = QPushButton('Cancel', self) + self.cancel_button.clicked.connect(self.cancel) + self.l.addWidget(self.cancel_button,2,1) + + self.resize(self.sizeHint()) + + def ok(self): + self.status=True + self.hide() + + def cancel(self): + self.status=False + self.hide() + class UserPassDialog(QDialog): ''' Need to collect User/Pass for some sites. diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index e8e9b761..36c8e0b7 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -37,11 +37,12 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values) from calibre_plugins.fanfictiondownloader_plugin.dialogs import ( AddNewDialog, UpdateExistingDialog, display_story_list, DisplayStoryListDialog, - LoopProgressDialog, UserPassDialog, AboutDialog, + LoopProgressDialog, UserPassDialog, AboutDialog, CollectURLDialog, OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, NotGoingToDownload ) @@ -192,6 +193,11 @@ class FanFictionDownLoaderPlugin(InterfaceAction): shortcut_name='Get URLs from Selected Books', triggered=self.get_list_urls) + self.get_list_action = self.create_menu_item_ex(self.menu, 'Get Story URLs from Web Page', image='view.png', + unique_name='Get Story URLs from Web Page', + shortcut_name='Get Story URLs from Web Page', + triggered=self.get_urls_from_page) + self.menu.addSeparator() self.config_action = create_menu_action_unique(self, self.menu, '&Configure Plugin', shortcut=False, image= 'config.png', @@ -246,6 +252,26 @@ class FanFictionDownLoaderPlugin(InterfaceAction): (prefs['addtolists'] or prefs['addtoreadlists']) : self._update_reading_lists(self.gui.library_view.get_selected_ids(),add) + def get_urls_from_page(self): + d = CollectURLDialog(self.gui,"Get Story URLs from Web Page") + d.exec_() + if not d.status: + return + print("URL:%s"%d.url.text()) + + url_list = get_urls_from_page("%s"%d.url.text()) + + if url_list: + d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui) + d.setWindowIcon(get_icon('bookmarks.png')) + d.exec_() + else: + info_dialog(self.gui, _('List of URLs'), + _('No Valid URLs found on given page.'), + show=True, + show_copy_button=False) + + def get_list_urls(self): if len(self.gui.library_view.get_selected_ids()) > 0: book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() ) @@ -498,7 +524,7 @@ make_firstimage_cover:true # find dups mi = MetaInformation(story.getMetadata("title", removeallentities=True), - (story.getMetadata("author", removeallentities=True),)) # author is a list. + [story.getMetadata("author", removeallentities=True)]) # author is a list. identicalbooks = db.find_identical_books(mi) ## removed for being overkill. # for ib in identicalbooks: @@ -784,7 +810,7 @@ make_firstimage_cover:true if epubmi.cover_data[1] is not None: db.set_cover(book_id, epubmi.cover_data[1]) - # set author link if found. All current adapters have authorUrl. + # set author link if found. All current adapters have authorUrl, except anonymous on AO3. if 'authorUrl' in book['all_metadata']: autid=db.get_author_id(book['author']) db.set_link_field_for_author(autid, unicode(book['all_metadata']['authorUrl']), @@ -926,7 +952,7 @@ make_firstimage_cover:true confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) def _find_existing_book_id(self,db,book,matchurl=True): - mi = MetaInformation(book["title"],(book["author"],)) # author is a list. + mi = MetaInformation(book["title"],[book["author"]]) # author is a list. identicalbooks = db.find_identical_books(mi) if matchurl: # only *really* identical if URL matches, too. for ib in identicalbooks: @@ -937,7 +963,7 @@ make_firstimage_cover:true return None def _make_mi_from_book(self,book): - mi = MetaInformation(book['title'],(book['author'],)) # author is a list. + mi = MetaInformation(book['title'],[book['author']]) # author is a list. mi.set_identifiers({'url':book['url']}) mi.publisher = book['publisher'] mi.tags = book['tags'] diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 1a313159..6325dd75 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -87,7 +87,15 @@ def do_download_worker(book_list, options, print(job.details) if count >= total: - # All done! + # All done! Output some lists for convenience of some users. + print("Successfully downloaded:") + for book in book_list: + if book['good']: + print(book['title']) + print("\nUnsuccessful:") + for book in book_list: + if not book['good']: + print(book['title']) break server.close() @@ -117,10 +125,10 @@ def do_download_for_worker(book,options): story = adapter.getStoryMetadataOnly() if 'calibre_series' in book: - print("calibre_series:%s [%d]"%book['calibre_series']) + # print("calibre_series:%s [%d]"%book['calibre_series']) adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1]) - else: - print("no calibre_series") + # else: + # print("no calibre_series") writer = writers.getWriter(options['fileform'],adapter.config,adapter) outfile = book['outfile'] diff --git a/defaults.ini b/defaults.ini index cd30ac9e..cb3ee5ec 100644 --- a/defaults.ini +++ b/defaults.ini @@ -464,6 +464,9 @@ cover_exclusion_regexp:/images/.*?ribbon.gif [www.dokuga.com] [www.fanfiction.net] +## fanfiction.net's 'cover' images are really just tiny thumbnails. +## Comment this out or change it to false to use them anyway. +never_make_cover: true [www.ficbook.net] diff --git a/downloader.py b/downloader.py index 8f405e88..abc3781d 100644 --- a/downloader.py +++ b/downloader.py @@ -17,7 +17,7 @@ import logging ## XXX cli option for logging level. -logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") +logging.basicConfig(level=logging.INFO,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") import sys, os from os.path import normpath, expanduser, isfile, join @@ -30,6 +30,7 @@ from subprocess import call from fanficdownloader import adapters,writers,exceptions from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data +from fanficdownloader.geturls import get_urls_from_page if sys.version_info < (2, 5): print "This program requires Python 2.5 or newer." @@ -70,6 +71,9 @@ def main(): parser.add_option("--force", action="store_true", dest="force", help="Force overwrite or update of an existing epub, download and overwrite all chapters.",) + parser.add_option("-l", "--list", + action="store_true", dest="list", + help="Get list of valid story URLs from page given.",) (options, args) = parser.parse_args() @@ -116,6 +120,12 @@ def main(): (var,val) = opt.split('=') config.set("overrides",var,val) + if options.list: + retlist = get_urls_from_page(args[0]) + print "\n".join(retlist) + + return + try: ## Attempt to update an existing epub. if options.update: @@ -202,7 +212,7 @@ def main(): print dne except exceptions.UnknownSite, us: print us - + if __name__ == "__main__": #import time #start = time.time() diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index 47996c7d..7e57f0c3 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -242,6 +242,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): self.story.setMetadata('status', 'Completed') else: self.story.setMetadata('status', 'In-Progress') + + img = soup.find('img',{'class':'cimage'}) + if img: + self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw,cover=True) + return def getChapterText(self, url): diff --git a/fanficdownloader/geturls.py b/fanficdownloader/geturls.py new file mode 100644 index 00000000..65bf9398 --- /dev/null +++ b/fanficdownloader/geturls.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import urlparse +import urllib2 as u2 +import ConfigParser + +from BeautifulSoup import BeautifulSoup +from gziphttp import GZipProcessor + +import adapters + +def get_urls_from_page(url): + + opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor()) + soup = BeautifulSoup(opener.open(url).read()) + + normalized = set() # normalized url + retlist = [] # orig urls. + config = ConfigParser.SafeConfigParser() + + for a in soup.findAll('a'): + if a.has_key('href'): + href = form_url(url,a['href']) + try: + adapter = adapters.getAdapter(config,href,"EPUB") + if adapter.story.getMetadata('storyUrl') not in normalized: + normalized.add(adapter.story.getMetadata('storyUrl')) + retlist.append(href) + except: + pass + + return retlist + +def form_url(parenturl,url): + url = url.strip() # ran across an image with a space in the + # src. Browser handled it, so we'd better, too. + + if "//" in url or parenturl == None: + returl = url + else: + parsedUrl = urlparse.urlparse(parenturl) + if url.startswith("/") : + returl = urlparse.urlunparse( + (parsedUrl.scheme, + parsedUrl.netloc, + url, + '','','')) + else: + toppath="" + if parsedUrl.path.endswith("/"): + toppath = parsedUrl.path + else: + toppath = parsedUrl.path[:parsedUrl.path.rindex('/')] + returl = urlparse.urlunparse( + (parsedUrl.scheme, + parsedUrl.netloc, + toppath + '/' + url, + '','','')) + return returl + diff --git a/plugin-defaults.ini b/plugin-defaults.ini index a8422568..557583ad 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -449,6 +449,9 @@ cover_exclusion_regexp:/images/.*?ribbon.gif [www.dokuga.com] [www.fanfiction.net] +## fanfiction.net's 'cover' images are really just tiny thumbnails. +## Comment this out or change it to false to use them anyway. +never_make_cover: true [www.ficbook.net]