mirror of https://github.com/JimmXinu/FanFicFare.git

commit 321daf7de9 (parent 37a9446162)
Add 'Get Story URLs from Web Page' feature to plugin and CLI (-l).

4 changed files with 156 additions and 7 deletions
calibre-plugin/dialogs.py

@@ -177,6 +177,43 @@ class FakeLineEdit():
     def text(self):
         pass
 
+class CollectURLDialog(QDialog):
+    '''
+    Collect single url for get urls.
+    '''
+    def __init__(self, gui, title):
+        QDialog.__init__(self, gui)
+        self.gui = gui
+        self.status=False
+
+        self.l = QGridLayout()
+        self.setLayout(self.l)
+
+        self.setWindowTitle(title)
+        self.l.addWidget(QLabel(title),0,0,1,2)
+
+        self.l.addWidget(QLabel("URL:"),1,0)
+        self.url = QLineEdit(self)
+        self.l.addWidget(self.url,1,1)
+
+        self.ok_button = QPushButton('OK', self)
+        self.ok_button.clicked.connect(self.ok)
+        self.l.addWidget(self.ok_button,2,0)
+
+        self.cancel_button = QPushButton('Cancel', self)
+        self.cancel_button.clicked.connect(self.cancel)
+        self.l.addWidget(self.cancel_button,2,1)
+
+        self.resize(self.sizeHint())
+
+    def ok(self):
+        self.status=True
+        self.hide()
+
+    def cancel(self):
+        self.status=False
+        self.hide()
+
 class UserPassDialog(QDialog):
     '''
     Need to collect User/Pass for some sites.
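The dialog follows the usual modal exec_()/status pattern: ok() and cancel() record the outcome in self.status before hiding the dialog, and the caller reads the QLineEdit afterwards. A minimal sketch of a call site (the plugin hunk later in this commit uses exactly this shape; the `gui` object is assumed to be calibre's main GUI):

    d = CollectURLDialog(gui, "Get Story URLs from Web Page")
    d.exec_()                    # modal loop; ok()/cancel() set d.status and hide()
    if d.status:                 # True only when the user clicked OK
        page_url = unicode(d.url.text())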
calibre-plugin/ffdl_plugin.py

@@ -37,11 +37,12 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
 from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
 from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
 from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
+from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.geturls import get_urls_from_page
 
 from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values)
 from calibre_plugins.fanfictiondownloader_plugin.dialogs import (
     AddNewDialog, UpdateExistingDialog, display_story_list, DisplayStoryListDialog,
-    LoopProgressDialog, UserPassDialog, AboutDialog,
+    LoopProgressDialog, UserPassDialog, AboutDialog, CollectURLDialog,
     OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY,
     NotGoingToDownload )
 
@@ -192,6 +193,11 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                                                         shortcut_name='Get URLs from Selected Books',
                                                         triggered=self.get_list_urls)
 
+        self.get_list_action = self.create_menu_item_ex(self.menu, 'Get Story URLs from Web Page', image='view.png',
+                                                        unique_name='Get Story URLs from Web Page',
+                                                        shortcut_name='Get Story URLs from Web Page',
+                                                        triggered=self.get_urls_from_page)
+
         self.menu.addSeparator()
         self.config_action = create_menu_action_unique(self, self.menu, '&Configure Plugin', shortcut=False,
                                                        image= 'config.png',
@@ -246,6 +252,26 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
                (prefs['addtolists'] or prefs['addtoreadlists']) :
             self._update_reading_lists(self.gui.library_view.get_selected_ids(),add)
 
+    def get_urls_from_page(self):
+        d = CollectURLDialog(self.gui,"Get Story URLs from Web Page")
+        d.exec_()
+        if not d.status:
+            return
+        print("URL:%s"%d.url.text())
+
+        url_list = get_urls_from_page("%s"%d.url.text())
+
+        if url_list:
+            d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui)
+            d.setWindowIcon(get_icon('bookmarks.png'))
+            d.exec_()
+        else:
+            info_dialog(self.gui, _('List of URLs'),
+                        _('No Valid URLs found on given page.'),
+                        show=True,
+                        show_copy_button=False)
+
+
     def get_list_urls(self):
         if len(self.gui.library_view.get_selected_ids()) > 0:
             book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() )
@@ -498,7 +524,7 @@ make_firstimage_cover:true
 
         # find dups
         mi = MetaInformation(story.getMetadata("title", removeallentities=True),
-                             (story.getMetadata("author", removeallentities=True),)) # author is a list.
+                             [story.getMetadata("author", removeallentities=True)]) # author is a list.
         identicalbooks = db.find_identical_books(mi)
         ## removed for being overkill.
         # for ib in identicalbooks:
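This hunk (and the two matching ones further down) swaps the one-element tuple (author,) for the list [author]; the inline comment already notes that the author argument is a list. A minimal sketch of the intended shape, assuming calibre's metadata API:

    from calibre.ebooks.metadata import MetaInformation
    # authors is passed as a list of author-name strings, not a tuple:
    mi = MetaInformation(u"Some Title", [u"Some Author"])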
@@ -784,7 +810,7 @@ make_firstimage_cover:true
         if epubmi.cover_data[1] is not None:
             db.set_cover(book_id, epubmi.cover_data[1])
 
-        # set author link if found. All current adapters have authorUrl.
+        # set author link if found. All current adapters have authorUrl, except anonymous on AO3.
         if 'authorUrl' in book['all_metadata']:
             autid=db.get_author_id(book['author'])
             db.set_link_field_for_author(autid, unicode(book['all_metadata']['authorUrl']),
@@ -926,7 +952,7 @@ make_firstimage_cover:true
             confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui)
 
     def _find_existing_book_id(self,db,book,matchurl=True):
-        mi = MetaInformation(book["title"],(book["author"],)) # author is a list.
+        mi = MetaInformation(book["title"],[book["author"]]) # author is a list.
         identicalbooks = db.find_identical_books(mi)
         if matchurl: # only *really* identical if URL matches, too.
             for ib in identicalbooks:
@@ -937,7 +963,7 @@ make_firstimage_cover:true
         return None
 
     def _make_mi_from_book(self,book):
-        mi = MetaInformation(book['title'],(book['author'],)) # author is a list.
+        mi = MetaInformation(book['title'],[book['author']]) # author is a list.
         mi.set_identifiers({'url':book['url']})
         mi.publisher = book['publisher']
         mi.tags = book['tags']
downloader.py

@@ -17,7 +17,7 @@
 
 import logging
 ## XXX cli option for logging level.
-logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
+logging.basicConfig(level=logging.INFO,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
 
 import sys, os
 from os.path import normpath, expanduser, isfile, join
@@ -30,6 +30,7 @@ from subprocess import call
 
 from fanficdownloader import adapters,writers,exceptions
 from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data
+from fanficdownloader.geturls import get_urls_from_page
 
 if sys.version_info < (2, 5):
     print "This program requires Python 2.5 or newer."
@@ -70,6 +71,9 @@ def main():
     parser.add_option("--force",
                       action="store_true", dest="force",
                       help="Force overwrite or update of an existing epub, download and overwrite all chapters.",)
+    parser.add_option("-l", "--list",
+                      action="store_true", dest="list",
+                      help="Get list of valid story URLs from page given.",)
 
     (options, args) = parser.parse_args()
 
@@ -116,6 +120,12 @@ def main():
             (var,val) = opt.split('=')
             config.set("overrides",var,val)
 
+    if options.list:
+        retlist = get_urls_from_page(args[0])
+        print "\n".join(retlist)
+
+        return
+
     try:
         ## Attempt to update an existing epub.
         if options.update:
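With -l/--list the CLI prints each valid story URL found on the given page on its own line, then returns without downloading anything. A hypothetical invocation (script name per the CLI module above; the page URL is a placeholder):

    python downloader.py -l http://some.site/authorpage.html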
@@ -202,7 +212,7 @@ def main():
         print dne
     except exceptions.UnknownSite, us:
         print us
 
 
 if __name__ == "__main__":
     #import time
     #start = time.time()
fanficdownloader/geturls.py  (new file, 76 lines)
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2012 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import urlparse
+import urllib2 as u2
+import ConfigParser
+
+from BeautifulSoup import BeautifulSoup
+from gziphttp import GZipProcessor
+
+import adapters
+
+def get_urls_from_page(url):
+
+    opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
+    soup = BeautifulSoup(opener.open(url).read())
+
+    normalized = set() # normalized url
+    retlist = [] # orig urls.
+    config = ConfigParser.SafeConfigParser()
+
+    for a in soup.findAll('a'):
+        if a.has_key('href'):
+            href = form_url(url,a['href'])
+            try:
+                adapter = adapters.getAdapter(config,href,"EPUB")
+                if adapter.story.getMetadata('storyUrl') not in normalized:
+                    normalized.add(adapter.story.getMetadata('storyUrl'))
+                    retlist.append(href)
+            except:
+                pass
+
+    return retlist
+
+def form_url(parenturl,url):
+    url = url.strip() # ran across an image with a space in the
+                      # src. Browser handled it, so we'd better, too.
+
+    if "//" in url or parenturl == None:
+        returl = url
+    else:
+        parsedUrl = urlparse.urlparse(parenturl)
+        if url.startswith("/") :
+            returl = urlparse.urlunparse(
+                (parsedUrl.scheme,
+                 parsedUrl.netloc,
+                 url,
+                 '','',''))
+        else:
+            toppath=""
+            if parsedUrl.path.endswith("/"):
+                toppath = parsedUrl.path
+            else:
+                toppath = parsedUrl.path[:parsedUrl.path.rindex('/')]
+            returl = urlparse.urlunparse(
+                (parsedUrl.scheme,
+                 parsedUrl.netloc,
+                 toppath + '/' + url,
+                 '','',''))
+    return returl
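A minimal sketch of calling the new helper directly (Python 2, like the rest of the codebase; the listing-page URL is a placeholder). get_urls_from_page returns the original hrefs, deduplicated by each adapter's normalized storyUrl:

    from fanficdownloader.geturls import get_urls_from_page

    urls = get_urls_from_page("http://some.site/authorpage.html")
    for storyurl in urls:
        print storyurl            # one valid story URL per line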
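And how form_url resolves an href against the page URL, as implied by the code above (example URLs are illustrative):

    from fanficdownloader.geturls import form_url

    base = "http://example.com/fics/index.html"
    form_url(base, "http://other.com/s/1/")   # -> "http://other.com/s/1/"  (contains "//": used as-is)
    form_url(base, "/s/2/")                   # -> "http://example.com/s/2/"  (site-root relative)
    form_url(base, "s/3/")                    # -> "http://example.com/fics/s/3/"  (page relative)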