diff --git a/calibre-plugin/dialogs.py b/calibre-plugin/dialogs.py index d663e043..4a7233a1 100644 --- a/calibre-plugin/dialogs.py +++ b/calibre-plugin/dialogs.py @@ -13,7 +13,6 @@ from functools import partial import logging logger = logging.getLogger(__name__) -import urllib import email from datetime import datetime @@ -68,7 +67,9 @@ from calibre_plugins.fanficfare_plugin.common_utils \ SizePersistedDialog, EditableTableWidgetItem, ImageTitleLayout, get_icon) -from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_html, get_urls_from_text +from calibre_plugins.fanficfare_plugin.fanficfare.geturls \ + import ( get_urls_from_html, get_urls_from_text, + get_urls_from_mime) from calibre_plugins.fanficfare_plugin.fanficfare.adapters import getNormalStoryURL from calibre_plugins.fanficfare_plugin.fanficfare.configurable \ @@ -187,29 +188,7 @@ class DroppableQTextEdit(QTextEdit): def dropEvent(self,event): # print("event:%s"%event) - - mimetype='text/uri-list' - - urllist=[] - filelist="%s"%event.mimeData().data(mimetype) - for f in filelist.splitlines(): - #print("filename:%s"%f) - if f.endswith(".eml"): - fhandle = urllib.urlopen(f) - #print("file:\n%s\n\n"%fhandle.read()) - msg = email.message_from_file(fhandle) - if msg.is_multipart(): - for part in msg.walk(): - #print("part type:%s"%part.get_content_type()) - if part.get_content_type() == "text/html": - #print("URL list:%s"%get_urls_from_data(part.get_payload(decode=True))) - urllist.extend(get_urls_from_html(part.get_payload(decode=True))) - if part.get_content_type() == "text/plain": - #print("part content:text/plain") - # print("part content:%s"%part.get_payload(decode=True)) - urllist.extend(get_urls_from_text(part.get_payload(decode=True))) - else: - urllist.extend(get_urls_from_text("%s"%msg)) + urllist = get_urls_from_mime(event.mimeData()) if urllist: self.append("\n".join(urllist)) return None diff --git a/calibre-plugin/fff_plugin.py b/calibre-plugin/fff_plugin.py index 
402f46e6..08e7852e 100644 --- a/calibre-plugin/fff_plugin.py +++ b/calibre-plugin/fff_plugin.py @@ -88,7 +88,7 @@ from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import ( from calibre_plugins.fanficfare_plugin.fanficfare.geturls import ( get_urls_from_page, get_urls_from_html,get_urls_from_text, - get_urls_from_imap) + get_urls_from_imap, get_urls_from_mime) from calibre_plugins.fanficfare_plugin.fff_util import ( get_fff_adapter, get_fff_config, get_fff_personalini, @@ -240,37 +240,12 @@ class FanFicFarePlugin(InterfaceAction): dropped_ids=None urllist=[] - mime = 'application/calibre+from_library' - if mime_data.hasFormat(mime): - dropped_ids = [ int(x) for x in str(mime_data.data(mime)).split() ] - - mimetype='text/uri-list' - filelist="%s"%event.mimeData().data(mimetype) - if filelist: - for f in filelist.splitlines(): - #print("filename:%s"%f) - if f.endswith(".eml"): - fhandle = six.moves.urllib.request.urlopen(f) - msg = email.message_from_file(fhandle) - if msg.is_multipart(): - for part in msg.walk(): - #print("part type:%s"%part.get_content_type()) - if part.get_content_type() == "text/html": - #print("URL list:%s"%get_urls_from_data(part.get_payload(decode=True))) - urllist.extend(get_urls_from_html(part.get_payload(decode=True))) - if part.get_content_type() == "text/plain": - #print("part content:text/plain") - #print("part content:%s"%part.get_payload(decode=True)) - urllist.extend(get_urls_from_text(part.get_payload(decode=True))) - else: - urllist.extend(get_urls_from_text("%s"%msg)) - else: - urllist.extend(get_urls_from_text(f)) + libmime = 'application/calibre+from_library' + urimimetype='text/uri-list' + if mime_data.hasFormat(libmime): + dropped_ids = [ int(x) for x in str(mime_data.data(libmime)).split() ] else: - mimetype='text/plain' - if mime_data.hasFormat(mimetype): - #print("text/plain:%s"%event.mimeData().data(mimetype)) - urllist.extend(get_urls_from_text(event.mimeData().data(mimetype))) + urllist = 
get_urls_from_mime(mime_data) # print("urllist:%s\ndropped_ids:%s"%(urllist,dropped_ids)) if urllist or dropped_ids: diff --git a/fanficfare/geturls.py b/fanficfare/geturls.py index 549c918a..853ab28f 100644 --- a/fanficfare/geturls.py +++ b/fanficfare/geturls.py @@ -23,7 +23,7 @@ import re import csv # unicode in py2, str in py3 -from .six.moves.urllib.request import (build_opener, HTTPCookieProcessor) +from .six.moves.urllib.request import (build_opener, HTTPCookieProcessor, urlopen) from .six.moves.urllib.parse import (urlparse, urlunparse) from .six import text_type as unicode from .six import ensure_str @@ -52,19 +52,19 @@ def get_urls_from_page(url,configuration=None,normalize=False): return {} def get_urls_from_html(data,url=None,configuration=None,normalize=False,email=False): - logger.debug("get_urls_from_html") + # logger.debug("get_urls_from_html") urls = collections.OrderedDict() if not configuration: configuration = Configuration(["test1.com"],"EPUB",lightweight=True) if isinstance(data,(BeautifulSoup,Tag)): - logger.debug("Using pre-made soup") + # logger.debug("Using pre-made soup") soup = data else: ## soup and re-soup because BS4/html5lib is more forgiving of ## incorrectly nested tags that way. - logger.debug("dbl souping") + # logger.debug("dbl souping") soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib") for a in soup.findAll('a'): @@ -213,7 +213,7 @@ def get_urls_from_imap(srv,user,passwd,folder,markread=True): # so this is safe. There may be other chars than " that need escaping. 
status = mail.select('"%s"'%folder.replace('"','\\"')) if status[0] != 'OK': - logger.debug(status) + # logger.debug(status) if folders: raise FetchEmailFailed("Failed to select folder(%s) on mail server (folder list:%s)"%(folder,folders)) else: @@ -264,3 +264,50 @@ def get_urls_from_imap(srv,user,passwd,folder,markread=True): [ urls.add(x) for x in urllist ] return urls
+
+def get_urls_from_mime(mime_data):
+    """Return a list of story URLs found in dropped mime data.
+
+    mime_data is only used through hasFormat(), urls() and text();
+    presumably a Qt QMimeData -- confirm against the callers.
+
+    When 'text/uri-list' data is present, each URL is handled in turn:
+    one ending in ".eml" is opened and parsed as an email message,
+    and its text/html and text/plain parts (or the whole message when
+    it is not multipart) are scanned for URLs; any other URL string
+    is passed to get_urls_from_text() directly.  Otherwise
+    'text/plain' data, if present, is scanned as text.
+    """
+    urllist=[]
+    if mime_data.hasFormat('text/uri-list'):
+        for qurl in mime_data.urls():
+            f = qurl.toString()
+            if f.endswith(".eml"):
+                fhandle = urlopen(f)
+                # try/finally, not 'with': the py2 urlopen result
+                # does not support the context manager protocol.
+                try:
+                    if hasattr(email,'message_from_binary_file'):
+                        # py3
+                        msg = email.message_from_binary_file(fhandle)
+                    else:
+                        # py2
+                        msg = email.message_from_file(fhandle)
+                finally:
+                    # The parser has read the whole stream by now, so
+                    # release the handle before walking the parts.
+                    fhandle.close()
+                if msg.is_multipart():
+                    for part in msg.walk():
+                        content_type = part.get_content_type()
+                        if content_type == "text/html":
+                            urllist.extend(get_urls_from_html(part.get_payload(decode=True)))
+                        elif content_type == "text/plain":
+                            urllist.extend(get_urls_from_text(part.get_payload(decode=True)))
+                else:
+                    urllist.extend(get_urls_from_text("%s"%msg))
+            else:
+                urllist.extend(get_urls_from_text(f))
+    elif mime_data.hasFormat('text/plain'):
+        urllist.extend(get_urls_from_text(mime_data.text()))
+    return urllist