mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-02 20:02:51 +02:00
Merge branch 'master' into fictionlive
This commit is contained in:
commit
6ab50b6eaa
46 changed files with 6446 additions and 3378 deletions
|
|
@ -33,7 +33,7 @@ except NameError:
|
|||
from calibre.customize import InterfaceActionBase
|
||||
|
||||
# pulled out from FanFicFareBase for saving in prefs.py
|
||||
__version__ = (3, 23, 4)
|
||||
__version__ = (3, 24, 2)
|
||||
|
||||
## Apparently the name for this class doesn't matter--it was still
|
||||
## 'demo' for the first few versions.
|
||||
|
|
|
|||
|
|
@ -1662,7 +1662,7 @@ class ImapTab(QWidget):
|
|||
|
||||
label = QLabel(_('Add these Tag(s) Automatically'))
|
||||
tooltip = ( _("Tags entered here will be automatically added to stories downloaded from email story URLs.") +"\n"+
|
||||
_("Any additional stories if you then manually add to the Story URL dialog will also have these tags added.") )
|
||||
_("Any additional stories you then manually add to the Story URL dialog will also have these tags added.") )
|
||||
label.setToolTip(tooltip)
|
||||
self.l.addWidget(label,row,0)
|
||||
self.imaptags = EditWithComplete(self) # QLineEdit(self)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from __future__ import (absolute_import, unicode_literals, division,
|
|||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2018, Jim Miller'
|
||||
__copyright__ = '2020, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import traceback, re
|
||||
|
|
@ -55,7 +55,7 @@ from calibre.gui2 import gprefs
|
|||
show_download_options = 'fff:add new/update dialogs:show_download_options'
|
||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||
from calibre.gui2.complete2 import EditWithComplete
|
||||
from .fanficfare.six import text_type as unicode
|
||||
from .fanficfare.six import text_type as unicode, ensure_text
|
||||
|
||||
# pulls in translation files for _() strings
|
||||
try:
|
||||
|
|
@ -252,21 +252,51 @@ class AddNewDialog(SizePersistedDialog):
|
|||
self.setWindowTitle('FanFicFare')
|
||||
self.setWindowIcon(icon)
|
||||
|
||||
self.toplabel=QLabel("Toplabel")
|
||||
self.l.addWidget(self.toplabel)
|
||||
self.url = DroppableQTextEdit(self)
|
||||
self.url.setToolTip("UrlTooltip")
|
||||
self.url.setLineWrapMode(QTextEdit.NoWrap)
|
||||
self.l.addWidget(self.url)
|
||||
|
||||
self.merge = self.newmerge = False
|
||||
self.extraoptions = {}
|
||||
|
||||
# elements to hide when doing merge.
|
||||
self.mergehide = []
|
||||
self.mergeshow = []
|
||||
# elements to show again when doing *update* merge
|
||||
self.mergeupdateshow = []
|
||||
|
||||
self.toplabel=QLabel("Toplabel")
|
||||
self.l.addWidget(self.toplabel)
|
||||
|
||||
## XXX add labels for series name and desc? Desc in tooltip?
|
||||
row = 0
|
||||
grid = QGridLayout()
|
||||
label = QLabel('<b>'+_('Series')+':</b>')
|
||||
grid.addWidget(label,row,0)
|
||||
self.mergedname=QLabel("mergedname")
|
||||
tt = _('This name will be used with the %s setting to set the title of the new book.')%'<i>anthology_title_pattern</i>'
|
||||
label.setToolTip(tt)
|
||||
self.mergeshow.append(label)
|
||||
self.mergedname.setToolTip(tt)
|
||||
grid.addWidget(self.mergedname,row,1,1,-1)
|
||||
self.l.addLayout(grid)
|
||||
self.mergeshow.append(self.mergedname)
|
||||
|
||||
row+=1
|
||||
label = QLabel('<b>'+_('Comments')+':</b>')
|
||||
grid.addWidget(label,row,0)
|
||||
self.mergeddesc=QLabel("mergeddesc")
|
||||
tt = _('These comments about the series will be included in the Comments of the new book.')+'<i></i>' # for html for auto-wrap
|
||||
label.setToolTip(tt)
|
||||
self.mergeshow.append(label)
|
||||
self.mergeddesc.setToolTip(tt)
|
||||
self.mergeddesc.setWordWrap(True)
|
||||
grid.addWidget(self.mergeddesc,row,1,1,-1)
|
||||
self.l.addLayout(grid)
|
||||
self.mergeshow.append(self.mergeddesc)
|
||||
grid.setColumnStretch(1,1)
|
||||
|
||||
self.url = DroppableQTextEdit(self)
|
||||
self.url.setToolTip("UrlTooltip")
|
||||
self.url.setLineWrapMode(QTextEdit.NoWrap)
|
||||
self.l.addWidget(self.url)
|
||||
|
||||
self.groupbox = QGroupBox(_("Show Download Options"))
|
||||
self.groupbox.setCheckable(True)
|
||||
self.groupbox.setFlat(True)
|
||||
|
|
@ -399,18 +429,32 @@ class AddNewDialog(SizePersistedDialog):
|
|||
count=""
|
||||
if url_list_text:
|
||||
count = " " + _("(%s URLs found)")%len(url_list_text.split()) # count lines
|
||||
self.toplabel.setText(_('Story URLs for anthology, one per line:') + count)
|
||||
self.toplabel.setText('<b>'+_('Story URLs for anthology, one per line:') + count + '</b>')
|
||||
self.url.setToolTip(_('URLs for stories to include in the anthology, one per line.\nWill take URLs from clipboard, but only valid URLs.'))
|
||||
self.collisionlabel.setText(_('If Story Already Exists in Anthology?'))
|
||||
self.collision.setToolTip(_("What to do if there's already an existing story with the same URL in the anthology."))
|
||||
for widget in self.mergehide:
|
||||
widget.setVisible(False)
|
||||
for widget in self.mergeshow:
|
||||
widget.setVisible(True)
|
||||
if not self.newmerge:
|
||||
for widget in self.mergeupdateshow:
|
||||
widget.setVisible(True)
|
||||
n = extraoptions.get('frompage',{}).get('name',None)
|
||||
if n:
|
||||
self.mergedname.setText(n)
|
||||
else:
|
||||
self.mergedname.setVisible(False)
|
||||
d = extraoptions.get('frompage',{}).get('desc',None)
|
||||
if d:
|
||||
self.mergeddesc.setText(unicode(d))
|
||||
else:
|
||||
self.mergeddesc.setVisible(False)
|
||||
else:
|
||||
for widget in self.mergehide:
|
||||
widget.setVisible(True)
|
||||
for widget in self.mergeshow:
|
||||
widget.setVisible(False)
|
||||
self.toplabel.setText(_('Story URLs, one per line:'))
|
||||
self.url.setToolTip(_('URLs for stories, one per line.\nWill take URLs from clipboard, but only valid URLs.\nAdd [1,5] after the URL to limit the download to chapters 1-5.'))
|
||||
self.collisionlabel.setText(_('If Story Already Exists?'))
|
||||
|
|
@ -1328,7 +1372,7 @@ class IniTextDialog(SizePersistedDialog):
|
|||
|
||||
self.textedit.setReadOnly(read_only)
|
||||
|
||||
self.textedit.setText(text)
|
||||
self.textedit.setText(ensure_text(text))
|
||||
self.l.addWidget(self.textedit)
|
||||
|
||||
self.lastStart = 0
|
||||
|
|
|
|||
|
|
@ -309,58 +309,85 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
unique_name='&Update Existing FanFiction Books',
|
||||
triggered=self.update_dialog)
|
||||
|
||||
if prefs['imapserver'] and prefs['imapuser'] and prefs['imapfolder']:
|
||||
self.get_list_imap_action = self.create_menu_item_ex(self.menu, _('Get Story URLs from &Email'), image='view.png',
|
||||
unique_name='Get Story URLs from IMAP',
|
||||
triggered=self.get_urls_from_imap_menu)
|
||||
self.get_list_imap_action = self.create_menu_item_ex(self.menu, _('Get Story URLs from &Email'), image='view.png',
|
||||
unique_name='Get Story URLs from IMAP',
|
||||
triggered=self.get_urls_from_imap_menu)
|
||||
self.get_list_imap_action.setVisible( bool(prefs['imapserver'] and prefs['imapuser'] and prefs['imapfolder']) )
|
||||
|
||||
|
||||
self.get_list_url_action = self.create_menu_item_ex(self.menu, _('Get Story URLs from Web Page'), image='view.png',
|
||||
unique_name='Get Story URLs from Web Page',
|
||||
triggered=self.get_urls_from_page_menu)
|
||||
self.get_list_action = self.create_menu_item_ex(self.menu, _('Get Story URLs from Selected Books'),
|
||||
unique_name='Get URLs from Selected Books',
|
||||
image='bookmarks.png',
|
||||
triggered=self.list_story_urls)
|
||||
|
||||
if self.get_epubmerge_plugin():
|
||||
self.menu.addSeparator()
|
||||
self.makeanth_action = self.create_menu_item_ex(self.menu, _('&Make Anthology Epub from URLs'), image='plusplus.png',
|
||||
unique_name='Make FanFiction Anthology Epub from URLs',
|
||||
shortcut_name=_('Make FanFiction Anthology Epub from URLs'),
|
||||
triggered=partial(self.add_dialog,merge=True) )
|
||||
|
||||
self.get_anthlist_url_action = self.create_menu_item_ex(self.menu, _('Make Anthology Epub from Web Page'), image='view.png',
|
||||
unique_name='Make FanFiction Anthology Epub from Web Page',
|
||||
shortcut_name=_('Make FanFiction Anthology Epub from Web Page'),
|
||||
triggered=partial(self.get_urls_from_page_menu,anthology=True))
|
||||
self.menu.addSeparator()
|
||||
anth_on = bool(self.get_epubmerge_plugin())
|
||||
self.anth_sub_menu = self.menu.addMenu(_('Anthology Options'))
|
||||
self.get_anthlist_url_action = self.create_menu_item_ex(self.anth_sub_menu, _('Make Anthology Epub from Web Page'),
|
||||
image='view.png',
|
||||
unique_name='Make FanFiction Anthology Epub from Web Page',
|
||||
shortcut_name=_('Make FanFiction Anthology Epub from Web Page'),
|
||||
triggered=partial(self.get_urls_from_page_menu,anthology=True))
|
||||
|
||||
self.updateanth_action = self.create_menu_item_ex(self.menu, _('Update Anthology Epub'), image='plusplus.png',
|
||||
unique_name='Update FanFiction Anthology Epub',
|
||||
shortcut_name=_('Update FanFiction Anthology Epub'),
|
||||
triggered=self.update_anthology)
|
||||
self.makeanth_action = self.create_menu_item_ex(self.anth_sub_menu, _('&Make Anthology Epub from URLs'),
|
||||
image='plusplus.png',
|
||||
unique_name='Make FanFiction Anthology Epub from URLs',
|
||||
shortcut_name=_('Make FanFiction Anthology Epub from URLs'),
|
||||
triggered=partial(self.add_dialog,merge=True) )
|
||||
|
||||
if 'Reading List' in self.gui.iactions and (prefs['addtolists'] or prefs['addtoreadlists']) :
|
||||
self.menu.addSeparator()
|
||||
addmenutxt, rmmenutxt = None, None
|
||||
if prefs['addtolists'] and prefs['addtoreadlists'] :
|
||||
addmenutxt = _('Mark Unread: Add to "To Read" and "Send to Device" Lists')
|
||||
if prefs['addtolistsonread']:
|
||||
rmmenutxt = _('Mark Read: Remove from "To Read" and add to "Send to Device" Lists')
|
||||
else:
|
||||
rmmenutxt = _('Mark Read: Remove from "To Read" Lists')
|
||||
elif prefs['addtolists'] :
|
||||
addmenutxt = _('Add to "Send to Device" Lists')
|
||||
elif prefs['addtoreadlists']:
|
||||
addmenutxt = _('Mark Unread: Add to "To Read" Lists')
|
||||
self.updateanth_action = self.create_menu_item_ex(self.anth_sub_menu, _('Update Anthology Epub'),
|
||||
image='plusplus.png',
|
||||
unique_name='Update FanFiction Anthology Epub',
|
||||
shortcut_name=_('Update FanFiction Anthology Epub'),
|
||||
triggered=self.update_anthology)
|
||||
# Make, but set invisible--that way they still appear in
|
||||
# keyboard shortcuts (and can be set/reset) even when not
|
||||
# available. Set actions, not just sub invisible because
|
||||
# that also serves to disable them.
|
||||
for ac in (self.anth_sub_menu.menuAction(),
|
||||
self.get_anthlist_url_action,
|
||||
self.makeanth_action,
|
||||
self.updateanth_action):
|
||||
ac.setVisible(anth_on)
|
||||
|
||||
rl_on = bool('Reading List' in self.gui.iactions and (prefs['addtolists'] or prefs['addtoreadlists']))
|
||||
self.rl_sub_menu = self.menu.addMenu(_('Reading List Options'))
|
||||
addmenutxt, rmmenutxt = None, None
|
||||
if prefs['addtolists'] and prefs['addtoreadlists'] :
|
||||
addmenutxt = _('Mark Unread: Add to "To Read" and "Send to Device" Lists')
|
||||
if prefs['addtolistsonread']:
|
||||
rmmenutxt = _('Mark Read: Remove from "To Read" and add to "Send to Device" Lists')
|
||||
else:
|
||||
rmmenutxt = _('Mark Read: Remove from "To Read" Lists')
|
||||
elif prefs['addtolists'] :
|
||||
addmenutxt = _('Add to "Send to Device" Lists')
|
||||
elif prefs['addtoreadlists']:
|
||||
addmenutxt = _('Mark Unread: Add to "To Read" Lists')
|
||||
rmmenutxt = _('Mark Read: Remove from "To Read" Lists')
|
||||
|
||||
if addmenutxt:
|
||||
self.add_send_action = self.create_menu_item_ex(self.menu, addmenutxt,
|
||||
unique_name='Add to "To Read" and "Send to Device" Lists',
|
||||
image='plusplus.png',
|
||||
triggered=partial(self.update_lists,add=True))
|
||||
add_off = not addmenutxt
|
||||
if add_off:
|
||||
addmenutxt = _('Add to Lists Not Configured')
|
||||
|
||||
if rmmenutxt:
|
||||
self.add_remove_action = self.create_menu_item_ex(self.menu, rmmenutxt,
|
||||
unique_name='Remove from "To Read" and add to "Send to Device" Lists',
|
||||
image='minusminus.png',
|
||||
triggered=partial(self.update_lists,add=False))
|
||||
self.add_send_action = self.create_menu_item_ex(self.rl_sub_menu, addmenutxt,
|
||||
unique_name='Add to "To Read" and "Send to Device" Lists',
|
||||
image='plusplus.png',
|
||||
triggered=partial(self.update_lists,add=True))
|
||||
self.add_send_action.setVisible(rl_on and not add_off)
|
||||
|
||||
rm_off = not rmmenutxt
|
||||
if rm_off:
|
||||
rmmenutxt = _('Remove from Lists Not Configured')
|
||||
self.add_remove_action = self.create_menu_item_ex(self.rl_sub_menu, rmmenutxt,
|
||||
unique_name='Remove from "To Read" and add to "Send to Device" Lists',
|
||||
image='minusminus.png',
|
||||
triggered=partial(self.update_lists,add=False))
|
||||
self.add_remove_action.setVisible(rl_on and not rm_off)
|
||||
self.rl_sub_menu.menuAction().setVisible(rl_on)
|
||||
|
||||
self.menu.addSeparator()
|
||||
self.get_list_action = self.create_menu_item_ex(self.menu, _('Remove "New" Chapter Marks from Selected books'),
|
||||
|
|
@ -368,20 +395,15 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
image='edit-undo.png',
|
||||
triggered=self.unnew_books)
|
||||
|
||||
self.menu.addSeparator()
|
||||
self.get_list_action = self.create_menu_item_ex(self.menu, _('Get Story URLs from Selected Books'),
|
||||
unique_name='Get URLs from Selected Books',
|
||||
image='bookmarks.png',
|
||||
triggered=self.list_story_urls)
|
||||
|
||||
self.reject_list_action = self.create_menu_item_ex(self.menu, _('Reject Selected Books'),
|
||||
unique_name='Reject Selected Books', image='rotate-right.png',
|
||||
triggered=self.reject_list_urls)
|
||||
# self.menu.addSeparator()
|
||||
|
||||
# print("platform.system():%s"%platform.system())
|
||||
# print("platform.mac_ver()[0]:%s"%platform.mac_ver()[0])
|
||||
if not self.check_macmenuhack(): # not platform.mac_ver()[0]: # Some macs crash on these menu items for unknown reasons.
|
||||
self.menu.addSeparator()
|
||||
# self.menu.addSeparator()
|
||||
self.config_action = self.create_menu_item_ex(self.menu, _('&Configure FanFicFare'),
|
||||
image= 'config.png',
|
||||
unique_name='Configure FanFicFare',
|
||||
|
|
@ -464,8 +486,7 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
self.unnew_books()
|
||||
|
||||
def get_urls_from_imap_menu(self):
|
||||
|
||||
if not prefs['imapserver'] or not prefs['imapuser'] or not prefs['imapfolder']:
|
||||
if not (prefs['imapserver'] and prefs['imapuser'] and prefs['imapfolder']):
|
||||
s=_('FanFicFare Email Settings are not configured.')
|
||||
info_dialog(self.gui, s, s, show=True, show_copy_button=False)
|
||||
return
|
||||
|
|
@ -533,7 +554,9 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
if prefs['imaptags']:
|
||||
message="<p>"+_("Tag(s) <b><i>%s</i></b> will be added to all stories downloaded in the next dialog, including any story URLs you add manually.")%prefs['imaptags']+"</p>"
|
||||
confirm(message,'fff_add_imaptags', self.gui, show_cancel_button=False)
|
||||
self.add_dialog("\n".join(url_list),merge=False,add_tag=prefs['imaptags'])
|
||||
self.add_dialog("\n".join(url_list),
|
||||
merge=False,
|
||||
extraoptions={'add_tag':prefs['imaptags']})
|
||||
else:
|
||||
msg = _('No Valid Story URLs Found in Unread Emails.')
|
||||
if reject_list:
|
||||
|
|
@ -565,12 +588,16 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
with busy_cursor():
|
||||
self.gui.status_bar.show_message(_('Fetching Story URLs from Page...'))
|
||||
|
||||
url_list = self.get_urls_from_page(url)
|
||||
frompage = self.get_urls_from_page(url)
|
||||
url_list = frompage.get('urllist',[])
|
||||
|
||||
self.gui.status_bar.show_message(_('Finished Fetching Story URLs from Page.'),3000)
|
||||
|
||||
if url_list:
|
||||
self.add_dialog("\n".join(url_list),merge=d.anthology,anthology_url=url)
|
||||
self.add_dialog("\n".join(url_list),
|
||||
merge=d.anthology,
|
||||
extraoptions={'anthology_url':url,
|
||||
'frompage':frompage})
|
||||
else:
|
||||
info_dialog(self.gui, _('List of Story URLs'),
|
||||
_('No Valid Story URLs found on given page.'),
|
||||
|
|
@ -578,12 +605,9 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
show_copy_button=False)
|
||||
|
||||
def get_urls_from_page(self,url):
|
||||
## now returns a {} with at least 'urllist'
|
||||
logger.debug("get_urls_from_page URL:%s"%url)
|
||||
## some sites hide mature links unless logged in.
|
||||
if 'archiveofourown.org' in url or 'fimfiction.net' in url:
|
||||
configuration = get_fff_config(url)
|
||||
else:
|
||||
configuration = None
|
||||
configuration = get_fff_config(url)
|
||||
return get_urls_from_page(url,configuration)
|
||||
|
||||
def list_story_urls(self):
|
||||
|
|
@ -765,9 +789,14 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
if confirm(message,'fff_reject_non_fanfiction', self.gui):
|
||||
self.gui.iactions['Remove Books'].delete_books()
|
||||
|
||||
def add_dialog(self,url_list_text=None,merge=False,anthology_url=None,add_tag=None):
|
||||
'Both new individual stories and new anthologies are created here.'
|
||||
|
||||
def add_dialog(self,
|
||||
url_list_text=None,
|
||||
merge=False,
|
||||
extraoptions={}):
|
||||
'''
|
||||
Both new individual stories and new anthologies are created here.
|
||||
Expected extraoptions entries: anthology_url, add_tag, frompage
|
||||
'''
|
||||
if not url_list_text:
|
||||
url_list = self.get_urls_clip()
|
||||
url_list_text = "\n".join(url_list)
|
||||
|
|
@ -779,7 +808,7 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
self.prep_downloads,
|
||||
merge=merge,
|
||||
newmerge=True,
|
||||
extraoptions={'anthology_url':anthology_url,'add_tag':add_tag})
|
||||
extraoptions=extraoptions)
|
||||
|
||||
def update_anthology(self):
|
||||
if not self.get_epubmerge_plugin():
|
||||
|
|
@ -830,8 +859,11 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
# get list from identifiers:url/uri if present, but only if
|
||||
# it's *not* a valid story URL.
|
||||
mergeurl = self.get_story_url(db,book_id)
|
||||
frompage = {}
|
||||
if mergeurl and not self.is_good_downloader_url(mergeurl):
|
||||
url_list = [ adapters.getNormalStoryURL(url) for url in self.get_urls_from_page(mergeurl) ]
|
||||
frompage = self.get_urls_from_page(mergeurl)
|
||||
url_list = [ adapters.getNormalStoryURL(url) for url in frompage.get('urllist',[]) ]
|
||||
frompage['urllist']=url_list
|
||||
|
||||
url_list_text = "\n".join(url_list)
|
||||
|
||||
|
|
@ -848,7 +880,8 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
merge=True,
|
||||
newmerge=False,
|
||||
extrapayload=urlmapfile,
|
||||
extraoptions={'tdir':tdir,
|
||||
extraoptions={'frompage':frompage,
|
||||
'tdir':tdir,
|
||||
'mergebook':mergebook})
|
||||
# Need to use AddNewDialog modal here because it's an update
|
||||
# of an existing book. Don't want the user deleting it or
|
||||
|
|
@ -965,9 +998,6 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
|
||||
def prep_downloads(self, options, books, merge=False, extrapayload=None):
|
||||
'''Fetch metadata for stories from servers, launch BG job when done.'''
|
||||
|
||||
logger.debug("add_tag:%s"%options.get('add_tag',None))
|
||||
|
||||
if isinstance(books, string_types):
|
||||
url_list = split_text_to_urls(books)
|
||||
books = self.convert_urls_to_books(url_list)
|
||||
|
|
@ -1322,7 +1352,7 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
<p>%s</p>
|
||||
<p>%s</p>'''%(
|
||||
_('Change Story URL?'),
|
||||
_('<b>%s</b> by <b>%s</b> is already in your library with a different source URL:')%(mi.title,', '.join(mi.author)),
|
||||
_('<b>%(title)s</b> by <b>%(author)s</b> is already in your library with a different source URL:')%{'title':mi.title,'author':', '.join(mi.author)},
|
||||
_('In library: <a href="%(liburl)s">%(liburl)s</a>')%{'liburl':liburl},
|
||||
_('New URL: <a href="%(newurl)s">%(newurl)s</a>')%{'newurl':book['url']},
|
||||
_("Click '<b>Yes</b>' to update/overwrite book with new URL."),
|
||||
|
|
@ -1336,7 +1366,7 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
<p>%s</p>
|
||||
<p>%s</p>'''%(
|
||||
_('Download as New Book?'),
|
||||
_('<b>%s</b> by <b>%s</b> is already in your library with a different source URL.')%(mi.title,', '.join(mi.author)),
|
||||
_('<b>%(title)s</b> by <b>%(author)s</b> is already in your library with a different source URL.')%{'title':mi.title,'author':', '.join(mi.author)},
|
||||
_('You chose not to update the existing book. Do you want to add a new book for this URL?'),
|
||||
_('New URL: <a href="%(newurl)s">%(newurl)s</a>')%{'newurl':book['url']},
|
||||
_("Click '<b>Yes</b>' to a new book with new URL."),
|
||||
|
|
@ -1585,7 +1615,6 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
errorcol_label=None,
|
||||
lastcheckedcol_label=None):
|
||||
|
||||
logger.debug("add_tag:%s"%options.get('add_tag',None))
|
||||
if options.get('add_tag',False):
|
||||
book['tags'].extend(options.get('add_tag').split(','))
|
||||
|
||||
|
|
@ -1806,13 +1835,7 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
if 'mergebook' in options:
|
||||
existingbook = options['mergebook']
|
||||
#print("existingbook:\n%s"%existingbook)
|
||||
mergebook = self.merge_meta_books(existingbook,good_list,options['fileform'])
|
||||
|
||||
if 'mergebook' in options:
|
||||
mergebook['calibre_id'] = options['mergebook']['calibre_id']
|
||||
|
||||
if 'anthology_url' in options:
|
||||
mergebook['url'] = options['anthology_url']
|
||||
mergebook = self.merge_meta_books(existingbook,good_list,options)
|
||||
|
||||
#print("mergebook:\n%s"%mergebook)
|
||||
|
||||
|
|
@ -2571,7 +2594,7 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
def is_good_downloader_url(self,url):
|
||||
return adapters.getNormalStoryURL(url)
|
||||
|
||||
def merge_meta_books(self,existingbook,book_list,fileform):
|
||||
def merge_meta_books(self,existingbook,book_list,options):
|
||||
book = self.make_book()
|
||||
book['author'] = []
|
||||
book['tags'] = []
|
||||
|
|
@ -2672,10 +2695,12 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
|
||||
logger.debug("book['url']:%s"%book['url'])
|
||||
|
||||
book['comments'] = '<div><p>' +_("Anthology containing:")+"</p>\n\n"
|
||||
## if series explicitly collected, include desc, if it's there.
|
||||
d = options.get('frompage',{}).get('desc','')
|
||||
book['comments'] = '<div>'+d+'<p>' +_("Anthology containing:")+"</p>\n\n"
|
||||
wraptitle = lambda x : '<p><b>'+x+'</b></p>\n'
|
||||
if len(book['author']) > 1:
|
||||
mkbooktitle = lambda x : wraptitle(_("%s by %s") % (x['title'],' & '.join(x['author'])))
|
||||
mkbooktitle = lambda x : wraptitle(_("%(title)s by %(author)s") % {'title':x['title'],'author':' & '.join(x['author'])})
|
||||
else:
|
||||
mkbooktitle = lambda x : wraptitle(x['title'])
|
||||
|
||||
|
|
@ -2694,7 +2719,7 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
book['comments'] += '</div>'
|
||||
logger.debug(book['comments'])
|
||||
|
||||
configuration = get_fff_config(book['url'],fileform)
|
||||
configuration = get_fff_config(book['url'],options['fileform'])
|
||||
if existingbook:
|
||||
book['title'] = deftitle = existingbook['title']
|
||||
if prefs['anth_comments_newonly']:
|
||||
|
|
@ -2704,25 +2729,30 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
# book['all_metadata']['description']
|
||||
|
||||
series = None
|
||||
logger.debug("serieslists:%s"%serieslists)
|
||||
# if all same series, use series for name. But only if all and not previous named
|
||||
if len(serieslist) == len(book_list):
|
||||
series = serieslist[0]
|
||||
book['title'] = series
|
||||
for sr in serieslist:
|
||||
if series != sr:
|
||||
book['title'] = deftitle
|
||||
series = None
|
||||
break
|
||||
if not series and serieslists:
|
||||
# for multiple series sites: if all stories are
|
||||
# members of the same series, use it. Or the first
|
||||
# one, rather.
|
||||
common_series = get_common_elements(serieslists)
|
||||
logger.debug("common_series:%s"%common_series)
|
||||
if common_series:
|
||||
series = common_series[0]
|
||||
n = options.get('frompage',{}).get('name',None)
|
||||
if n:
|
||||
# series explicitly parsed, use name.
|
||||
book['title'] = series = n
|
||||
else:
|
||||
logger.debug("serieslists:%s"%serieslists)
|
||||
# if all same series, use series for name. But only if all and not previous named
|
||||
if len(serieslist) == len(book_list):
|
||||
series = serieslist[0]
|
||||
book['title'] = series
|
||||
for sr in serieslist:
|
||||
if series != sr:
|
||||
book['title'] = deftitle
|
||||
series = None
|
||||
break
|
||||
if not series and serieslists:
|
||||
# for multiple series sites: if all stories are
|
||||
# members of the same series, use it. Or the first
|
||||
# one, rather.
|
||||
common_series = get_common_elements(serieslists)
|
||||
logger.debug("common_series:%s"%common_series)
|
||||
if common_series:
|
||||
series = common_series[0]
|
||||
book['title'] = series
|
||||
|
||||
if prefs['setanthologyseries'] and book['title'] == series:
|
||||
book['series'] = series+' [0]'
|
||||
|
|
@ -2742,9 +2772,20 @@ class FanFicFarePlugin(InterfaceAction):
|
|||
for v in ['Completed','In-Progress']:
|
||||
if v in book['tags']:
|
||||
book['tags'].remove(v)
|
||||
## some adapters, like AO3, may have series status.
|
||||
s = options.get('frompage',{}).get('status','')
|
||||
if s:
|
||||
book['all_metadata']['status'] = s
|
||||
book['tags'].append(s)
|
||||
book['tags'].extend(configuration.getConfigList('anthology_tags'))
|
||||
book['all_metadata']['anthology'] = "true"
|
||||
|
||||
if 'mergebook' in options:
|
||||
book['calibre_id'] = options['mergebook']['calibre_id']
|
||||
|
||||
if 'anthology_url' in options:
|
||||
book['url'] = options['anthology_url']
|
||||
|
||||
return book
|
||||
|
||||
def split_text_to_urls(urls):
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||
import six
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2018, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
|
||||
__copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import logging
|
||||
|
|
@ -90,7 +90,7 @@ def do_download_worker(book_list,
|
|||
book_list.append(job.result)
|
||||
book_id = job._book['calibre_id']
|
||||
count = count + 1
|
||||
notification(float(count)/total, _('%d of %d stories finished downloading')%(count,total))
|
||||
notification(float(count)/total, _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':total})
|
||||
# Add this job's output to the current log
|
||||
logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
|
||||
logger.info(job.details)
|
||||
|
|
@ -297,8 +297,8 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
|||
inject_cal_cols(book,story,configuration)
|
||||
writer.writeStory(outfilename=outfile, forceOverwrite=True)
|
||||
|
||||
book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\
|
||||
(options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
|
||||
book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters for %(total)s total.')%\
|
||||
{'fileform':options['fileform'],'added':(urlchaptercount-chaptercount),'total':urlchaptercount}
|
||||
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
|
||||
if options['savemetacol'] != '':
|
||||
book['savemetacol'] = story.dump_html_metadata()
|
||||
|
|
|
|||
|
|
@ -561,6 +561,14 @@ storynotes_label:Story Notes
|
|||
add_to_extra_titlepage_entries:,storynotes
|
||||
|
||||
[base_xenforoforum]
|
||||
## Some sites require login for some stories
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
## XenForo sites require login for some stories, but don't report that
|
||||
## to FFF. To download those, set your username, password and set
|
||||
## always_login:true
|
||||
#always_login:false
|
||||
|
||||
## We've been requested by the site(s) admin to rein in hits. If you
|
||||
## download fewer stories less often you can likely get by with
|
||||
|
|
@ -1213,65 +1221,6 @@ extra_titlepage_entries:eroticatags,disclaimer
|
|||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[archive.hpfanfictalk.com]
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. In commandline version, this should go in your
|
||||
## personal.ini, not defaults.ini.
|
||||
#is_adult:true
|
||||
|
||||
add_to_extra_valid_entries:,themes,inclusivity,house,
|
||||
series00,series00Url,series00HTML,
|
||||
series01,series01Url,series01HTML,
|
||||
series02,series02Url,series02HTML,
|
||||
series03,series03Url,series03HTML,
|
||||
series04,series04Url,series04HTML,
|
||||
series05,series05Url,series05HTML,
|
||||
series06,series06Url,series06HTML,
|
||||
series07,series07Url,series07HTML,
|
||||
series08,series08Url,series08HTML,
|
||||
series09,series09Url,series09HTML,
|
||||
|
||||
## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to
|
||||
## make entryHTML.
|
||||
make_linkhtml_entries:series00,series01,series02,series03,series04,
|
||||
series05,series06,series07,series08,series09
|
||||
|
||||
themes_label:Themes
|
||||
inclusivity_label:Inclusivity
|
||||
house_label:HPFT Forum House
|
||||
|
||||
## series00 will be the same as common metadata series.
|
||||
series00HTML_label:Series
|
||||
series01HTML_label:Additional Series
|
||||
series02HTML_label:Additional Series
|
||||
series03HTML_label:Additional Series
|
||||
series04HTML_label:Additional Series
|
||||
series05HTML_label:Additional Series
|
||||
series06HTML_label:Additional Series
|
||||
series07HTML_label:Additional Series
|
||||
series08HTML_label:Additional Series
|
||||
series09HTML_label:Additional Series
|
||||
|
||||
## Try to collect series names and numbers of this story in those
|
||||
## series. This lets us turn it on and off by site without keeping a
|
||||
## lengthy titlepage_entries per site and prevents it updating in the
|
||||
## plugin.
|
||||
collect_series: true
|
||||
|
||||
add_to_extra_titlepage_entries:,series01HTML,series02HTML,series03HTML,
|
||||
series04HTML,series05HTML,series06HTML,series07HTML,series08HTML,series09HTML
|
||||
|
||||
## archive.hpfanfictalk.com takes margins away, even from p tags, by
|
||||
## default. So authors have to either include extra br/p tags or
|
||||
## their own styles. These allow for both, but leave you at the mercy
|
||||
## of author CSS.
|
||||
add_to_output_css:
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
add_to_keep_html_attrs:,style
|
||||
|
||||
[archive.shriftweb.org]
|
||||
website_encodings:Windows-1252,utf8,iso-8859-1
|
||||
|
||||
|
|
@ -1678,6 +1627,72 @@ make_linkhtml_entries:translators,betas
|
|||
## can change it.
|
||||
include_in_category:fandoms
|
||||
|
||||
[fanfictalk.com]
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. In commandline version, this should go in your
|
||||
## personal.ini, not defaults.ini.
|
||||
#is_adult:true
|
||||
|
||||
add_to_extra_valid_entries:,tropes,themes,representation,inclusivity,
|
||||
house,storytype,contentwarnings,
|
||||
series00,series00Url,series00HTML,
|
||||
series01,series01Url,series01HTML,
|
||||
series02,series02Url,series02HTML,
|
||||
series03,series03Url,series03HTML,
|
||||
series04,series04Url,series04HTML,
|
||||
series05,series05Url,series05HTML,
|
||||
series06,series06Url,series06HTML,
|
||||
series07,series07Url,series07HTML,
|
||||
series08,series08Url,series08HTML,
|
||||
series09,series09Url,series09HTML,
|
||||
|
||||
# fields changed name with domain name change.
|
||||
include_in_inclusivity:representation
|
||||
include_in_themes:tropes
|
||||
|
||||
## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to
|
||||
## make entryHTML.
|
||||
make_linkhtml_entries:series00,series01,series02,series03,series04,
|
||||
series05,series06,series07,series08,series09
|
||||
|
||||
tropes_label:Tropes
|
||||
representation_label:Representation
|
||||
house_label:HPFT Forum House
|
||||
storytype_label:Story Type
|
||||
contentwarnings_label:Content Warnings
|
||||
|
||||
## series00 will be the same as common metadata series.
|
||||
series00HTML_label:Series
|
||||
series01HTML_label:Additional Series
|
||||
series02HTML_label:Additional Series
|
||||
series03HTML_label:Additional Series
|
||||
series04HTML_label:Additional Series
|
||||
series05HTML_label:Additional Series
|
||||
series06HTML_label:Additional Series
|
||||
series07HTML_label:Additional Series
|
||||
series08HTML_label:Additional Series
|
||||
series09HTML_label:Additional Series
|
||||
|
||||
## Try to collect series names and numbers of this story in those
|
||||
## series. This lets us turn it on and off by site without keeping a
|
||||
## lengthy titlepage_entries per site and prevents it updating in the
|
||||
## plugin.
|
||||
collect_series: true
|
||||
|
||||
#add_to_extra_titlepage_entries:,tropes,themes,representation,inclusivity,house,storytype,contentwarnings,series01HTML,series02HTML,series03HTML,
|
||||
# series04HTML,series05HTML,series06HTML,series07HTML,series08HTML,series09HTML
|
||||
|
||||
## fanfictalk.com takes margins away, even from p tags, by default.
|
||||
## So authors have to either include extra br/p tags or their own
|
||||
## styles. These allow for both, but leave you at the mercy of author
|
||||
## CSS.
|
||||
add_to_output_css:
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
add_to_keep_html_attrs:,style
|
||||
|
||||
[fanfiction-junkies.de]
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
|
|
@ -3013,6 +3028,17 @@ sitetags_label:Site tags
|
|||
## Attempt to fix p and br excess from HTML in great many stories
|
||||
fix_excess_space:false
|
||||
|
||||
[www.novelupdates.cc]
|
||||
## Note that novelupdates.cc != novelupdates.com
|
||||
|
||||
## There is reason to believe that novelupdates.cc may be a
|
||||
## replacement for wuxiaworld.co, but currently both exist with
|
||||
## different data.
|
||||
|
||||
## When dedup_order_chapter_list:true, use a heuristic algorithm
|
||||
## specific to novelupdates.cc order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.phoenixsong.net]
|
||||
## Some sites require login (or login for some rated stories). The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
|
|
@ -3043,7 +3069,7 @@ sitetags_label:Site Tags
|
|||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
|
||||
add_to_include_subject_tags:,takesplaces,snapeflavours,sitetags
|
||||
#add_to_include_subject_tags:,takesplaces,snapeflavours,sitetags
|
||||
#add_to_extra_titlepage_entries:,stars,reviews,reads,takesplaces,snapeflavours,sitetags
|
||||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
|
@ -3090,6 +3116,11 @@ extratags:
|
|||
## add_to_output_css example for [base_xenforoforum:epub].
|
||||
#legend_spoilers:true
|
||||
|
||||
## royalroad.com chapters can have author notes attached to them.
|
||||
## Setting include_author_notes:true will include them with the
|
||||
## chapter text.
|
||||
#include_author_notes:true
|
||||
|
||||
[www.scarvesandcoffee.net]
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
|
|
@ -3108,6 +3139,12 @@ views_label:Views
|
|||
averageWords_label:Average Words (Chapter)
|
||||
add_to_titlepage_entries:,views, averageWords
|
||||
|
||||
## Scribble Hub chapters can include author's notes and news blocks. We've
|
||||
## traditionally included them all in the chapter text, but this allows
|
||||
## you to customize which you include. Copy this parameter to your
|
||||
## personal.ini and list the ones you don't want.
|
||||
#exclude_notes:authornotes,newsboxes
|
||||
|
||||
[www.siye.co.uk]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
|
|
@ -3148,6 +3185,12 @@ extracategories:Lord of the Rings
|
|||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.the-sietch.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
## the-sietch.com shows more posts per reader page than other XF sites.
|
||||
reader_posts_per_page:15
|
||||
|
||||
[www.thedelphicexpanse.com]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Star Trek: Enterprise
|
||||
|
|
@ -3330,12 +3373,6 @@ website_encodings:Windows-1252,utf8
|
|||
## specific to wuxiaworld.co order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.novelupdates.cc]
|
||||
## Note that novelupdates.cc != novelupdates.com
|
||||
## When dedup_order_chapter_list:true, use a heuristic algorithm
|
||||
## specific to novelupdates.cc order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.wuxiaworld.com]
|
||||
user_agent:Mozilla/5.0
|
||||
## Authors on wuxiaworld.com create their own index pages, so it's not
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2369
calibre-plugin/translations/ja.po
Normal file
2369
calibre-plugin/translations/ja.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -166,13 +166,14 @@ from . import adapter_hentaifoundrycom
|
|||
from . import adapter_mugglenetfanfictioncom
|
||||
from . import adapter_swiorgru
|
||||
from . import adapter_fanficsme
|
||||
from . import adapter_archivehpfanfictalkcom
|
||||
from . import adapter_fanfictalkcom
|
||||
from . import adapter_scifistoriescom
|
||||
from . import adapter_silmarillionwritersguildorg
|
||||
from . import adapter_chireadscom
|
||||
from . import adapter_scribblehubcom
|
||||
from . import adapter_fictionlive
|
||||
from . import adapter_wuxiaworldsite
|
||||
from . import adapter_thesietchcom
|
||||
|
||||
## This bit of complexity allows adapters to be added by just adding
|
||||
## an import. It eliminates the long if/else clauses we used to need
|
||||
|
|
|
|||
|
|
@ -38,9 +38,9 @@ class WWWAlternatehistoryComAdapter(BaseXenForo2ForumAdapter):
|
|||
return 'www.alternatehistory.com'
|
||||
|
||||
@classmethod
|
||||
def getURLPrefix(cls):
|
||||
def getPathPrefix(cls):
|
||||
# in case it needs more than just site/
|
||||
return 'https://' + cls.getSiteDomain() + '/forum'
|
||||
return '/forum/'
|
||||
|
||||
def get_threadmarks_top(self,souptag):
|
||||
return souptag.find('div',{'class':'block-outer-opposite--threadmarks'})
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
|
||||
# Copyright 2014 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -396,7 +396,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('seriesUrl',series_url)
|
||||
|
||||
def hookForUpdates(self,chaptercount):
|
||||
if self.oldchapters and len(self.oldchapters) > self.newestChapterNum:
|
||||
if self.newestChapterNum and self.oldchapters and len(self.oldchapters) > self.newestChapterNum:
|
||||
logger.info("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1))
|
||||
self.oldchapters = self.oldchapters[:self.newestChapterNum]
|
||||
return len(self.oldchapters)
|
||||
|
|
@ -572,3 +572,51 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
# logger.debug(skip_tag)
|
||||
|
||||
return self.utf8FromSoup(url,save_chapter)
|
||||
|
||||
def before_get_urls_from_page(self,url,normalize):
|
||||
# special stuff to log into archiveofourown.org, if possible.
|
||||
# Unlike most that show the links to 'adult' stories, but protect
|
||||
# them, AO3 doesn't even show them if not logged in. Only works
|
||||
# with saved user/pass--not going to prompt for list.
|
||||
if self.getConfig("username"):
|
||||
if self.getConfig("is_adult"):
|
||||
if '?' in url:
|
||||
addurl = "&view_adult=true"
|
||||
else:
|
||||
addurl = "?view_adult=true"
|
||||
else:
|
||||
addurl=""
|
||||
# just to get an authenticity_token.
|
||||
data = self._fetchUrl(url+addurl)
|
||||
# login the session.
|
||||
self.performLogin(url,data)
|
||||
# get the list page with logged in session.
|
||||
|
||||
def get_series_from_page(self,url,data,normalize=False):
|
||||
'''
|
||||
This method is to make it easier for adapters to detect a
|
||||
series URL, pick out the series metadata and list of storyUrls
|
||||
to return without needing to override get_urls_from_page
|
||||
entirely.
|
||||
'''
|
||||
## easiest way to get all the weird URL possibilities and stay
|
||||
## up to date with future changes.
|
||||
m = re.match(self.getSiteURLPattern().replace('/works/','/series/'),url)
|
||||
if m:
|
||||
soup = self.make_soup(data)
|
||||
retval = {}
|
||||
retval['urllist']=[ 'https://'+self.host+a['href'] for a in soup.select('h4.heading a:first-child') ]
|
||||
retval['name']=stripHTML(soup.select_one("h2.heading"))
|
||||
desc=soup.select_one("div.wrapper dd blockquote.userstuff")
|
||||
if desc:
|
||||
desc.name='div' # change blockquote to div to match stories.
|
||||
retval['desc']=desc
|
||||
stats=stripHTML(soup.select_one("dl.series dl.stats"))
|
||||
if 'Complete:Yes' in stats:
|
||||
retval['status'] = "Completed"
|
||||
elif 'Complete:No' in stats:
|
||||
retval['status'] = "In-Progress"
|
||||
return retval
|
||||
## return dict with at least {'urllist':['storyUrl','storyUrl',...]}
|
||||
## optionally 'name' and 'desc'?
|
||||
return {}
|
||||
|
|
|
|||
|
|
@ -30,11 +30,11 @@ from ..six.moves.urllib.error import HTTPError
|
|||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return ArchiveHPfanfictalkComAdapter
|
||||
return FanfictalkComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is to camel-case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class ArchiveHPfanfictalkComAdapter(BaseSiteAdapter):
|
||||
class FanfictalkComAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
|
@ -48,7 +48,7 @@ class ArchiveHPfanfictalkComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ahpfftc')
|
||||
|
|
@ -57,17 +57,26 @@ class ArchiveHPfanfictalkComAdapter(BaseSiteAdapter):
|
|||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %b %Y"
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return [cls.getSiteDomain(),'archive.hpfanfictalk.com']
|
||||
|
||||
@classmethod
|
||||
def getConfigSections(cls):
|
||||
"Only needs to be overridden if the adapter has additional ini sections."
|
||||
return [cls.getConfigSection(),'archive.hpfanfictalk.com']
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'archive.hpfanfictalk.com'
|
||||
return 'fanfictalk.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://(archive\.hp)?"+re.escape(self.getSiteDomain())+r"(/archive)?/viewstory\.php\?sid=\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
|
|
@ -111,24 +120,27 @@ class ArchiveHPfanfictalkComAdapter(BaseSiteAdapter):
|
|||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('h3')
|
||||
pagetitle = soup.select_one('div#pagetitle')
|
||||
# logger.debug(pagetitle)
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',stripHTML(a))
|
||||
for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
|
||||
self.story.addToList('authorId',a['href'].split('=')[1])
|
||||
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.addToList('author',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
|
||||
self.add_chapter(chapter,'https://'+self.host+'/archive/'+chapter['href'])
|
||||
|
||||
# categories
|
||||
for a in soup.select("div#sort a"):
|
||||
self.story.addToList('category',stripHTML(a))
|
||||
|
||||
listbox = soup.find('div', {'class':'listbox'})
|
||||
# this site has two divs with class=gb-50 and no immediate container.
|
||||
gb50s = soup.find_all('div', {'class':'gb-50'})
|
||||
|
||||
|
|
@ -137,14 +149,15 @@ class ArchiveHPfanfictalkComAdapter(BaseSiteAdapter):
|
|||
for url in urls:
|
||||
self.story.addToList(metadata,stripHTML(url))
|
||||
|
||||
list_from_urls(listbox,r'browse.php\?type=categories','category')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=characters','characters')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=11','ships')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=10','representation')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=7','storytype')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=14','house')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=4','genre')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=13','themes')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=8','warnings')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=10','inclusivity')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=15','contentwarnings')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=4','genre')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=13','tropes')
|
||||
|
||||
bq = soup.find('blockquote2')
|
||||
if bq:
|
||||
|
|
@ -162,40 +175,27 @@ class ArchiveHPfanfictalkComAdapter(BaseSiteAdapter):
|
|||
# logger.debug(value)
|
||||
# logger.debug(label)
|
||||
|
||||
if 'Rating' in label:
|
||||
# Mature Audiences · Incomplete
|
||||
(rating,status) = value.split('·')
|
||||
self.story.setMetadata('rating', rating)
|
||||
if 'Complete' in status:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Story Length' in label:
|
||||
if 'Words:' in label:
|
||||
stripHTML(value)
|
||||
# 10 chapters (45462 words)
|
||||
v = stripHTML(value)
|
||||
v = v.split('(')[1]
|
||||
v = v.split(' words')[0]
|
||||
self.story.setMetadata('numWords', v)
|
||||
self.story.setMetadata('numWords', stripHTML(value).replace('·',''))
|
||||
|
||||
if 'Published' in label:
|
||||
if 'Published:' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value).replace('·',''), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
if 'Updated:' in label:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value).replace('·',''), self.dateformat))
|
||||
|
||||
# Site allows stories to be in several series at once. FFF
|
||||
# isn't thrilled with that, we have series00, series01, etc.
|
||||
# Example:
|
||||
# http://archive.hpfanfictalk.com/viewstory.php?sid=483
|
||||
# https://fanfictalk.com/archive/viewstory.php?sid=483
|
||||
|
||||
if self.getConfig("collect_series"):
|
||||
seriesspan = soup.find('span',label='Series')
|
||||
for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))):
|
||||
# logger.debug(seriesa)
|
||||
series_name = stripHTML(seriesa)
|
||||
series_url = 'https://'+self.host+'/'+seriesa['href']
|
||||
series_url = 'https://'+self.host+'/archive/'+seriesa['href']
|
||||
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
|
|
@ -401,6 +401,10 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
|||
if 'multiple' in chunk and chunk['multiple'] == False:
|
||||
vote = [vote] # normalize to list
|
||||
for v in vote:
|
||||
# v should only be int, but there is at least one story where some unrelated string was returned,
|
||||
# so let's just ignore non-int values here
|
||||
if not isinstance(v, int):
|
||||
continue
|
||||
if 0 <= v <= len(choices):
|
||||
output[v] += 1
|
||||
return output
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -391,3 +391,10 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,soup)
|
||||
|
||||
def before_get_urls_from_page(self,url,normalize):
|
||||
## Unlike most that show the links to 'adult' stories, but protect
|
||||
## them, FimF doesn't even show them if not logged in.
|
||||
# data = self._fetchUrl(url)
|
||||
if self.getConfig("is_adult"):
|
||||
self.set_adult_cookie()
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ class NovelUpdatesCcSiteAdapter(BaseSiteAdapter):
|
|||
if self.getConfig("dedup_order_chapter_list",False):
|
||||
# Sort and deduplicate chapters (some stories in incorrect order and/or duplicates)
|
||||
chapters_data = []
|
||||
numbers_regex = re.compile('[^0-9\.]') # Everything except decimal and numbers
|
||||
numbers_regex = re.compile(r'[^0-9\.]') # Everything except decimal and numbers
|
||||
for ch in chapters:
|
||||
chapter_title = ch.p.get_text()
|
||||
chapter_url = ch['href']
|
||||
|
|
|
|||
|
|
@ -48,10 +48,10 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# normalized story URL.
|
||||
if "explicit" in self.parsedUrl.netloc:
|
||||
self._setURL('http://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self.dateformat = "%d/%b/%y"
|
||||
else:
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self.dateformat = "%d %b %Y"
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
|
|
@ -68,10 +68,10 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234 http://explicit."+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234 https://explicit."+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://")+r"(www\.|explicit\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://(www\.|explicit\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
|
|
@ -138,13 +138,13 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
|
||||
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
|
|
@ -223,7 +223,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -173,7 +173,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('title',title)
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
mt_card_social = soup.find('',{'class':'mt-card-social'})
|
||||
mt_card_social = soup.find(None,{'class':'mt-card-social'})
|
||||
author_link = mt_card_social('a')[-1]
|
||||
if author_link:
|
||||
authorId = author_link['href'].rsplit('/', 1)[1]
|
||||
|
|
@ -228,7 +228,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
self.story.addToList('warnings',stripHTML(li))
|
||||
|
||||
# get cover
|
||||
img = soup.find('',{'class':'row fic-header'}).find('img')
|
||||
img = soup.find(None,{'class':'row fic-header'}).find('img')
|
||||
if img:
|
||||
cover_url = img['src']
|
||||
self.setCoverImage(url,cover_url)
|
||||
|
|
@ -252,4 +252,16 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
if self.getConfig("include_author_notes",True):
|
||||
# collect both first, changing div for frontnote first
|
||||
# causes confusion in the tree.
|
||||
frontnote = div.find_previous('div', {'class':'author-note-portlet'})
|
||||
endnote = div.find_next('div', {'class':'author-note-portlet'})
|
||||
if frontnote:
|
||||
# move frontnote into chapter text div.
|
||||
div.insert(0,frontnote.extract())
|
||||
if endnote:
|
||||
# move endnote into chapter text div.
|
||||
div.append(endnote.extract())
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
||||
# Copyright 2012 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -240,3 +240,19 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
||||
def get_urls_from_page(self,url,normalize):
|
||||
from ..geturls import get_urls_from_html
|
||||
# this way it uses User-Agent or other special settings.
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
## I can't find when or why exactly this was added, but it was
|
||||
## in the old code, so here it remains.
|
||||
soup = self.make_soup(data)
|
||||
series = self.get_series_from_page(url,data)
|
||||
if series:
|
||||
return series
|
||||
else:
|
||||
return {'urllist':get_urls_from_html(soup.find('div',{'id':'mainpage'}),
|
||||
url,
|
||||
configuration=self.configuration,
|
||||
normalize=normalize)}
|
||||
|
|
|
|||
|
|
@ -270,8 +270,58 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
|||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'chp_raw'})
|
||||
if div.find('div', {'class' : 'wi_authornotes'}):
|
||||
div.find('div', {'class' : 'wi_authornotes'}).decompose()
|
||||
|
||||
exclude_notes = self.getConfigList('exclude_notes')
|
||||
|
||||
if 'authornotes' in exclude_notes:
|
||||
# Remove author's notes
|
||||
for author_notes in div.find_all('div', {'class' : 'wi_authornotes'}):
|
||||
author_notes.decompose()
|
||||
else:
|
||||
# Reformat the author's notes
|
||||
for author_notes in div.find_all('div', {'class' : 'wi_authornotes'}):
|
||||
author_notes['class'] = ['fff_chapter_notes']
|
||||
notes_div = soup.new_tag('div')
|
||||
|
||||
new_tag = soup.new_tag('b')
|
||||
new_tag.string = "Author's note:"
|
||||
notes_div.append(new_tag)
|
||||
|
||||
author_notes_body = author_notes.find('div', {'class' : 'wi_authornotes_body'})
|
||||
if author_notes_body:
|
||||
new_tag = soup.new_tag('blockquote')
|
||||
new_tag.append(author_notes_body)
|
||||
notes_div.append(new_tag)
|
||||
|
||||
# Clear old children from the note, then add this
|
||||
author_notes.clear()
|
||||
author_notes.append(notes_div)
|
||||
|
||||
if 'newsboxes' in exclude_notes:
|
||||
# Remove news boxes
|
||||
for news in div.find('div', {'class' : 'wi_news'}):
|
||||
news.decompose()
|
||||
else:
|
||||
# Reformat the news boxes
|
||||
for news in div.find_all('div', {'class' : 'wi_news'}):
|
||||
news['class'] = ['fff_chapter_notes']
|
||||
notes_div = soup.new_tag('div')
|
||||
|
||||
news_title = news.find('div', {'class' : 'wi_news_title'})
|
||||
if news_title:
|
||||
new_tag = soup.new_tag('b')
|
||||
new_tag.string = news_title.get_text()
|
||||
notes_div.append(new_tag)
|
||||
|
||||
news_body = news.find('div', {'class' : 'wi_news_body'})
|
||||
if news_body:
|
||||
new_tag = soup.new_tag('blockquote')
|
||||
new_tag.append(news_body)
|
||||
notes_div.append(new_tag)
|
||||
|
||||
# Clear old children from the news box, then add this
|
||||
news.clear()
|
||||
news.append(notes_div)
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -48,7 +48,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','siye') # XXX
|
||||
|
|
@ -68,10 +68,10 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/siye/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/siye/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://")+r"(www\.)?siye\.co\.uk/(siye/)?"+re.escape("viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://(www\.)?siye\.co\.uk/(siye/)?"+re.escape("viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
|
|
@ -108,7 +108,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
if a is None:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/siye/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/siye/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# need (or it's easier) to pull other metadata from the author's list page.
|
||||
|
|
@ -126,7 +126,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
# Find the chapters (from soup, not authsoup):
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.add_chapter(chapter,'http://'+self.host+'/siye/'+chapter['href'])
|
||||
self.add_chapter(chapter,'https://'+self.host+'/siye/'+chapter['href'])
|
||||
|
||||
if self.num_chapters() < 1:
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
|
|
@ -177,7 +177,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
nxt = label.next_sibling
|
||||
while nxt and "Hitcount:" not in stripHTML(nxt):
|
||||
summary += "%s"%nxt
|
||||
logger.debug(summary)
|
||||
# logger.debug(summary)
|
||||
nxt = nxt.next_sibling
|
||||
if summary.strip().endswith("<br/>"):
|
||||
summary = summary.strip()[0:-len("<br/>")]
|
||||
|
|
@ -221,7 +221,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
# Find Series name from series URL.
|
||||
a = titlea.findPrevious('a', href=re.compile(r"series.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from ..htmlcleanup import stripHTML
|
|||
from .. import exceptions as exceptions
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six.moves.urllib.parse import urlparse, urlunparse
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
|
|
@ -81,57 +82,98 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
## only one theme is supported.
|
||||
return "Classic"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if self.needToLogin \
|
||||
or 'Free Registration' in data \
|
||||
return 'Free Registration' in data \
|
||||
or "Invalid Password!" in data \
|
||||
or "Invalid User Name!" in data \
|
||||
or "Log In" in data \
|
||||
or "Access to unlinked chapters requires" in data \
|
||||
or "Log in to Storiesonline" in data :
|
||||
self.needToLogin = True
|
||||
return self.needToLogin
|
||||
or "Log in to Storiesonline" in data \
|
||||
or "WLPC log in System" in data
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['theusername'] = self.username
|
||||
params['thepassword'] = self.password
|
||||
username = self.username
|
||||
password = self.password
|
||||
else:
|
||||
params['theusername'] = self.getConfig("username")
|
||||
params['thepassword'] = self.getConfig("password")
|
||||
params['rememberMe'] = '1'
|
||||
params['submit'] = 'Login'
|
||||
username = self.getConfig("username")
|
||||
password = self.getConfig("password")
|
||||
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/sol-secure/login.php'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['theusername']))
|
||||
username))
|
||||
|
||||
if not params['theusername'] or not params['thepassword']:
|
||||
if not username or not password:
|
||||
logger.info("Login Required for URL %s" % loginUrl)
|
||||
raise exceptions.FailedToLogin(url,params['theusername'])
|
||||
raise exceptions.FailedToLogin(url,username)
|
||||
|
||||
## fetch 'v' code from login page.
|
||||
soup = self.make_soup(self._fetchUrl(loginUrl,usecache=False))
|
||||
## Site now uses a two POST login system on a different
|
||||
## domain. At least it appears shared between storiesonline
|
||||
## and finestories.
|
||||
|
||||
## fetch 'v' code, post action and redirected domain from login page.
|
||||
(data,opened) = self._fetchUrlOpened(loginUrl,
|
||||
usecache=False)
|
||||
# logger.debug(data)
|
||||
if not self.needToLoginCheck(data):
|
||||
## hitting login URL reminds system we're logged in?
|
||||
logger.debug("don't need to login")
|
||||
return
|
||||
useurl = opened.geturl()
|
||||
soup = self.make_soup(data)
|
||||
params = {}
|
||||
params['v']=soup.find('input', {'name':'v'})['value']
|
||||
params['email'] = username
|
||||
params['cmd'] = 'SubmitEmail'
|
||||
postAction = soup.find('form')['action']
|
||||
|
||||
parsedUrl = urlparse(useurl)
|
||||
postUrl = urlunparse((parsedUrl.scheme,
|
||||
parsedUrl.netloc,
|
||||
postAction,
|
||||
'','',''))
|
||||
# try:
|
||||
data = self._postUrl(postUrl,params,usecache=False)
|
||||
# logger.debug(data)
|
||||
# except HTTPError as e:
|
||||
# if e.code == 307:
|
||||
# logger.debug("HTTP Error 307: Temporary Redirect -- assumed to be valid login for this site")
|
||||
# return
|
||||
|
||||
soup = self.make_soup(data)
|
||||
params['v']=soup.find('input', {'name':'v'})['value']
|
||||
params['password'] = password
|
||||
params['cmd'] = 'Log In'
|
||||
|
||||
# postAction = soup.find('form')['action']
|
||||
|
||||
# parsedUrl = urlparse(useurl)
|
||||
# postUrl = urlunparse(urlunparse(
|
||||
# (parsedUrl.scheme,
|
||||
# parsedUrl.netloc,
|
||||
# postAction,
|
||||
# '','',''))
|
||||
|
||||
try:
|
||||
d = self._postUrl(loginUrl,params,usecache=False)
|
||||
self.needToLogin = False
|
||||
data = self._postUrl(postUrl,params,usecache=False)
|
||||
# logger.debug(data)
|
||||
except HTTPError as e:
|
||||
if e.code == 307:
|
||||
logger.debug("HTTP Error 307: Temporary Redirect -- assumed to be valid login for this site")
|
||||
return True
|
||||
logger.debug("e Location:%s"%e.headers['Location'])
|
||||
try:
|
||||
## need to hit redirect URL so cookies get set for
|
||||
## the story site domain. I think.
|
||||
data = self._postUrl(e.headers['Location'],params,usecache=False)
|
||||
except HTTPError as e:
|
||||
if e.code == 307:
|
||||
# logger.debug(e)
|
||||
return
|
||||
|
||||
if self.needToLoginCheck(d):
|
||||
if self.needToLoginCheck(data):
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['theusername']))
|
||||
raise exceptions.FailedToLogin(url,params['theusername'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
username))
|
||||
raise exceptions.FailedToLogin(url,username)
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
|
|
@ -148,15 +190,14 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
self.needToLogin = False
|
||||
try:
|
||||
data = self._fetchUrl(url+":i")
|
||||
# logger.debug(data)
|
||||
except HTTPError as e:
|
||||
if e.code in (404, 410):
|
||||
raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
|
||||
elif e.code in (401, 403):
|
||||
self.needToLogin = True
|
||||
data = ''
|
||||
data = 'Log In' # to trip needToLoginCheck
|
||||
else:
|
||||
raise e
|
||||
|
||||
|
|
@ -169,7 +210,6 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
if e.code in (404, 410):
|
||||
raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
|
||||
elif e.code == 401:
|
||||
self.needToLogin = True
|
||||
data = ''
|
||||
else:
|
||||
raise e
|
||||
|
|
@ -342,8 +382,8 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata("universe", universe_name)
|
||||
self.story.setMetadata('universeUrl','https://'+self.host+ '/library/universe.php?id=' + universe_id)
|
||||
break
|
||||
else:
|
||||
logger.debug("No universe page")
|
||||
# else:
|
||||
# logger.debug("No universe page")
|
||||
except:
|
||||
raise
|
||||
pass
|
||||
|
|
@ -357,9 +397,9 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
universeUrl = 'https://'+self.host+a['href']
|
||||
# logger.debug("Retrieving Universe - about to get page - universeUrl='{0}".format(universeUrl))
|
||||
universe_soup = self.make_soup(self._fetchUrl(universeUrl))
|
||||
logger.debug("Retrieving Universe - have page")
|
||||
# logger.debug("Retrieving Universe - have page")
|
||||
if universe_soup:
|
||||
logger.debug("Retrieving Universe - looking for name")
|
||||
# logger.debug("Retrieving Universe - looking for name")
|
||||
universe_name = stripHTML(universe_soup.find('h1', {'id' : 'ptitle'}))
|
||||
universe_name = re.sub(r' .\s+A Universe from the Mind.*$','',universe_name)
|
||||
# logger.debug("Universes name: '{0}'".format(universe_name))
|
||||
|
|
@ -368,15 +408,15 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
# logger.debug("Setting universe name: '{0}'".format(universe_name))
|
||||
self.story.setMetadata('universe',universe_name)
|
||||
if self.getConfig("universe_as_series") and not self.story.getMetadata('seriesUrl'):
|
||||
logger.debug("universe_as_series")
|
||||
# logger.debug("universe_as_series")
|
||||
# take position in universe page as number in series.
|
||||
for i, storya in enumerate(universe_soup.find_all('a',href=re.compile(r'^/s/\d+/'))):
|
||||
if storya['href'].split('/')[2] == self.story.getMetadata('storyId'):
|
||||
self.setSeries(universe_name, i+1)
|
||||
self.story.setMetadata('seriesUrl',universeUrl)
|
||||
break
|
||||
else:
|
||||
logger.debug("Do not have a universe")
|
||||
# else:
|
||||
# logger.debug("Do not have a universe")
|
||||
except:
|
||||
raise
|
||||
pass
|
||||
|
|
@ -499,7 +539,11 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
|
||||
chapter_title = None
|
||||
if self.getConfig('inject_chapter_title'):
|
||||
chapter_title = pagetag.find('h2').extract()
|
||||
h2tag = pagetag.find('h2')
|
||||
if h2tag:
|
||||
# I'm seeing an h1 now, but it's not logged in?
|
||||
# Something's broken...
|
||||
chapter_title = h2tag.extract()
|
||||
|
||||
# Strip the header section
|
||||
tag = pagetag.find('header')
|
||||
|
|
|
|||
|
|
@ -442,6 +442,30 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
|
|||
soup = self.make_soup(text)
|
||||
return self.utf8FromSoup(url,soup)
|
||||
|
||||
def get_urls_from_page(self,url,normalize):
    '''
    Return a fixed, fake series for exercising series handling.

    Neither url nor normalize is consulted and no page is fetched:
    every call returns the same dict with a series 'name', a 'desc'
    and a 'urllist' of nine test1.com story URLs (sid=1 to sid=9).
    '''
    logger.debug("Fake series test1.com")
    return {'name':'The Great Test',
            'desc':'<div>The Great Test Series of test1.com!</div>',
            'urllist':['http://test1.com?sid=%d'%sid for sid in range(1,10)],
            }
|
||||
|
||||
|
||||
def getClass():
|
||||
return TestSiteAdapter
|
||||
|
||||
|
|
|
|||
53
fanficfare/adapters/adapter_thesietchcom.py
Normal file
53
fanficfare/adapters/adapter_thesietchcom.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
|
||||
|
||||
def getClass():
|
||||
return TheSietchComAdapter
|
||||
|
||||
class TheSietchComAdapter(BaseXenForo2ForumAdapter):
    '''
    XenForo2 forum adapter for www.the-sietch.com.

    Thread URLs on this site look like
    https://www.the-sietch.com/index.php?threads/some-title.3243/
    so the path prefix is '/index.php?' and the reader mode URLs
    are built from the story URL rather than from the thread id.
    '''

    def __init__(self, config, url):
        BaseXenForo2ForumAdapter.__init__(self, config, url)

        # Site abbreviations have to be unique across adapters.
        self.story.setMetadata('siteabbrev','sietch')

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        '''Return the site domain, including the www it uses.'''
        return 'www.the-sietch.com'

    @classmethod
    def getPathPrefix(cls):
        '''Return the path prefix; this site needs more than just '/'.'''
        return '/index.php?'

    def make_reader_url(self,tmcat_num,reader_page_num):
        '''
        Return the reader mode URL for page reader_page_num.

        Example:
        https://www.the-sietch.com/index.php?threads/shattered-sphere-the-arcadian-free-march.3243/reader/page-2

        tmcat_num is discarded -- the-sietch.com doesn't have
        multiple threadmark categories yet.
        '''
        story_url = self.story.getMetadata('storyUrl')
        page_suffix = 'reader/page-'+unicode(reader_page_num)
        return story_url+page_suffix
|
||||
|
||||
# XXX different threadmarks categories
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -350,5 +350,34 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
r"https://\1",url)
|
||||
return url
|
||||
|
||||
def before_get_urls_from_page(self,url,normalize):
    '''
    Prepare for scanning a page for story URLs.

    Series pages don't need this, but it does affect author pages,
    top lists, etc: when is_adult is configured, the site max
    rating is set (using url) before the page is fetched.
    normalize is not used here.
    '''
    if not self.getConfig("is_adult"):
        return
    self.setSiteMaxRating(url)
|
||||
|
||||
def get_series_from_page(self,url,data,normalize=False):
    '''
    Return series metadata when url is a TtH series page.

    This method is to make it easier for adapters to detect a
    series URL, pick out the series metadata and list of storyUrls
    to return without needing to override get_urls_from_page
    entirely.

    url is matched against https://www.tthfanfic.org/Series-<digits>;
    data is the already fetched page text.  normalize is accepted
    for interface compatibility and not used here.

    Returns {} for any other URL.  For a series page returns a dict
    with 'urllist' (story URLs), 'name' (page title minus the
    'TtH • Series • ' prefix) and, when the page has a
    div.storybody, 'desc' (that tag, renamed to div).
    '''
    ## https://www.tthfanfic.org/Series-2329
    ## dots escaped so only the real host name matches.
    if not re.match(r"https?://www\.tthfanfic\.org/Series-(?P<id>\d+)$",url):
        ## not a series page: empty dict tells the caller to fall
        ## back to its generic URL collection.
        return {}

    soup = self.make_soup(data)
    retval = {}
    retval['urllist'] = [ 'https://'+self.host+a['href']
                          for a in soup.select('div.storylistitem a.storylink') ]
    retval['name'] = stripHTML(soup.select_one("title")).replace('TtH • Series • ','')

    desc = soup.select_one("div.storybody")
    ## a missing description is left out instead of raising
    ## AttributeError on None.
    if desc is not None:
        desc.name='div' # change blockquote to div to match stories.
        retval['desc']=desc
    return retval
|
||||
|
||||
def getClass():
|
||||
return TwistingTheHellmouthSiteAdapter
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter):
|
|||
if self.getConfig("dedup_order_chapter_list",False):
|
||||
# Sort and deduplicate chapters (some stories in incorrect order and/or duplicates)
|
||||
chapters_data = []
|
||||
numbers_regex = re.compile('[^0-9\.]') # Everything except decimal and numbers
|
||||
numbers_regex = re.compile(r'[^0-9\.]') # Everything except decimal and numbers
|
||||
for ch in chapters:
|
||||
chapter_title = ch.p.get_text()
|
||||
chapter_url = ch['href']
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -30,7 +30,7 @@ from functools import partial
|
|||
import traceback
|
||||
import copy
|
||||
|
||||
from bs4 import BeautifulSoup, __version__
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
|
@ -392,6 +392,112 @@ class BaseSiteAdapter(Configurable):
|
|||
"Needs to be overriden in each adapter class."
|
||||
pass
|
||||
|
||||
def before_get_urls_from_page(self,url,normalize):
    '''
    Hook called before a page is fetched to collect story URLs.

    Some sites need a login or other prep for 'from page' to work
    best.  Kept as a separate function so adapter code stays
    minimal; this base version does nothing.
    '''
    return None
|
||||
|
||||
def get_urls_from_page(self,url,normalize):
    '''
    Fetch url and return the story URLs found on that page.

    This is a method in adapter now rather than the generic code
    that was in geturls.py to allow individual adapters to
    recognize and provide special handling if needed for series.
    Prompted largely by AO3 authors leaving links to other stories
    in story desc that were getting picked up.

    Returns whatever get_series_from_page() returns when that is
    non-empty (with a soup 'desc' converted to text by
    utf8FromSoup); otherwise {'urllist': [...]} built by
    get_urls_from_html() from the fetched page.
    '''
    # function-level import as in the original code -- presumably
    # to avoid a circular import with geturls; confirm before moving.
    from ..geturls import get_urls_from_html

    ## hook for logins, etc.
    self.before_get_urls_from_page(url,normalize)

    # this way it uses User-Agent or other special settings.
    data = self._fetchUrl(url,usecache=True)
    series = self.get_series_from_page(url,data,normalize)
    if not series:
        return {'urllist':get_urls_from_html(self.make_soup(data),
                                             url,
                                             configuration=self.configuration,
                                             normalize=normalize)}

    # just to make it easier for adapters.
    desc = series.get('desc',None)
    if isinstance(desc,(BeautifulSoup,Tag)):
        series['desc'] = self.utf8FromSoup(url,desc)
        # NOTE: series desc imgs are *not* included in ebook.
        # Should they be removed?
    return series
|
||||
|
||||
def get_series_from_page(self,url,data,normalize=False):
    '''
    This method is to make it easier for adapters to detect a
    series URL, pick out the series metadata and list of storyUrls
    to return without needing to override get_urls_from_page
    entirely.

    This base version only recognizes eFiction style series URLs
    (viewseries.php?seriesid=, series.php?ssid= and similar).

    url is the page URL, data the already fetched page text and
    normalize is passed through to get_urls_from_html().

    Returns {} when url isn't such a series URL or when parsing
    fails.  Otherwise returns a dict with 'urllist' plus 'name'
    and 'desc' when they are found on the page; 'desc' is either
    a string or a soup tag.
    '''
    ## for eFiction sites:
    ## http://www.dracoandginny.com/viewseries.php?seriesid=45
    # logger.debug("base get_series_from_page:%s"%url)
    if not re.match(r".*(view)?series\.php\?s(erie)?sid=\d+.*",url): # seriesid or ssid
        return {}

    # only series pages need it, so only import it for them.
    from ..geturls import get_urls_from_html

    try:
        # logger.debug("Attempting eFiction get_series_from_page")
        soup = self.make_soup(data)
        retval = {}

        nametag = soup.select_one('div#pagetitle') or soup.select_one('div#storytitle')
        # logger.debug(nametag)
        if nametag:
            # remove the link (presumably the author's) from the
            # title, when there is one.
            namelink = nametag.find('a')
            if namelink is not None:
                namelink.decompose()
            name = stripHTML(nametag)
            # some have [ - ], some have ' by', some have both.
            # order matters.
            for s in ('[ - ]',' by'):
                if name.endswith(s):
                    # remove trailing s
                    name = name[:-len(s)].strip()
            retval['name'] = name

        summaryspan = soup.select_one("div#titleblock span.label") or soup.select_one("div#titleblock span.classification")
        # logger.debug(summaryspan)
        if summaryspan and stripHTML(summaryspan) == "Summary:":
            # collect strings and tags up to the next
            # <span class='label'> or <span class='classification'>
            parts = []
            c = summaryspan.next_sibling
            # 'is not None' so an empty text node doesn't end the
            # summary early.
            while c is not None:
                if isinstance(c,Tag) and c.name == 'span':
                    # spans without a class attr are part of the desc.
                    classes = c.get('class') or []
                    if 'label' in classes or 'classification' in classes:
                        break
                parts.append(unicode(c))
                c = c.next_sibling
            desc = ''.join(parts)
            if desc:
                # strip spaces and trailing <br> tags.
                desc = re.sub(r'( *<br/?>)+$','',desc.strip())
                retval['desc']=desc.strip()
        else:
            # some(1?) sites
            summarydiv = soup.select_one("div.summarytext") or soup.select_one("blockquote2") # fanfictalk.com
            # check before touching it: a series without a summary
            # must still keep its name and urllist.
            if summarydiv:
                summarydiv.name='div' # force name to div.
                retval['desc']=summarydiv

        # trying to get story urls for series from different
        # eFictions is a nightmare that the pre-existing
        # get_urls_from_html() handles well enough.
        # logger.debug(soup)
        retval['urllist']=get_urls_from_html(soup,
                                             url,
                                             configuration=self.configuration,
                                             normalize=normalize)
    except Exception as e:
        # best effort only: on any failure report no series so the
        # caller falls back to its generic URL collection.
        logger.debug("get_series_from_page for eFiction failed:%s"%e)
        retval = {}
    return retval
|
||||
|
||||
# Just for series, in case we choose to change how it's stored or represented later.
|
||||
def setSeries(self,name,num):
|
||||
if self.getConfig('collect_series'):
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
|
|||
raise exceptions.FailedToLogin(self.url,"No username given. Set in personal.ini or enter when prompted.")
|
||||
|
||||
## need a login token.
|
||||
data = self._fetchUrl(self.getURLPrefix() + '/login',usecache=False)
|
||||
data = self._fetchUrl(self.getURLPrefix() + 'login',usecache=False)
|
||||
# logger.debug(data)
|
||||
# <input type="hidden" name="_xfToken" value="1556822458,710e5bf6fc87c67ea04ab56a910ac3ff" />
|
||||
find_token='<input type="hidden" name="_xfToken" value="'
|
||||
|
|
@ -71,10 +71,10 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
|
|||
xftoken = xftoken[:xftoken.index('"')]
|
||||
params['remember'] = '1'
|
||||
params['_xfToken'] = xftoken
|
||||
params['_xfRedirect'] = self.getURLPrefix() + '/'
|
||||
params['_xfRedirect'] = self.getURLPrefix()
|
||||
|
||||
## https://forum.questionablequesting.com/login/login
|
||||
loginUrl = self.getURLPrefix() + '/login/login'
|
||||
loginUrl = self.getURLPrefix() + 'login/login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['login']))
|
||||
|
||||
|
|
@ -225,7 +225,7 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
|
|||
|
||||
def get_threadmark_range_url(self,tm_item,tmcat_num):
|
||||
fetcher = tm_item.find('div',{'data-xf-click':'threadmark-fetcher'})
|
||||
# logger.debug(fetcher)
|
||||
# logger.debug('data-fetchurl:%s'%fetcher)
|
||||
return self.getURLPrefix() + fetcher['data-fetchurl']
|
||||
|
||||
def get_threadmark_date(self,tm_item):
|
||||
|
|
|
|||
|
|
@ -43,21 +43,18 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
#logger.info("init url: "+url)
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
#logger.debug("groupdict:%s"%m.groupdict())
|
||||
if m.group('anchorpost'):
|
||||
self.story.setMetadata('storyId',m.group('anchorpost'))
|
||||
self._setURL(self.getURLPrefix() + '/posts/'+m.group('anchorpost')+'/')
|
||||
self._setURL(self.getURLPrefix() + 'posts/'+m.group('anchorpost')+'/')
|
||||
else:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
# normalized story URL.
|
||||
title = m.group('title') or ""
|
||||
self._setURL(self.getURLPrefix() + '/'+m.group('tp')+'/'+title+self.story.getMetadata('storyId')+'/')
|
||||
self._setURL(self.getURLPrefix() + m.group('tp')+'/'+title+self.story.getMetadata('storyId')+'/')
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
|
|
@ -75,18 +72,23 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
"Only needs to be overriden if has additional ini sections."
|
||||
return ['base_xenforoforum',cls.getConfigSection()]
|
||||
|
||||
@classmethod
def getPathPrefix(cls):
    '''
    Return the site's fixed path prefix, which getURLPrefix()
    appends after the domain.  '/' for most sites.
    '''
    prefix = '/'
    return prefix
|
||||
|
||||
@classmethod
|
||||
def getURLPrefix(cls):
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'https://' + cls.getSiteDomain()
|
||||
return 'https://' + cls.getSiteDomain() + cls.getPathPrefix()
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return cls.getURLPrefix()+"/threads/some-story-name.123456/ "+cls.getURLPrefix()+"/posts/123456/"
|
||||
return cls.getURLPrefix()+"threads/some-story-name.123456/ "+cls.getURLPrefix()+"posts/123456/"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
## need to accept http and https still.
|
||||
return re.escape(self.getURLPrefix()).replace("https","https?")+r"/(?P<tp>threads|posts)/(?P<title>.+\.)?(?P<id>\d+)/?[^#]*?(#?post-(?P<anchorpost>\d+))?$"
|
||||
return re.escape(self.getURLPrefix()).replace("https","https?")+r"(?P<tp>threads|posts)/(?P<title>.+\.)?(?P<id>\d+)/?[^#]*?(#?post-(?P<anchorpost>\d+))?$"
|
||||
|
||||
def _fetchUrlOpened(self, url,
|
||||
parameters=None,
|
||||
|
|
@ -119,12 +121,12 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
|
||||
## moved from extract metadata to share with normalize_chapterurl.
|
||||
if not url.startswith('http'):
|
||||
url = self.getURLPrefix()+'/'+url
|
||||
url = self.getURLPrefix()+url
|
||||
|
||||
if ( url.startswith(self.getURLPrefix()) or
|
||||
url.startswith('http://'+self.getSiteDomain()) or
|
||||
url.startswith('https://'+self.getSiteDomain()) ) and \
|
||||
( '/posts/' in url or '/threads/' in url or 'showpost.php' in url or 'goto/post' in url):
|
||||
( self.getPathPrefix()+'posts/' in url or self.getPathPrefix()+'threads/' in url or 'showpost.php' in url or 'goto/post' in url):
|
||||
## brute force way to deal with SB's http->https change
|
||||
## when hardcoded http urls. Now assumes all
|
||||
## base_xenforoforum sites use https--true as of
|
||||
|
|
@ -132,10 +134,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
url = url.replace('http://','https://')
|
||||
|
||||
# http://forums.spacebattles.com/showpost.php?p=4755532&postcount=9
|
||||
url = re.sub(r'showpost\.php\?p=([0-9]+)(&postcount=[0-9]+)?',r'/posts/\1/',url)
|
||||
url = re.sub(r'showpost\.php\?p=([0-9]+)(&postcount=[0-9]+)?',self.getPathPrefix()+r'posts/\1/',url)
|
||||
|
||||
# http://forums.spacebattles.com/goto/post?id=15222406#post-15222406
|
||||
url = re.sub(r'/goto/post\?id=([0-9]+)(#post-[0-9]+)?',r'/posts/\1/',url)
|
||||
url = re.sub(r'goto/post\?id=([0-9]+)(#post-[0-9]+)?',self.getPathPrefix()+r'posts/\1/',url)
|
||||
|
||||
url = re.sub(r'(^[\'"]+|[\'"]+$)','',url) # strip leading or trailing '" from incorrect quoting.
|
||||
url = re.sub(r'like$','',url) # strip 'like' if incorrect 'like' link instead of proper post URL.
|
||||
|
|
@ -147,24 +149,24 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
## *correct* ones.
|
||||
# https://forums.sufficientvelocity.com/posts/39915/
|
||||
if '#post-' in url:
|
||||
url = self.getURLPrefix()+'/posts/'+url.split('#post-')[1]+'/'
|
||||
url = self.getURLPrefix()+'posts/'+url.split('#post-')[1]+'/'
|
||||
|
||||
## Same as above except for for case where author mistakenly
|
||||
## used the reply link instead of normal link to post.
|
||||
# "http://forums.spacebattles.com/threads/manager-worm-story-thread-iv.301602/reply?quote=15962513"
|
||||
# https://forums.spacebattles.com/posts/
|
||||
if 'reply?quote=' in url:
|
||||
url = self.getURLPrefix()+'/posts/'+url.split('reply?quote=')[1]+'/'
|
||||
url = self.getURLPrefix()+'posts/'+url.split('reply?quote=')[1]+'/'
|
||||
|
||||
## normalize named thread urls, too.
|
||||
# http://forums.sufficientvelocity.com/threads/harry-potter-and-the-not-fatal-at-all-cultural-exchange-program.330/
|
||||
url = re.sub(r'/threads/.*\.([0-9]+)/',r'/threads/\1/',url)
|
||||
url = re.sub(re.escape(self.getPathPrefix())+r'threads/.*\.([0-9]+)/',self.getPathPrefix()+r'threads/\1/',url)
|
||||
|
||||
is_chapter_url = True
|
||||
|
||||
## One person once put a threadmarks URL directly in an
|
||||
## index post and now we have to exclude it.
|
||||
if re.match(r".*/threads/[0-9]+/threadmarks",url):
|
||||
if re.match(r'.*'+re.escape(self.getPathPrefix())+'threads/[0-9]+/threadmarks',url):
|
||||
is_chapter_url = False
|
||||
|
||||
return (is_chapter_url,url)
|
||||
|
|
@ -174,7 +176,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
## storyId, because this is called before story url has been
|
||||
## parsed.
|
||||
# logger.debug("pre--url:%s"%url)
|
||||
url = re.sub(r'/threads/.*\.(?P<id>[0-9]+)/',r'/threads/\g<id>/',url)
|
||||
url = re.sub(re.escape(self.getPathPrefix())+r'threads/.*\.(?P<id>[0-9]+)/',self.getPathPrefix()+r'threads/\g<id>/',url)
|
||||
# logger.debug("post-url:%s"%url)
|
||||
return url
|
||||
|
||||
|
|
@ -206,10 +208,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
params['register'] = '0'
|
||||
params['cookie_check'] = '1'
|
||||
params['_xfToken'] = ''
|
||||
params['redirect'] = self.getURLPrefix() + '/'
|
||||
params['redirect'] = self.getURLPrefix()
|
||||
|
||||
## https://forum.questionablequesting.com/login/login
|
||||
loginUrl = self.getURLPrefix() + '/login/login'
|
||||
loginUrl = self.getURLPrefix() + 'login/login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['login']))
|
||||
|
||||
|
|
@ -294,7 +296,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
elif threadmarksa['href'].startswith('/'):
|
||||
href = 'https://'+self.getSiteDomain()+threadmarksa['href']
|
||||
else:
|
||||
href = self.getURLPrefix()+'/'+threadmarksa['href']
|
||||
href = self.getURLPrefix()+threadmarksa['href']
|
||||
threadmarkgroups[tmcat_name]=self.fetch_threadmarks(href,
|
||||
tmcat_name,
|
||||
tmcat_num)
|
||||
|
|
@ -385,7 +387,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
date = self.get_threadmark_date(tm_item)
|
||||
words,kwords = self.get_threadmark_words(tm_item)
|
||||
if 'http' not in url:
|
||||
url = self.getURLPrefix()+"/"+url
|
||||
url = self.getURLPrefix()+url
|
||||
# logger.debug("%s. %s"%(tmcat_index,name))
|
||||
threadmarks.append({"tmcat_name":tmcat_name,
|
||||
"tmcat_num":tmcat_num,
|
||||
|
|
@ -452,7 +454,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
topsoup = souptag = self.make_soup(data)
|
||||
|
||||
if '#' not in useurl and '/posts/' not in useurl:
|
||||
if '#' not in useurl and self.getPathPrefix()+'posts/' not in useurl:
|
||||
self._setURL(useurl) ## for when threadmarked thread name changes.
|
||||
|
||||
self.parse_title(topsoup)
|
||||
|
|
@ -602,7 +604,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
def parse_author(self,souptag):
|
||||
a = souptag.find('h3',{'class':'userText'}).find('a')
|
||||
self.story.addToList('authorId',a['href'].split('/')[1])
|
||||
authorUrl = self.getURLPrefix()+'/'+a['href']
|
||||
authorUrl = self.getURLPrefix()+a['href']
|
||||
self.story.addToList('authorUrl',authorUrl)
|
||||
self.story.addToList('author',a.text)
|
||||
|
||||
|
|
@ -654,7 +656,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
def get_cache_post(self,postid):
|
||||
## saved using original 'post-99999' id for key.
|
||||
postid=unicode(postid) # thank you, Py3.
|
||||
if '/posts/' in postid:
|
||||
if self.getPathPrefix()+'posts/' in postid:
|
||||
## allows chapter urls to be passed in directly.
|
||||
# assumed normalized to /posts/1234/
|
||||
postid = "post-"+postid.split('/')[-2]
|
||||
|
|
@ -676,7 +678,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
# first post when always_include_first_post.
|
||||
if ( self.reader and
|
||||
self.getConfig("use_reader_mode",True) and
|
||||
'/threads/' not in url and
|
||||
self.getPathPrefix()+'threads/' not in url and
|
||||
(index > 0 or not self.getConfig('always_include_first_post')) ):
|
||||
logger.debug("Using reader mode")
|
||||
# in case it changes:
|
||||
|
|
@ -718,7 +720,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
# page. looking for it in cache reuses code in
|
||||
# cache_posts that finds post tags.
|
||||
souptag = self.get_cache_post(url)
|
||||
if not souptag and '/threads/' in url: # first post uses /thread/ URL.
|
||||
if not souptag and self.getPathPrefix()+'threads/' in url: # first post uses /thread/ URL.
|
||||
souptag = self.get_first_post(topsoup)
|
||||
|
||||
# remove <div class="baseHtml noticeContent"> because it can
|
||||
|
|
@ -729,10 +731,10 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
|
|||
postbody = self.get_post_body(souptag)
|
||||
|
||||
# XenForo uses <base href="https://forums.spacebattles.com/" />
|
||||
return self.utf8FromSoup(self.getURLPrefix()+'/',postbody)
|
||||
return self.utf8FromSoup(self.getURLPrefix(),postbody)
|
||||
|
||||
def make_reader_url(self,tmcat_num,reader_page_num):
|
||||
return self.getURLPrefix()+'/threads/'+self.story.getMetadata('storyId')+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num)
|
||||
return self.getURLPrefix()+'threads/'+self.story.getMetadata('storyId')+'/'+tmcat_num+'/reader?page='+unicode(reader_page_num)
|
||||
|
||||
def get_quote_expand_tag(self,soup):
|
||||
return soup.find_all('div',{'class':'quoteExpand'})
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ else: # > 3.0
|
|||
def pickle_load(f):
|
||||
return pickle.load(f,encoding="bytes")
|
||||
|
||||
version="3.23.4"
|
||||
version="3.24.2"
|
||||
os.environ['CURRENT_VERSION_ID']=version
|
||||
|
||||
global_cache = 'global_cache'
|
||||
|
|
@ -125,7 +125,7 @@ def main(argv=None,
|
|||
help='Exclude list of chapters("zchapters") from metadata stdout output. No effect without --meta-only or --json-meta flags', )
|
||||
parser.add_option('-j', '--json-meta',
|
||||
action='store_true', dest='jsonmeta',
|
||||
help='Output metadata as JSON with download, or with --meta-only flag. (Only JSON will be output with --meta-only flag.)', )
|
||||
help='Output metadata as JSON with download, or with --meta-only flag. (Only JSON will be output with --meta-only flag.) Also now series name and desc if available with --list', )
|
||||
parser.add_option('--no-output',
|
||||
action='store_true', dest='nooutput',
|
||||
help='Do not download chapters and do not write output file. Intended for testing and with --meta-only.', )
|
||||
|
|
@ -249,25 +249,42 @@ def main(argv=None,
|
|||
parser.print_help();
|
||||
return
|
||||
|
||||
if options.save_cache:
|
||||
try:
|
||||
with open(global_cache,'rb') as jin:
|
||||
options.pagecache = pickle_load(jin)
|
||||
options.cookiejar = cl.LWPCookieJar()
|
||||
options.cookiejar.load(global_cookies)
|
||||
except Exception as e:
|
||||
## This is not uncommon, will happen when starting a new
|
||||
## cache, for example.
|
||||
print("Didn't load --save-cache %s"%e)
|
||||
|
||||
if options.list:
|
||||
configuration = get_configuration(options.list,
|
||||
passed_defaultsini,
|
||||
passed_personalini,options)
|
||||
retlist = get_urls_from_page(options.list, configuration)
|
||||
print('\n'.join(retlist))
|
||||
frompage = get_urls_from_page(options.list, configuration)
|
||||
if options.jsonmeta:
|
||||
import json
|
||||
print(json.dumps(frompage, sort_keys=True,
|
||||
indent=2, separators=(',', ':')))
|
||||
else:
|
||||
retlist = frompage.get('urllist',[])
|
||||
print('\n'.join(retlist))
|
||||
|
||||
if options.normalize:
|
||||
configuration = get_configuration(options.normalize,
|
||||
passed_defaultsini,
|
||||
passed_personalini,options)
|
||||
retlist = get_urls_from_page(options.normalize, configuration,normalize=True)
|
||||
retlist = get_urls_from_page(options.normalize, configuration,normalize=True).get('urllist',[])
|
||||
print('\n'.join(retlist))
|
||||
|
||||
if options.downloadlist:
|
||||
configuration = get_configuration(options.downloadlist,
|
||||
passed_defaultsini,
|
||||
passed_personalini,options)
|
||||
retlist = get_urls_from_page(options.downloadlist, configuration)
|
||||
retlist = get_urls_from_page(options.downloadlist, configuration).get('urllist',[])
|
||||
urls.extend(retlist)
|
||||
|
||||
if options.imaplist or options.downloadimap:
|
||||
|
|
@ -298,15 +315,6 @@ def main(argv=None,
|
|||
#print("url: (%s)"%url)
|
||||
urls.append(url)
|
||||
|
||||
if options.save_cache:
|
||||
try:
|
||||
with open(global_cache,'rb') as jin:
|
||||
options.pagecache = pickle_load(jin)
|
||||
options.cookiejar = cl.LWPCookieJar()
|
||||
options.cookiejar.load(global_cookies)
|
||||
except Exception as e:
|
||||
print("Didn't load --save-cache %s"%e)
|
||||
|
||||
if not list_only:
|
||||
if len(urls) < 1:
|
||||
print("No valid story URLs found")
|
||||
|
|
@ -323,12 +331,6 @@ def main(argv=None,
|
|||
raise
|
||||
print("URL(%s) Failed: Exception (%s). Run URL individually for more detail."%(url,e))
|
||||
|
||||
# Saved in configurable.py now.
|
||||
# if options.save_cache:
|
||||
# with open('global_cache','wb') as jout:
|
||||
# pickle.dump(options.pagecache,jout,protocol=2)
|
||||
# options.cookiejar.save('global_cookies')
|
||||
|
||||
# make rest a function and loop on it.
|
||||
def do_download(arg,
|
||||
options,
|
||||
|
|
@ -374,19 +376,6 @@ def do_download(arg,
|
|||
|
||||
adapter = adapters.getAdapter(configuration, url)
|
||||
|
||||
## Share pagecache and cookiejar between multiple downloads.
|
||||
if not hasattr(options,'pagecache'):
|
||||
options.pagecache = configuration.get_empty_pagecache()
|
||||
if not hasattr(options,'cookiejar'):
|
||||
options.cookiejar = configuration.get_empty_cookiejar()
|
||||
if options.save_cache:
|
||||
save_cache = global_cache
|
||||
save_cookies = global_cookies
|
||||
else:
|
||||
save_cache = save_cookies = None
|
||||
configuration.set_pagecache(options.pagecache,save_cache)
|
||||
configuration.set_cookiejar(options.cookiejar,save_cookies)
|
||||
|
||||
# url[begin-end] overrides CLI option if present.
|
||||
if ch_begin or ch_end:
|
||||
adapter.setChaptersRange(ch_begin, ch_end)
|
||||
|
|
@ -621,6 +610,19 @@ def get_configuration(url,
|
|||
if options.progressbar:
|
||||
configuration.set('overrides','progressbar','true')
|
||||
|
||||
## Share pagecache and cookiejar between multiple downloads.
|
||||
if not hasattr(options,'pagecache'):
|
||||
options.pagecache = configuration.get_empty_pagecache()
|
||||
if not hasattr(options,'cookiejar'):
|
||||
options.cookiejar = configuration.get_empty_cookiejar()
|
||||
if options.save_cache:
|
||||
save_cache = global_cache
|
||||
save_cookies = global_cookies
|
||||
else:
|
||||
save_cache = save_cookies = None
|
||||
configuration.set_pagecache(options.pagecache,save_cache)
|
||||
configuration.set_cookiejar(options.cookiejar,save_cookies)
|
||||
|
||||
return configuration
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ def get_valid_set_options():
|
|||
'fix_fimf_blockquotes':(['fimfiction.net'],None,boollist),
|
||||
'fail_on_password':(['fimfiction.net'],None,boollist),
|
||||
'keep_prequel_in_description':(['fimfiction.net'],None,boollist),
|
||||
'include_author_notes':(['fimfiction.net'],None,boollist),
|
||||
'include_author_notes':(['fimfiction.net','royalroad.com'],None,boollist),
|
||||
'do_update_hook':(['fimfiction.net',
|
||||
'archiveofourown.org'],None,boollist),
|
||||
'always_login':(['archiveofourown.org']+base_xenforo_list,None,boollist),
|
||||
|
|
|
|||
|
|
@ -588,6 +588,14 @@ storynotes_label:Story Notes
|
|||
add_to_extra_titlepage_entries:,storynotes
|
||||
|
||||
[base_xenforoforum]
|
||||
## Some sites require login for some stories
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
## XenForo sites require login for some stories, but don't report that
|
||||
## to FFF. To download those, set your username, password and set
|
||||
## always_login:true
|
||||
#always_login:false
|
||||
|
||||
## We've been requested by the site(s) admin to rein in hits. If you
|
||||
## download fewer stories less often you can likely get by with
|
||||
|
|
@ -1244,65 +1252,6 @@ extra_titlepage_entries:eroticatags,disclaimer
|
|||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[archive.hpfanfictalk.com]
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. In commandline version, this should go in your
|
||||
## personal.ini, not defaults.ini.
|
||||
#is_adult:true
|
||||
|
||||
add_to_extra_valid_entries:,themes,inclusivity,house,
|
||||
series00,series00Url,series00HTML,
|
||||
series01,series01Url,series01HTML,
|
||||
series02,series02Url,series02HTML,
|
||||
series03,series03Url,series03HTML,
|
||||
series04,series04Url,series04HTML,
|
||||
series05,series05Url,series05HTML,
|
||||
series06,series06Url,series06HTML,
|
||||
series07,series07Url,series07HTML,
|
||||
series08,series08Url,series08HTML,
|
||||
series09,series09Url,series09HTML,
|
||||
|
||||
## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to
|
||||
## make entryHTML.
|
||||
make_linkhtml_entries:series00,series01,series02,series03,series04,
|
||||
series05,series06,series07,series08,series09
|
||||
|
||||
themes_label:Themes
|
||||
inclusivity_label:Inclusivity
|
||||
house_label:HPFT Forum House
|
||||
|
||||
## series00 will be the same as common metadata series.
|
||||
series00HTML_label:Series
|
||||
series01HTML_label:Additional Series
|
||||
series02HTML_label:Additional Series
|
||||
series03HTML_label:Additional Series
|
||||
series04HTML_label:Additional Series
|
||||
series05HTML_label:Additional Series
|
||||
series06HTML_label:Additional Series
|
||||
series07HTML_label:Additional Series
|
||||
series08HTML_label:Additional Series
|
||||
series09HTML_label:Additional Series
|
||||
|
||||
## Try to collect series names and numbers of this story in those
|
||||
## series. This lets us turn it on and off by site without keeping a
|
||||
## lengthy titlepage_entries per site and prevents it updating in the
|
||||
## plugin.
|
||||
collect_series: true
|
||||
|
||||
add_to_extra_titlepage_entries:,series01HTML,series02HTML,series03HTML,
|
||||
series04HTML,series05HTML,series06HTML,series07HTML,series08HTML,series09HTML
|
||||
|
||||
## archive.hpfanfictalk.com takes margins away, even from p tags, by
|
||||
## default. So authors have to either include extra br/p tags or
|
||||
## their own styles. These allow for both, but leave you at the mercy
|
||||
## of author CSS.
|
||||
add_to_output_css:
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
add_to_keep_html_attrs:,style
|
||||
|
||||
[archive.shriftweb.org]
|
||||
website_encodings:Windows-1252,utf8,iso-8859-1
|
||||
|
||||
|
|
@ -1709,6 +1658,72 @@ make_linkhtml_entries:translators,betas
|
|||
## can change it.
|
||||
include_in_category:fandoms
|
||||
|
||||
[fanfictalk.com]
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. In commandline version, this should go in your
|
||||
## personal.ini, not defaults.ini.
|
||||
#is_adult:true
|
||||
|
||||
add_to_extra_valid_entries:,tropes,themes,representation,inclusivity,
|
||||
house,storytype,contentwarnings,
|
||||
series00,series00Url,series00HTML,
|
||||
series01,series01Url,series01HTML,
|
||||
series02,series02Url,series02HTML,
|
||||
series03,series03Url,series03HTML,
|
||||
series04,series04Url,series04HTML,
|
||||
series05,series05Url,series05HTML,
|
||||
series06,series06Url,series06HTML,
|
||||
series07,series07Url,series07HTML,
|
||||
series08,series08Url,series08HTML,
|
||||
series09,series09Url,series09HTML,
|
||||
|
||||
# fields changed name with domain name change.
|
||||
include_in_inclusivity:representation
|
||||
include_in_themes:tropes
|
||||
|
||||
## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to
|
||||
## make entryHTML.
|
||||
make_linkhtml_entries:series00,series01,series02,series03,series04,
|
||||
series05,series06,series07,series08,series09
|
||||
|
||||
tropes_label:Tropes
|
||||
representation_label:Representation
|
||||
house_label:HPFT Forum House
|
||||
storytype_label:Story Type
|
||||
contentwarnings_label:Content Warnings
|
||||
|
||||
## series00 will be the same as common metadata series.
|
||||
series00HTML_label:Series
|
||||
series01HTML_label:Additional Series
|
||||
series02HTML_label:Additional Series
|
||||
series03HTML_label:Additional Series
|
||||
series04HTML_label:Additional Series
|
||||
series05HTML_label:Additional Series
|
||||
series06HTML_label:Additional Series
|
||||
series07HTML_label:Additional Series
|
||||
series08HTML_label:Additional Series
|
||||
series09HTML_label:Additional Series
|
||||
|
||||
## Try to collect series names and numbers of this story in those
|
||||
## series. This lets us turn it on and off by site without keeping a
|
||||
## lengthy titlepage_entries per site and prevents it updating in the
|
||||
## plugin.
|
||||
collect_series: true
|
||||
|
||||
#add_to_extra_titlepage_entries:,tropes,themes,representation,inclusivity,house,storytype,contentwarnings,series01HTML,series02HTML,series03HTML,
|
||||
# series04HTML,series05HTML,series06HTML,series07HTML,series08HTML,series09HTML
|
||||
|
||||
## fanfictalk.com takes margins away, even from p tags, by default.
|
||||
## So authors have to either include extra br/p tags or their own
|
||||
## styles. These allow for both, but leave you at the mercy of author
|
||||
## CSS.
|
||||
add_to_output_css:
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
add_to_keep_html_attrs:,style
|
||||
|
||||
[fanfiction-junkies.de]
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
|
|
@ -3035,6 +3050,17 @@ sitetags_label:Site tags
|
|||
## Attempt to fix p and br excess from HTML in great many stories
|
||||
fix_excess_space:false
|
||||
|
||||
[www.novelupdates.cc]
|
||||
## Note that novelupdates.cc != novelupdates.com
|
||||
|
||||
## There is reason to believe that novelupdates.cc may be a
|
||||
## replacement for wuxiaworld.co, but currently both exist with
|
||||
## different data.
|
||||
|
||||
## When dedup_order_chapter_list:true, use a heuristic algorithm
|
||||
## specific to novelupdates.cc to order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.phoenixsong.net]
|
||||
## Some sites require login (or login for some rated stories). The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
|
|
@ -3065,7 +3091,7 @@ sitetags_label:Site Tags
|
|||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
|
||||
add_to_include_subject_tags:,takesplaces,snapeflavours,sitetags
|
||||
#add_to_include_subject_tags:,takesplaces,snapeflavours,sitetags
|
||||
#add_to_extra_titlepage_entries:,stars,reviews,reads,takesplaces,snapeflavours,sitetags
|
||||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
|
@ -3112,6 +3138,11 @@ extratags:
|
|||
## add_to_output_css example for [base_xenforoforum:epub].
|
||||
#legend_spoilers:true
|
||||
|
||||
## royalroad.com chapters can have author notes attached to them.
|
||||
## Setting include_author_notes:true will include them with the
|
||||
## chapter text.
|
||||
#include_author_notes:true
|
||||
|
||||
[www.scarvesandcoffee.net]
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
|
|
@ -3130,6 +3161,12 @@ views_label:Views
|
|||
averageWords_label:Average Words (Chapter)
|
||||
add_to_titlepage_entries:,views, averageWords
|
||||
|
||||
## Scribble Hub chapters can include author's notes and news blocks. We've
|
||||
## traditionally included them all in the chapter text, but this allows
|
||||
## you to customize which you include. Copy this parameter to your
|
||||
## personal.ini and list the ones you don't want.
|
||||
#exclude_notes:authornotes,newsboxes
|
||||
|
||||
[www.siye.co.uk]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
|
|
@ -3170,6 +3207,12 @@ extracategories:Lord of the Rings
|
|||
|
||||
website_encodings:Windows-1252,utf8
|
||||
|
||||
[www.the-sietch.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
## the-sietch.com shows more posts per reader page than other XF sites.
|
||||
reader_posts_per_page:15
|
||||
|
||||
[www.thedelphicexpanse.com]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Star Trek: Enterprise
|
||||
|
|
@ -3352,12 +3395,6 @@ website_encodings:Windows-1252,utf8
|
|||
## specific to wuxiaworld.co to order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.novelupdates.cc]
|
||||
## Note that novelupdates.cc != novelupdates.com
|
||||
## When dedup_order_chapter_list:true, use a heuristic algorithm
|
||||
## specific to novelupdates.cc to order and dedup chapters.
|
||||
dedup_order_chapter_list:false
|
||||
|
||||
[www.wuxiaworld.com]
|
||||
user_agent:Mozilla/5.0
|
||||
## Authors on wuxiaworld.com create their own index pages, so it's not
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Fanficdownloader team, 2018 FanFicFare team
|
||||
# Copyright 2015 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -31,7 +31,7 @@ from .six import ensure_str
|
|||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from .gziphttp import GZipProcessor
|
||||
|
||||
from . import adapters
|
||||
|
|
@ -39,87 +39,51 @@ from .configurable import Configuration
|
|||
from .exceptions import UnknownSite, FetchEmailFailed
|
||||
|
||||
def get_urls_from_page(url,configuration=None,normalize=False):
|
||||
|
||||
if not configuration:
|
||||
configuration = Configuration(["test1.com"],"EPUB",lightweight=True)
|
||||
|
||||
data = None
|
||||
adapter = None
|
||||
try:
|
||||
adapter = adapters.getAdapter(configuration,url,anyurl=True)
|
||||
|
||||
# special stuff to log into archiveofourown.org, if possible.
|
||||
# Unlike most that show the links to 'adult' stories, but protect
|
||||
# them, AO3 doesn't even show them if not logged in. Only works
|
||||
# with saved user/pass--not going to prompt for list.
|
||||
if 'archiveofourown.org' in url:
|
||||
if adapter.getConfig("username"):
|
||||
if adapter.getConfig("is_adult"):
|
||||
if '?' in url:
|
||||
addurl = "&view_adult=true"
|
||||
else:
|
||||
addurl = "?view_adult=true"
|
||||
else:
|
||||
addurl=""
|
||||
# just to get an authenticity_token.
|
||||
data = adapter._fetchUrl(url+addurl)
|
||||
# login the session.
|
||||
adapter.performLogin(url,data)
|
||||
# get the list page with logged in session.
|
||||
|
||||
if 'fimfiction.net' in url and adapter.getConfig("is_adult"):
|
||||
data = adapter._fetchUrl(url)
|
||||
adapter.set_adult_cookie()
|
||||
|
||||
if 'tthfanfic.org' in url and adapter.getConfig("is_adult"):
|
||||
## Simple fetch works in testing, but actual pages use a
|
||||
## POST and has a 'ctkn' value, so we do too.
|
||||
# adapter._fetchUrl("https://www.tthfanfic.org/setmaxrating.php?sitemaxrating=5")
|
||||
adapter.setSiteMaxRating(url)
|
||||
|
||||
# this way it uses User-Agent or other special settings.
|
||||
data = adapter._fetchUrl(url,usecache=False)
|
||||
return adapter.get_urls_from_page(url,normalize)
|
||||
except UnknownSite:
|
||||
# no adapter with anyurl=True, must be a random site.
|
||||
opener = build_opener(HTTPCookieProcessor(),GZipProcessor())
|
||||
data = opener.open(url).read()
|
||||
return {'urllist':get_urls_from_html(data,url,configuration,normalize)}
|
||||
return {}
|
||||
|
||||
# kludge because I don't see it on enough sites to be worth generalizing yet.
|
||||
restrictsearch=None
|
||||
if 'scarvesandcoffee.net' in url:
|
||||
restrictsearch=('div',{'id':'mainpage'})
|
||||
|
||||
return get_urls_from_html(data,url,configuration,normalize,restrictsearch)
|
||||
|
||||
def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrictsearch=None,email=False):
|
||||
def get_urls_from_html(data,url=None,configuration=None,normalize=False,email=False):
|
||||
logger.debug("get_urls_from_html")
|
||||
urls = collections.OrderedDict()
|
||||
|
||||
if not configuration:
|
||||
configuration = Configuration(["test1.com"],"EPUB",lightweight=True)
|
||||
|
||||
## soup and re-soup because BS4/html5lib is more forgiving of
|
||||
## incorrectly nested tags that way.
|
||||
soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib")
|
||||
if restrictsearch:
|
||||
soup = soup.find(*restrictsearch)
|
||||
#logger.debug("restrict search:%s"%soup)
|
||||
if isinstance(data,(BeautifulSoup,Tag)):
|
||||
logger.debug("Using pre-made soup")
|
||||
soup = data
|
||||
else:
|
||||
## soup and re-soup because BS4/html5lib is more forgiving of
|
||||
## incorrectly nested tags that way.
|
||||
logger.debug("dbl souping")
|
||||
soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib")
|
||||
|
||||
for a in soup.findAll('a'):
|
||||
if a.has_attr('href'):
|
||||
#logger.debug("a['href']:%s"%a['href'])
|
||||
# logger.debug("a['href']:%s"%a['href'])
|
||||
href = form_url(url,a['href'])
|
||||
#logger.debug("1 urlhref:%s"%href)
|
||||
# logger.debug("1 urlhref:%s"%href)
|
||||
href = cleanup_url(href,email)
|
||||
try:
|
||||
#logger.debug("2 urlhref:%s"%href)
|
||||
# logger.debug("2 urlhref:%s"%href)
|
||||
adapter = adapters.getAdapter(configuration,href)
|
||||
#logger.debug("found adapter")
|
||||
# logger.debug("found adapter")
|
||||
if adapter.story.getMetadata('storyUrl') not in urls:
|
||||
urls[adapter.story.getMetadata('storyUrl')] = [href]
|
||||
else:
|
||||
urls[adapter.story.getMetadata('storyUrl')].append(href)
|
||||
# logger.debug("adapter storyUrl:%s"%adapter.story.getMetadata('storyUrl'))
|
||||
except Exception as e:
|
||||
#logger.debug e
|
||||
# logger.debug(e)
|
||||
pass
|
||||
|
||||
# Simply return the longest URL with the assumption that it contains the
|
||||
|
|
|
|||
|
|
@ -1284,7 +1284,7 @@ class Story(Configurable):
|
|||
if cover and cover_big_enough:
|
||||
if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']:
|
||||
# remove existing cover, if there is one.
|
||||
self.imgsizes[len(self.imgtuples[0]['data'])].remove(0)
|
||||
# could have only come from first image and is assumed index 0.
|
||||
del self.imgurls[0]
|
||||
del self.imgtuples[0]
|
||||
self.imgurls.insert(0,imgurl)
|
||||
|
|
@ -1292,7 +1292,10 @@ class Story(Configurable):
|
|||
self.cover=newsrc
|
||||
self.setMetadata('cover_image','specific')
|
||||
self.imgtuples.insert(0,{'newsrc':newsrc,'mime':mime,'data':data})
|
||||
self.imgsizes[len(data)].append(0)
|
||||
## *Don't* include cover in imgsizes because it can be
|
||||
## replaced by Calibre etc. So don't re-use it.
|
||||
## Also saves removing it above.
|
||||
# self.imgsizes[len(data)].append(0)
|
||||
else:
|
||||
if self.getConfig('dedup_img_files',False):
|
||||
same_sz_imgs = self.imgsizes[len(data)]
|
||||
|
|
@ -1319,6 +1322,9 @@ class Story(Configurable):
|
|||
self.setMetadata('cover_image','first')
|
||||
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
|
||||
self.imgurls.append(imgurl)
|
||||
## *Don't* include cover in imgsizes because it can be
|
||||
## replaced by Calibre etc. So don't re-use it.
|
||||
# self.imgsizes[len(data)].append(len(self.imgtuples)-1)
|
||||
|
||||
newsrc = "images/%s-%s.%s"%(
|
||||
prefix,
|
||||
|
|
|
|||
2
setup.py
2
setup.py
|
|
@ -27,7 +27,7 @@ setup(
|
|||
name=package_name,
|
||||
|
||||
# Versions should comply with PEP440.
|
||||
version="3.23.4",
|
||||
version="3.24.2",
|
||||
|
||||
description='A tool for downloading fanfiction to eBook formats',
|
||||
long_description=long_description,
|
||||
|
|
|
|||
Loading…
Reference in a new issue