Compare commits

...

78 commits

Author SHA1 Message Date
Jim Miller a172a7bd2b Bump Test Version 4.57.7 2026-05-07 13:54:08 -05:00
Jim Miller ab103dce6e browsercache_sqldb: Better share_open and read-only. #1341 2026-05-07 13:54:02 -05:00
Jim Miller 892e9207f0 Bump Test Version 4.57.6 2026-05-06 19:53:58 -05:00
Jim Miller b4e392fae1 browsercache_sqldb: Use share_open for windows file locking. #1341 2026-05-06 19:53:44 -05:00
Jim Miller d9525d9726 Bump Test Version 4.57.5 2026-05-06 13:22:28 -05:00
Jim Miller cb77b12754 Adding browsercache_sqldb for Yet Another caching scheme in Chrome. #1341 2026-05-06 13:22:22 -05:00
Jim Miller b41a633821 Bump Test Version 4.57.4 2026-05-05 08:11:07 -05:00
Jim Miller 50c8db2992 browsercache_simple: Tweak index file size check. #1341 2026-05-05 08:10:59 -05:00
Jim Miller ef6dd99bfe Bump Test Version 4.57.3 2026-05-04 15:05:25 -05:00
Jim Miller 59796ff537 Add debug out to Browser Cache cache dir checking #1341 2026-05-04 15:05:13 -05:00
Jim Miller 8ee0a6e898 Bump Test Version 4.57.2 2026-05-03 09:06:51 -05:00
Jim Miller c53fc362bd Include genre/category in defaults.ini when include_in_X for extragenres/extracategories 2026-05-03 09:06:44 -05:00
Jim Miller c87cfc1057 adapter_fanficauthorsnet: Domains changed from .nsns to -nsns 2026-05-01 10:10:37 -05:00
Jim Miller 6ee151c90a Bump Release Version 4.57.0 2026-05-01 09:38:27 -05:00
Jim Miller db01c828a0 Update translations. 2026-05-01 09:37:13 -05:00
Jim Miller 4d03874f06 Fix a bad comment-out 2026-04-29 15:42:59 -05:00
Jim Miller 36f56483e6 Bump Test Version 4.56.10 2026-04-29 13:01:28 -05:00
Jim Miller 18e45a403b PI Anthology: Reuse epub cover if there is one. 2026-04-29 13:01:22 -05:00
Jim Miller 2e25172ba3 adapter_scribblehubcom: Update ajax call for chapters data. Didn't fix #1339 but change noted 3+ years ago 2026-04-29 10:15:26 -05:00
Jim Miller 65e3fd562b Update translations. 2026-04-27 16:53:06 -05:00
Jim Miller 7089bf6689 Bump Test Version 4.56.9 2026-04-21 15:02:05 -05:00
Jim Miller 061dc1333f PI: Correct Series field url link when setanthologyseries 2026-04-21 15:01:58 -05:00
Jim Miller 0a7fb5c090 Bump Test Version 4.56.8 2026-04-19 14:08:29 -05:00
Jim Miller cf02f729ae adapter_literotica: Fix for numeric tag value from json. #1336 2026-04-19 14:08:21 -05:00
Jim Miller 730c4f77f9 Bump Test Version 4.56.7 2026-04-19 09:33:07 -05:00
Jim Miller c02da29cbd Added strings for translation 2026-04-19 09:33:00 -05:00
Jim Miller b87d796221 PI: Add Fix Series Case setting for #1338 2026-04-19 09:30:15 -05:00
Jim Miller 436370fe5b Done profiling for now 2026-04-19 09:03:10 -05:00
Jim Miller ac77f31bc2 Move NotGoingToDownload to exceptions.py #1337 2026-04-19 09:02:32 -05:00
Jim Miller 16f2c74e4b Bump Test Version 4.56.6 2026-04-18 13:47:51 -05:00
praschke af5c2aa0bc adapter_kakuyomujp: site update 2026-04-18 13:47:14 -05:00
Jim Miller 31dec5b62d Bump Test Version 4.56.5 2026-04-18 12:58:56 -05:00
Jim Miller 97d37fcfc1 fix_relative_text_links: Allow hrefs to name anchors as well as id. 2026-04-18 12:58:46 -05:00
Jim Miller c730aa2f68 Bump Test Version 4.56.4 2026-04-17 10:22:20 -05:00
Jim Miller 4e2e359dee PI Anthologies: Only put status in tags if in include_subject_tags. Closes #1332 2026-04-17 10:22:13 -05:00
Jim Miller bb96049934 Remove some debug 2026-04-16 14:27:48 -05:00
Jim Miller 84965ef25f Bump Test Version 4.56.3 2026-04-12 21:20:09 -05:00
Jim Miller 348d129a1e adapter_ficwadcom: Detect missing username as well as failed login #1330 2026-04-12 21:05:42 -05:00
Jim Miller 4794e9bc51 Bump Test Version 4.56.2 2026-04-10 21:56:43 -05:00
Jim Miller d46dc76ae1 Somewhat better consolidated perf profiling 2026-04-10 21:56:43 -05:00
Jim Miller 08bae8d9be Imperfect, but working perf profiling 2026-04-10 16:49:17 -05:00
Jim Miller 405c37aeb5 Remove some dead code. 2026-04-10 16:43:49 -05:00
Jim Miller 270e01c3c7 Cache config values for performance improvement. 2026-04-10 16:24:37 -05:00
Jim Miller 12d57f5950 Bump Test Version 4.56.1 2026-04-06 12:07:14 -05:00
Jim Miller 562b3a4ecd Unnew Perf Improvement w/profiling 2026-04-06 12:07:05 -05:00
Jim Miller e69045fd98 Bump Release Version 4.56.0 2026-04-02 10:03:42 -05:00
Jim Miller 747bde3394 Update (commented out) profiling code. 2026-04-02 10:02:58 -05:00
Jim Miller aa00c7ae03 Bump Test Version 4.55.4 2026-03-27 11:54:50 -05:00
Jim Miller 0539f818f3 Add top menu items for Add/Edit Reject URLs. 2026-03-27 11:54:44 -05:00
Jim Miller 41a6f56f44 Remove fanficfare_macmenuhack. 2026-03-27 11:43:53 -05:00
Jim Miller e3832245e6 Add Reject URLs: Accept story URLs drag/drop & paste like Add Stories by URL 2026-03-27 10:52:30 -05:00
Jim Miller 909b64c83c Remove some image processing debug output 2026-03-27 10:51:29 -05:00
Jim Miller 732f5e2571 Bump Test Version 4.55.3 2026-03-19 13:03:11 -05:00
Jim Miller d9dd04396e Epub Update: Don't cache cover image with others, trips dedup. 2026-03-19 13:03:03 -05:00
Jim Miller 36e2183d45 Bump Test Version 4.55.2 2026-03-12 15:13:01 -05:00
Jim Miller 040b7205b8 adapter_literotica: Fix for site change (#1318) 2026-03-12 15:11:26 -05:00
Jim Miller d8ed180eb1 Bump Test Version 4.55.1 2026-03-09 13:04:56 -05:00
Jim Miller 2a6c1e74db Make seriesUrl mutable again. 2026-03-09 13:04:50 -05:00
Jim Miller b7c8c96153 Put download list at start of BG job too 2026-03-09 13:04:24 -05:00
Jim Miller a16096592c Bump Release Version 4.55.0 2026-03-01 09:25:11 -06:00
Jim Miller bb34eecc7c Remove a line of unused code. 2026-02-23 13:08:57 -06:00
Jim Miller ceed7ef1a8 Bump Test Version 4.54.5 2026-02-10 08:45:34 -06:00
Jim Miller 1d2a887c2d Epub Update: Skip missing chapter, image and css files instead of failing. 2026-02-10 08:45:20 -06:00
Jim Miller a3f3302312 Plugin only: In Skip mode, don't do initial metadata fetch if already matched in library. #1309 2026-02-10 08:30:02 -06:00
Jim Miller ecf005b145 Bump Test Version 4.54.4 2026-02-05 16:09:00 -06:00
Jim Miller 3bd074fa2c Additional checks for svg images to reject--Calibre only. Related to #1298 2026-02-05 16:08:54 -06:00
Jim Miller 0fd95daa8e Bump Test Version 4.54.3 2026-02-05 13:46:42 -06:00
Jim Miller 1b57e49d98 Ignore CSS url() when ttf/otf/woff/woff2 font files 2026-02-05 13:46:24 -06:00
Jim Miller db0d39c9cd Bump Test Version 4.54.2 2026-02-02 13:12:56 -06:00
Jim Miller cbde66cf41 adapter_fimfictionnet/adapter_royalroadcom: Better handling of cover image size fall back #1306 2026-02-02 13:12:42 -06:00
Jim Miller 17331e9eb3 Bump Test Version 4.54.1 2026-02-01 13:51:23 -06:00
Jim Miller 9b96c151a5 adapter_adultfanfictionorg: Fixes for site changes #1305 2026-02-01 13:51:22 -06:00
Jim Miller 1b65a30798 Making some metadata entries immutable 2026-02-01 13:51:22 -06:00
Jim Miller c9a47877f7 Allow for language getting changed by replace_metadata not breaking langcode 2026-02-01 09:15:31 -06:00
Jim Miller bdc77ad0f6 Remove Site: swi.org.ru No DNS for site. 2026-02-01 09:15:31 -06:00
Jim Miller 719971c76c Don't set numChapters--it's done automatically. 2026-02-01 09:15:31 -06:00
Jim Miller c74dba472a Fixes for mutable metadata entries used in code 2026-02-01 09:15:31 -06:00
Jim Miller c1fb7f0fc5 Refactor metadata entry and settings name code a bit 2026-02-01 09:15:31 -06:00
65 changed files with 12400 additions and 12244 deletions


@@ -33,7 +33,7 @@ except NameError:
 from calibre.customize import InterfaceActionBase
 # pulled out from FanFicFareBase for saving in prefs.py
-__version__ = (4, 54, 0)
+__version__ = (4, 57, 7)
 ## Apparently the name for this class doesn't matter--it was still
 ## 'demo' for the first few versions.


@@ -371,6 +371,7 @@ class ConfigWidget(QWidget):
         prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked()
         prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked()
         prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked()
+        prefs['seriescase'] = self.std_columns_tab.seriescase.isChecked()
         prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked()
         prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked()
@@ -760,6 +761,7 @@ class BasicTab(QWidget):
                            tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
                            rejectreasons=rejecturllist.get_reject_reasons(),
                            reasonslabel=_('Add this reason to all URLs added:'),
+                           accept_storyurls=True,
                            save_size_name='fff:Add Reject List')
         d.exec_()
         if d.result() == d.Accepted:
@@ -1636,6 +1638,11 @@ class StandardColumnsTab(QWidget):
         self.setanthologyseries.setChecked(prefs['setanthologyseries'])
         row.append(self.setanthologyseries)
+        self.seriescase = QCheckBox(_('Fix Series Case?'),self)
+        self.seriescase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of title will be applied.")
+                                   +"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
+        self.seriescase.setChecked(prefs['seriescase'])
+        row.append(self.seriescase)
         grid = QGridLayout()
         for rownum, row in enumerate(rows):
             for colnum, col in enumerate(row):


@@ -38,6 +38,7 @@ from calibre.gui2 import gprefs
 show_download_options = 'fff:add new/update dialogs:show_download_options'
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.complete2 import EditWithComplete
+from fanficfare.exceptions import NotGoingToDownload
 from fanficfare.six import text_type as unicode, ensure_text
 # pulls in translation files for _() strings
@@ -155,15 +156,6 @@ class RejectUrlEntry:
         return retval

-class NotGoingToDownload(Exception):
-    def __init__(self,error,icon='dialog_error.png',showerror=True):
-        self.error=error
-        self.icon=icon
-        self.showerror=showerror
-    def __str__(self):
-        return self.error
-
 class DroppableQTextEdit(QTextEdit):
     def __init__(self,parent):
         QTextEdit.__init__(self,parent)
@@ -1328,6 +1320,7 @@ class EditTextDialog(SizePersistedDialog):
                  icon=None, title=None, label=None, tooltip=None,
                  read_only=False,
                  rejectreasons=[],reasonslabel=None,
+                 accept_storyurls=False,
                  save_size_name='fff:edit text dialog',
                  ):
         SizePersistedDialog.__init__(self, parent, save_size_name)
@@ -1341,7 +1334,10 @@ class EditTextDialog(SizePersistedDialog):
             self.setWindowIcon(icon)

         self.l.addWidget(self.label)
-        self.textedit = QTextEdit(self)
+        if accept_storyurls:
+            self.textedit = DroppableQTextEdit(self)
+        else:
+            self.textedit = QTextEdit(self)
         self.textedit.setLineWrapMode(QTextEditNoWrap)
         self.textedit.setReadOnly(read_only)
         self.textedit.setText(text)
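For reference, the NotGoingToDownload exception deleted above was moved to fanficfare/exceptions.py (commit ac77f31bc2). A sketch of its shape, taken directly from the deleted lines; the version now living in exceptions.py is assumed to match but is not shown in this compare:

    # Sketch reconstructed from the class removed from dialogs.py above.
    # The error carries an icon name and a flag for whether to show an
    # error dialog, so callers can raise it for benign "skip" cases too.
    class NotGoingToDownload(Exception):
        def __init__(self, error, icon='dialog_error.png', showerror=True):
            self.error = error
            self.icon = icon
            self.showerror = showerror

        def __str__(self):
            return self.error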


@@ -10,20 +10,6 @@ __docformat__ = 'restructuredtext en'
 import fanficfare.six as six
 from fanficfare.six import ensure_text, string_types, text_type as unicode

-# import cProfile
-# def do_cprofile(func):
-#     def profiled_func(*args, **kwargs):
-#         profile = cProfile.Profile()
-#         try:
-#             profile.enable()
-#             result = func(*args, **kwargs)
-#             profile.disable()
-#             return result
-#         finally:
-#             profile.print_stats()
-#     return profiled_func
-
 import logging
 logger = logging.getLogger(__name__)
@@ -78,12 +64,14 @@ from fanficfare import adapters, exceptions
 from fanficfare.epubutils import (
     get_dcsource, get_dcsource_chaptercount, get_story_url_from_epub_html,
-    get_story_url_from_zip_html, reset_orig_chapters_epub, get_cover_data)
+    get_story_url_from_zip_html, reset_orig_chapters_epub, get_cover_img)
 from fanficfare.geturls import (
     get_urls_from_page, get_urls_from_text,get_urls_from_imap,
     get_urls_from_mime)
+from fanficfare.fff_profile import do_cprofile
 from calibre_plugins.fanficfare_plugin.fff_util import (
     get_fff_adapter, get_fff_config, get_fff_personalini,
     get_common_elements)
@@ -111,7 +99,8 @@ from calibre_plugins.fanficfare_plugin.dialogs import (
     LoopProgressDialog, UserPassDialog, AboutDialog, CollectURLDialog,
     RejectListDialog, EmailPassDialog, TOTPDialog,
     save_collisions, question_dialog_all,
-    NotGoingToDownload, RejectUrlEntry, IniTextDialog)
+    RejectUrlEntry, IniTextDialog,
+    EditTextDialog)

 # because calibre immediately transforms html into zip and don't want
 # to have an 'if html'. db.has_format is cool with the case mismatch,
@@ -205,20 +194,6 @@ class FanFicFarePlugin(InterfaceAction):
                            prefs,
                            self.qaction.icon())

-    ## Kludgey, yes, but with the real configuration inside the
-    ## library now, how else would a user be able to change this
-    ## setting if it's crashing calibre?
-    def check_macmenuhack(self):
-        try:
-            return self.macmenuhack
-        except:
-            file_path = os.path.join(calibre_config_dir,
-                                     *("plugins/fanficfare_macmenuhack.txt".split('/')))
-            file_path = os.path.abspath(file_path)
-            logger.debug("Plugin %s macmenuhack file_path:%s"%(self.name,file_path))
-            self.macmenuhack = os.access(file_path, os.F_OK)
-            return self.macmenuhack
-
     accepts_drops = True

     def accept_enter_event(self, event, mime_data):
@@ -443,30 +418,38 @@
         self.reject_list_action = self.create_menu_item_ex(self.menu, _('Reject Selected Books'),
                                                            unique_name='Reject Selected Books', image='rotate-right.png',
                                                            triggered=self.reject_list_urls)

-        # self.menu.addSeparator()
-        # print("platform.system():%s"%platform.system())
-        # print("platform.mac_ver()[0]:%s"%platform.mac_ver()[0])
-        if not self.check_macmenuhack(): # not platform.mac_ver()[0]: # Some macs crash on these menu items for unknown reasons.
-            self.menu.addSeparator()
-            self.editpersonalini_action = self.create_menu_item_ex(self.menu, _('Edit personal.ini'),
-                                                                   image= 'config.png',
-                                                                   unique_name='Edit personal.ini',
-                                                                   shortcut_name=_('Edit personal.ini'),
-                                                                   triggered=self.editpersonalini)
-            self.config_action = self.create_menu_item_ex(self.menu, _('&Configure FanFicFare'),
-                                                          image= 'config.png',
-                                                          unique_name='Configure FanFicFare',
-                                                          shortcut_name=_('Configure FanFicFare'),
-                                                          triggered=do_user_config)
-            self.about_action = self.create_menu_item_ex(self.menu, _('About FanFicFare'),
-                                                         image= 'images/icon.png',
-                                                         unique_name='About FanFicFare',
-                                                         shortcut_name=_('About FanFicFare'),
-                                                         triggered=self.about)
+        self.add_reject_urls_action = self.create_menu_item_ex(self.menu, _('Add Reject URLs'),
+                                                               image='rotate-right.png',
+                                                               unique_name='Add Reject URLs',
+                                                               shortcut_name=_('Add Reject URLs'),
+                                                               triggered=self.add_reject_urls)
+        self.edit_reject_urls_action = self.create_menu_item_ex(self.menu, _('Edit Reject URLs'),
+                                                                image='rotate-right.png',
+                                                                unique_name='Edit Reject URLs',
+                                                                shortcut_name=_('Edit Reject URLs'),
+                                                                triggered=self.edit_reject_urls)
+        self.menu.addSeparator()
+        self.editpersonalini_action = self.create_menu_item_ex(self.menu, _('Edit personal.ini'),
+                                                               image= 'config.png',
+                                                               unique_name='Edit personal.ini',
+                                                               shortcut_name=_('Edit personal.ini'),
+                                                               triggered=self.editpersonalini)
+        self.config_action = self.create_menu_item_ex(self.menu, _('&Configure FanFicFare'),
+                                                      image= 'config.png',
+                                                      unique_name='Configure FanFicFare',
+                                                      shortcut_name=_('Configure FanFicFare'),
+                                                      triggered=do_user_config)
+        self.about_action = self.create_menu_item_ex(self.menu, _('About FanFicFare'),
+                                                     image= 'images/icon.png',
+                                                     unique_name='About FanFicFare',
+                                                     shortcut_name=_('About FanFicFare'),
+                                                     triggered=self.about)

         self.gui.keyboard.finalize()

     def about(self,checked):
@@ -502,6 +485,35 @@ class FanFicFarePlugin(InterfaceAction):
             prefs['personal.ini'] = get_resources('plugin-example.ini')
             prefs.save_to_db()

+    def add_reject_urls(self):
+        d = EditTextDialog(self.gui,
+                           "http://example.com/story.php?sid=5,"+_("Reason why I rejected it")+"\nhttp://example.com/story.php?sid=6,"+_("Title by Author")+" - "+_("Reason why I rejected it"),
+                           # icon=self.windowIcon(),
+                           title=_("FanFicFare"),
+                           label=_("Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by author - note</b><br>Invalid story URLs will be ignored."),
+                           tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
+                           rejectreasons=rejecturllist.get_reject_reasons(),
+                           reasonslabel=_('Add this reason to all URLs added:'),
+                           accept_storyurls=True,
+                           save_size_name='fff:Add Reject List')
+        d.exec_()
+        if d.result() == d.Accepted:
+            rejecturllist.add_text(d.get_plain_text(),d.get_reason_text())
+
+    def edit_reject_urls(self):
+        with busy_cursor():
+            d = RejectListDialog(self.gui,
+                                 rejecturllist.get_list(),
+                                 rejectreasons=rejecturllist.get_reject_reasons(),
+                                 header=_("Edit Reject URLs List"),
+                                 show_delete=False,
+                                 show_all_reasons=False)
+        d.exec_()
+        if d.result() != d.Accepted:
+            return
+        with busy_cursor():
+            rejecturllist.add(d.get_reject_list(),clear=True)
+
     def create_menu_item_ex(self, parent_menu, menu_text, image=None, tooltip=None,
                             shortcut=None, triggered=None, is_checked=None, shortcut_name=None,
                             unique_name=None):
@@ -1141,9 +1153,9 @@
         ## Aug2024 moved site specific search changes to adapters as
         ## classmethod
         regexp = adapters.get_url_search(url)
-        logger.debug(regexp)
+        # logger.debug(regexp)
         retval = self.gui.current_db.search_getting_ids(regexp,None,use_virtual_library=False)
-        logger.debug(retval)
+        # logger.debug(retval)
         return retval

     def prep_downloads(self, options, books, merge=False, extrapayload=None):
@@ -1273,7 +1285,7 @@
             # let other exceptions percolate up.
             return adapter.getStoryMetadataOnly(get_cover=False)

-    # @do_cprofile
+    @do_cprofile
     def prep_download_loop(self,book,
                            options={'fileform':'epub',
                                     'collision':ADDNEW,
@@ -1307,9 +1319,16 @@
         if self.reject_url(merge,book):
             return

+        ## Check existing for SKIP mode. Again, redundant with below
+        ## for when story URL changes, but also kept here to avoid
+        ## network hit.
+        identicalbooks = self.do_id_search(url)
+        if collision == SKIP and identicalbooks:
+            raise exceptions.NotGoingToDownload(_("Skipping duplicate story."),"list_remove.png")
+
         # Dialogs should prevent this case now.
         if collision in (UPDATE,UPDATEALWAYS) and fileform != 'epub':
-            raise NotGoingToDownload(_("Cannot update non-epub format."))
+            raise exceptions.NotGoingToDownload(_("Cannot update non-epub format."))

         if not book['good']:
             # book has already been flagged bad for whatever reason.
@@ -1503,7 +1522,7 @@
                 logger.debug("existing found by identifier URL")

         if collision == SKIP and identicalbooks:
-            raise NotGoingToDownload(_("Skipping duplicate story."),"list_remove.png")
+            raise exceptions.NotGoingToDownload(_("Skipping duplicate story."),"list_remove.png")

         if len(identicalbooks) > 1:
             identicalbooks_msg = _("More than one identical book by Identifier URL or title/author(s)--can't tell which book to update/overwrite.")
@@ -1534,7 +1553,7 @@
                 identicalbooks = []
                 collision = book['collision'] = ADDNEW
             else:
-                raise NotGoingToDownload(identicalbooks_msg,"minusminus.png")
+                raise exceptions.NotGoingToDownload(identicalbooks_msg,"minusminus.png")

         ## changed: add new book when CALIBREONLY if none found.
         if collision in (CALIBREONLY, CALIBREONLYSAVECOL) and not identicalbooks:
@@ -1621,11 +1640,11 @@
                 # returns int adjusted for start-end range.
                 urlchaptercount = story.getChapterCount()
                 if chaptercount == urlchaptercount and collision == UPDATE:
-                    raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
+                    raise exceptions.NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
                 elif chaptercount > urlchaptercount and not (collision == UPDATEALWAYS and adapter.getConfig('force_update_epub_always')):
-                    raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
+                    raise exceptions.NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
                 elif chaptercount == 0:
-                    raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')
+                    raise exceptions.NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')

             if collision == OVERWRITE and \
                     db.has_format(book_id,formmapping[fileform],index_is_id=True):
@@ -1642,7 +1661,7 @@
                 # updated does have time, use full timestamps.
                 if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
                         (lastupdated.time() != time.min and fileupdated > lastupdated):
-                    raise NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png',showerror=False)
+                    raise exceptions.NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png',showerror=False)

         # For update, provide a tmp file copy of the existing epub so
         # it can't change underneath us. Now also overwrite for logpage preserve.
@@ -1862,6 +1881,7 @@
         else:
             return None

+    @do_cprofile
     def update_books_loop(self,book,db=None,
                           options={'fileform':'epub',
                                    'collision':ADDNEW,
@@ -2193,30 +2213,45 @@
        ## start with None. If no subbook covers, don't force one
        ## here. User can configure FFF to always create/polish a
        ## cover if they want. This is about when we force it.
-        coverpath = None
+        coverimgpath = None
         coverimgtype = None
+        had_cover = False

-        ## first, look for covers inside the subbooks. Stop at the
-        ## first one, which will be used if there isn't a pre-existing
+        # epubmerge wants a path to cover img on disk
+        def write_image(imgtype,imgdata):
+            tmp = PersistentTemporaryFile(prefix='cover_',
+                                          suffix='.'+imagetypes[imgtype],
+                                          dir=options['tdir'])
+            tmp.write(imgdata)
+            tmp.flush()
+            tmp.close()
+            return tmp.name
+
+        ## if prior epub had a cover, we should use it again.
+        if mergebook['calibre_id'] and db.has_format(mergebook['calibre_id'],'EPUB',index_is_id=True):
+            (covertype,coverdata) = get_cover_img(db.format(mergebook['calibre_id'],'EPUB',index_is_id=True,as_file=True))
+            if coverdata:
+                had_cover = True
+                coverimgpath = write_image(covertype,coverdata)
+                coverimgtype = covertype
+                logger.debug("prior anthology cover found")
+
+        ## look for covers inside the subbooks. Stop at the first
+        ## one, which will be used if there isn't a pre-existing
         ## calibre cover.
-        if not coverpath:
+        if not coverimgpath:
             for book in good_list:
-                coverdata = get_cover_data(book['outfile'])
+                (covertype,coverdata) = get_cover_img(book['outfile'])
                 if coverdata: # found a cover.
-                    (coverimgtype,coverimgdata) = coverdata[4:6]
-                    # logger.debug('coverimgtype:%s [%s]'%(coverimgtype,imagetypes[coverimgtype]))
-                    tmpcover = PersistentTemporaryFile(suffix='.'+imagetypes[coverimgtype],
-                                                       dir=options['tdir'])
-                    tmpcover.write(coverimgdata)
-                    tmpcover.flush()
-                    tmpcover.close()
-                    coverpath = tmpcover.name
+                    coverimgpath = write_image(covertype,coverdata)
+                    coverimgtype = covertype
+                    logger.debug('from subbook coverimgpath:%s'%coverimgpath)
                     break

-        # logger.debug('coverpath:%s'%coverpath)
         ## if updating an existing book and there is at least one
         ## subbook cover:
-        if coverpath and mergebook['calibre_id']:
+        if not had_cover and coverimgpath and mergebook['calibre_id']:
+            logger.debug("anth cover: using cal cover")
             # Couldn't find a better way to get the cover path.
             calcoverpath = os.path.join(db.library_path,
                                         db.path(mergebook['calibre_id'], index_is_id=True),
@@ -2224,9 +2259,11 @@
             ## if there's an existing cover, use it. Calibre will set
             ## it for us during lots of different actions anyway.
             if os.path.exists(calcoverpath):
-                coverpath = calcoverpath
+                coverimgpath = calcoverpath

-        # logger.debug('coverpath:%s'%coverpath)
+        ## Note that this cover will be replaced if 'inject
+        ## generated' cover is on
+        logger.debug('coverimgpath:%s'%coverimgpath)
         mrg_args = [tmp.name,
                     [ x['outfile'] for x in good_list ],]
         mrg_kwargs = {
@@ -2234,7 +2271,7 @@
             'titleopt':mergebook['title'],
             'keepmetadatafiles':True,
             'source':mergebook['url'],
-            'coverjpgpath':coverpath
+            'coverjpgpath':coverimgpath
             }
         logger.debug('anthology_merge_keepsingletocs:%s'%
                      mergebook['anthology_merge_keepsingletocs'])
@@ -2267,7 +2304,6 @@
         errorcol_label = self.get_custom_col_label(prefs['errorcol'])
         lastcheckedcol_label = self.get_custom_col_label(prefs['lastcheckedcol'])
-        columns = self.gui.library_view.model().custom_columns
         if good_list or prefs['mark'] or (bad_list and errorcol_label) or lastcheckedcol_label:
             LoopProgressDialog(self.gui,
                                good_list+bad_list,
@@ -2613,7 +2649,6 @@
             db.new_api.set_link_for_authors(author_id_to_link_map)

         # set series link if found.
-        logger.debug("has link_map:%s"%(hasattr(db.new_api,'set_link_map')))
         ## new_api.set_link_map added in Calibre v6.15
         if hasattr(db.new_api,'set_link_map') and \
            prefs['set_series_url'] and \
@@ -2622,6 +2657,7 @@
             series = book['series']
             if '[' in series: # a few can have a series w/o number
                 series = series[:series.rindex(' [')]
+            logger.debug("Setting series link:%s"%book['all_metadata']['seriesUrl'])
             db.new_api.set_link_map('series',{series:
                                               book['all_metadata']['seriesUrl']})
@@ -2811,6 +2847,9 @@
         mi.pubdate = book['pubdate']
         mi.timestamp = book['timestamp']
         mi.comments = book['comments']
+        if prefs['seriescase']:
+            from calibre.ebooks.metadata.sources.base import fixcase
+            book['series'] = fixcase(book['series'])
         mi.series = book['series']
         return mi
@@ -3162,6 +3201,7 @@ The previously downloaded book is still in the anthology, but FFF doesn't have t
         if prefs['setanthologyseries'] and book['title'] == series:
             book['series'] = series+' [0]'
+            book['all_metadata']['seriesUrl'] = options.get('anthology_url','')

         # logger.debug("anthology_title_pattern:%s"%configuration.getConfig('anthology_title_pattern'))
         if configuration.getConfig('anthology_title_pattern'):
@@ -3182,7 +3222,9 @@ The previously downloaded book is still in the anthology, but FFF doesn't have t
         s = options.get('frompage',{}).get('status','')
         if s:
             book['all_metadata']['status'] = s
-            book['tags'].append(s)
+            ## status into tags only if in include_subject_tags
+            if 'status' in configuration.getConfigList('include_subject_tags'):
+                book['tags'].append(s)
         book['tags'].extend(configuration.getConfigList('anthology_tags'))
         book['all_metadata']['anthology'] = "true"
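The 'Fix Series Case' change above rests on calibre's fixcase() helper, which the hunk at -2811 imports from calibre.ebooks.metadata.sources.base. A minimal sketch of how the new pref is applied, assuming a calibre environment; the prefs dict and series value below are illustrative stand-ins, not the plugin's actual objects:

    # Sketch: title-case the series name when the new 'seriescase'
    # preference is on, mirroring the hunk at -2811 above.
    from calibre.ebooks.metadata.sources.base import fixcase  # calibre-only import

    prefs = {'seriescase': True}        # hypothetical stand-in for the plugin prefs
    series = "a song of ice and fire"   # illustrative value
    if prefs['seriescase']:
        series = fixcase(series)        # e.g. -> "A Song of Ice and Fire"

In the real diff the import happens inside the branch, so users who leave the pref off never touch it; per the tooltip, only the Calibre metadata is affected, not the FanFicFare title page.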


@@ -44,33 +44,44 @@ def do_download_worker_single(site,
     print_basic_debug_info(sys.stderr)
     notification(0.01, _('Downloading FanFiction Stories'))

-    count = 0
-    totals = {}
-    # can't do direct assignment in list comprehension? I'm sure it
-    # makes sense to some pythonista.
-    # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
-    [ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
-    # logger.debug(sites_lists.keys())
+    from calibre_plugins.fanficfare_plugin import FanFicFareBase
+    fffbase = FanFicFareBase(options['plugin_path'])
+    with fffbase: # so the sys.path was modified while loading the
+                  # plug impl.
+        from fanficfare.fff_profile import do_cprofile

-    def do_indiv_notif(percent,msg):
-        totals[msg] = percent/len(totals)
-        notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
+        ## extra function just so I can easily use the same
+        ## @do_cprofile decorator
+        @do_cprofile
+        def profiled_func():
+            count = 0
+            totals = {}
+            # can't do direct assignment in list comprehension? I'm sure it
+            # makes sense to some pythonista.
+            # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
+            [ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
+            # logger.debug(sites_lists.keys())

-    do_list = []
-    done_list = []
-    ## pass failures from metadata through bg job so all results are
-    ## together.
-    for book in book_list:
-        if book['good']:
-            do_list.append(book)
-        else:
-            done_list.append(book)
-    for book in do_list:
-        # logger.info("%s"%book['url'])
-        done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
-        count += 1
-    return finish_download(done_list)
+            def do_indiv_notif(percent,msg):
+                totals[msg] = percent/len(totals)
+                notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
+
+            do_list = []
+            done_list = []
+            logger.info("\n\n"+_("Downloading FanFiction Stories")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
+            ## pass failures from metadata through bg job so all results are
+            ## together.
+            for book in book_list:
+                if book['good']:
+                    do_list.append(book)
+                else:
+                    done_list.append(book)
+            for book in do_list:
+                # logger.info("%s"%book['url'])
+                done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
+                count += 1
+            return finish_download(done_list)
+        return profiled_func()

def finish_download(donelist):
    book_list = sorted(donelist,key=lambda x : x['listorder'])
@@ -113,15 +124,6 @@ def finish_download(donelist):
     # return the book list as the job result
     return book_list

-def do_download_site(site,book_list,options,merge,notification=lambda x,y:x):
-    # logger.info(_("Started job for %s")%site)
-    retval = []
-    for book in book_list:
-        # logger.info("%s"%book['url'])
-        retval.append(do_download_for_worker(book,options,merge,notification))
-        notification(10.0,book['url'])
-    return retval
-
 def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
     '''
     Child job, to download story when run as a worker job
@@ -131,13 +133,13 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
     fffbase = FanFicFareBase(options['plugin_path'])
     with fffbase: # so the sys.path was modified while loading the
                   # plug impl.
-        from calibre_plugins.fanficfare_plugin.dialogs import NotGoingToDownload
         from calibre_plugins.fanficfare_plugin.prefs import (
             SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
             UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
         from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
         from fanficfare import adapters, writers
         from fanficfare.epubutils import get_update_data
+        from fanficfare.exceptions import NotGoingToDownload
         from fanficfare.six import text_type as unicode
         from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config
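Both this file and fff_plugin.py now import do_cprofile from fanficfare.fff_profile, whose contents are not shown in this compare. A sketch of what it presumably looks like, reconstructed from the commented-out version removed at the top of the fff_plugin.py diff; the real module may differ (the 'Done profiling for now' commit suggests it can be toggled to a no-op for releases):

    # Sketch of a cProfile-based decorator matching the removed comment block.
    import cProfile

    def do_cprofile(func):
        def profiled_func(*args, **kwargs):
            profile = cProfile.Profile()
            try:
                profile.enable()
                result = func(*args, **kwargs)
                profile.disable()
                return result
            finally:
                # prints stats even when the wrapped call raises
                profile.print_stats()
        return profiled_func

Defining profiled_func() inside do_download_worker_single, as the hunk above does, exists only so the whole worker body can be wrapped with this same decorator.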


@@ -1599,18 +1599,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti
 [adult-fanfiction.org]
+use_basic_cache:true
 extra_valid_entries:eroticatags,disclaimer
 eroticatags_label:Erotica Tags
 disclaimer_label:Disclaimer
 extra_titlepage_entries:eroticatags,disclaimer

-## Some sites require login (or login for some rated stories) The
-## program can prompt you, or you can save it in config. In
-## commandline version, this should go in your personal.ini, not
-## defaults.ini.
-#username:YourName
-#password:yourpassword
-
 [althistory.com]
 ## Note this is NOT the same as www.alternatehistory.com
 ## see [base_xenforoforum]
@@ -1724,13 +1719,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
 ## hardcoded to include the site specific metadata freeformtags &
 ## ao3categories in the standard metadata field genre. By making it
 ## configurable, users can change it.
-include_in_genre: freeformtags, ao3categories
+include_in_genre: genre, freeformtags, ao3categories

 ## AO3 uses the word 'category' differently than most sites. The
 ## adapter used to be hardcoded to include the site specific metadata
 ## fandom in the standard metadata field category. By making it
 ## configurable, users can change it.
-include_in_category:fandoms
+include_in_category:category,fandoms

 ## freeformtags was previously typo'ed as freefromtags. This way,
 ## freefromtags will still work for people who've used it.
@@ -1939,7 +1934,7 @@ make_linkhtml_entries:translators,betas
 ## For most sites, 'category' is the fandom, but fanfics.me has
 ## fandoms and a separate category. By making it configurable, users
 ## can change it.
-include_in_category:fandoms
+include_in_category:category,fandoms

 [fanfictalk.com]
 use_basic_cache:true
@@ -2715,13 +2710,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
 ## hardcoded to include the site specific metadata freeformtags &
 ## ao3categories in the standard metadata field genre. By making it
 ## configurable, users can change it.
-include_in_genre: freeformtags, ao3categories
+include_in_genre: genre, freeformtags, ao3categories

 ## OTW uses the word 'category' differently than most sites. The
 ## adapter used to be hardcoded to include the site specific metadata
 ## fandom in the standard metadata field category. By making it
 ## configurable, users can change it.
-include_in_category:fandoms
+include_in_category:category,fandoms

 ## freeformtags was previously typo'ed as freefromtags. This way,
 ## freefromtags will still work for people who've used it.
@@ -3022,13 +3017,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
 ## hardcoded to include the site specific metadata freeformtags &
 ## ao3categories in the standard metadata field genre. By making it
 ## configurable, users can change it.
-include_in_genre: freeformtags, ao3categories
+include_in_genre: genre, freeformtags, ao3categories

 ## OTW uses the word 'category' differently than most sites. The
 ## adapter used to be hardcoded to include the site specific metadata
 ## fandom in the standard metadata field category. By making it
 ## configurable, users can change it.
-include_in_category:fandoms
+include_in_category:category,fandoms

 ## freeformtags was previously typo'ed as freefromtags. This way,
 ## freefromtags will still work for people who've used it.
@@ -3157,8 +3152,8 @@ bookmarkmemo_label:ブックマークメモ
 bookmarkprivate_label:非公開ブックマーク
 subscribed_label:更新通知

-include_in_genre: fullgenre
-#include_in_genre: biggenre, smallgenre
+include_in_genre: genre, fullgenre
+#include_in_genre: genre, biggenre, smallgenre

 ## adds to titlepage_entries instead of replacing it.
 #extra_titlepage_entries: fullgenre,biggenre,smallgenre,imprint,freeformtags,comments,reviews,bookmarks,ratingpoints,overallpoints,bookmarked,bookmarkcategory,bookmarkmemo,bookmarkprivate,subscribed
@@ -3401,13 +3396,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
 ## hardcoded to include the site specific metadata freeformtags &
 ## ao3categories in the standard metadata field genre. By making it
 ## configurable, users can change it.
-include_in_genre: freeformtags, ao3categories
+include_in_genre: genre, freeformtags, ao3categories

 ## OTW uses the word 'category' differently than most sites. The
 ## adapter used to be hardcoded to include the site specific metadata
 ## fandom in the standard metadata field category. By making it
 ## configurable, users can change it.
-include_in_category:fandoms
+include_in_category:category,fandoms

 ## freeformtags was previously typo'ed as freefromtags. This way,
 ## freefromtags will still work for people who've used it.
@@ -3538,7 +3533,7 @@ upvotes_label:Upvotes
 subscribers_label:Subscribers
 views_label:Views

-include_in_category:tags
+include_in_category:category,tags

 #extra_titlepage_entries:upvotes,subscribers,views
@@ -3674,13 +3669,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
 ## hardcoded to include the site specific metadata freeformtags &
 ## ao3categories in the standard metadata field genre. By making it
 ## configurable, users can change it.
-include_in_genre: freeformtags, ao3categories
+include_in_genre: genre, freeformtags, ao3categories

 ## OTW uses the word 'category' differently than most sites. The
 ## adapter used to be hardcoded to include the site specific metadata
 ## fandom in the standard metadata field category. By making it
 ## configurable, users can change it.
-include_in_category:fandoms
+include_in_category:category,fandoms

 ## freeformtags was previously typo'ed as freefromtags. This way,
 ## freefromtags will still work for people who've used it.
@@ -4433,9 +4428,6 @@ extracategories:Buffy: The Vampire Slayer
 extracharacters:Buffy, Spike
 extraships:Spike/Buffy

-[www.swi.org.ru]
-use_basic_cache:true
-
 [www.the-sietch.com]
 ## see [base_xenforoforum]


@@ -126,6 +126,7 @@ default_prefs['suppressauthorsort'] = False
 default_prefs['suppresstitlesort'] = False
 default_prefs['authorcase'] = False
 default_prefs['titlecase'] = False
+default_prefs['seriescase'] = False
 default_prefs['setanthologyseries'] = False
 default_prefs['mark'] = False
 default_prefs['mark_success'] = True

20 file diffs suppressed because they are too large


@@ -120,7 +120,6 @@ from . import adapter_novelonlinefullcom
 from . import adapter_wwwnovelallcom
 from . import adapter_hentaifoundrycom
 from . import adapter_mugglenetfanfictioncom
-from . import adapter_swiorgru
 from . import adapter_fanficsme
 from . import adapter_fanfictalkcom
 from . import adapter_scifistoriescom


@@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
         # The date format will vary from site to site.
         # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-        self.dateformat = "%Y-%m-%d"
+        self.dateformat = "%B %d, %Y"

         ## Added because adult-fanfiction.org does send you to
         ## www.adult-fanfiction.org when you go to it and it also moves
@@ -139,91 +137,45 @@
     def getSiteURLPattern(self):
         return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'

-    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
-    ## Login seems to be reasonably standard across eFiction sites.
-    #def needToLoginCheck(self, data):
-    ##This adapter will always require a login
-    #    return True
-
-    # <form name="login" method="post" action="">
-    #   <div class="top">E-mail: <span id="sprytextfield1">
-    #     <input name="email" type="text" id="email" size="20" maxlength="255" />
-    #     <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
-    #   <div class="top">Password: <span id="sprytextfield2">
-    #     <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
-    #     <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
-    #   <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
-    #     <input type="submit" value="Login" />
-    #   </div>
-    # </form>
-
-    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
-    #def performLogin(self, url, soup):
-    #    params = {}
-    #    if self.password:
-    #        params['email'] = self.username
-    #        params['pass1'] = self.password
-    #    else:
-    #        params['email'] = self.getConfig("username")
-    #        params['pass1'] = self.getConfig("password")
-    #    params['submit'] = 'Login'
-    #    # copy all hidden input tags to pick up appropriate tokens.
-    #    for tag in soup.find_all('input',{'type':'hidden'}):
-    #        params[tag['name']] = tag['value']
-    #    logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
-    #    d = self.post_request(url, params, usecache=False)
-    #    d = self.post_request(url, params, usecache=False)
-    #    soup = self.make_soup(d)
-    #    if not (soup.find('form', {'name' : 'login'}) == None):
-    #        logger.info("Failed to login to URL %s as %s" % (url, params['email']))
-    #        raise exceptions.FailedToLogin(url,params['email'])
-    #        return False
-    #    else:
-    #        return True
-
     ## Getting the chapter list and the meta data, plus 'is adult' checking.
     def doExtractChapterUrlsAndMetadata(self, get_cover=True):

         ## You need to have your is_adult set to true to get this story
         if not (self.is_adult or self.getConfig("is_adult")):
             raise exceptions.AdultCheckRequired(self.url)
+        else:
+            d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
+            if "Age verified successfully" not in d:
+                raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))

         url = self.url
         logger.debug("URL: "+url)

         data = self.get_request(url)
+        # logger.debug(data)

         if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
             raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))

         soup = self.make_soup(data)

-        ##This is not working right now, so I'm commenting it out, but leaving it for future testing
-        #self.performLogin(url, soup)
-
         ## Title
         ## Some of the titles have a backslash on the story page, but not on the Author's page
         ## So I am removing it from the title, so it can be found on the Author's page further in the code.
         ## Also, some titles may have extra spaces '  ', and the search on the Author's page removes them,
         ## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
-        a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
-        self.story.setMetadata('title',stripHTML(a).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())
+        h1 = soup.find('h1')
+        # logger.debug("Title:%s"%h1)
+        self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())

-        # Find the chapters:
-        chapters = soup.find('ul',{'class':'dropdown-content'})
-        for i, chapter in enumerate(chapters.find_all('a')):
-            self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
+        # Find the chapters from first list only
+        chapters = soup.select_one('select.chapter-select').select('option')
+        for chapter in chapters:
+            self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])

         # Find authorid and URL from... author url.
-        a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
+        a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
         if a == None:
             # I know that the original author of fanficfare wants to always have metadata,
             # but I posit that if the story is there, even if we can't get the metadata from the
@@ -232,140 +184,56 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
             self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
             self.story.setMetadata('author','Unknown')
             logger.warning('There was no author found for the story... Metadata will not be retreived.')
-            self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
+            self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
         else:
             self.story.setMetadata('authorId',a['href'].split('=')[1])
             self.story.setMetadata('authorUrl',a['href'])
             self.story.setMetadata('author',stripHTML(a))

-            ##The story page does not give much Metadata, so we go to the Author's page
-
-            ##Get the first Author page to see if there are multiple pages.
-            ##AFF doesn't care if the page number is larger than the actual pages,
-            ##it will continue to show the last page even if the variable is larger than the actual page
-            author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
-            #author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
-            ##I'm resetting the author page to the zone for this story
-            self.story.setMetadata('authorUrl',author_Url)
-            logger.debug('Getting the author page: {0}'.format(author_Url))
+            ## The story page does not give much Metadata, so we go to
+            ## the Author's page.  Except it's actually a sub-req for
+            ## list of author's stories for that subdomain
+            author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
+                self.getBaseDomain(),
+                self.zone,
+                self.story.getMetadata('authorId'))
+            logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
             adata = self.get_request(author_Url)

-            if "The member you are looking for does not exist." in adata:
-                raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
-                #raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
+            none_found = "No stories found in this category."
+            if none_found in adata:
+                raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))

             asoup = self.make_soup(adata)
+            # logger.debug(asoup)

-            ##Getting the number of author pages
-            pages = 0
-            pagination=asoup.find('ul',{'class' : 'pagination'})
-            if pagination:
-                pages = pagination.find_all('li')[-1].find('a')
-                if not pages == None:
-                    pages = pages['href'].split('=')[-1]
-                else:
-                    pages = 0
-
-            storya = None
-            ##If there is only 1 page of stories, check it to get the Metadata,
-            if pages == 0:
-                a = asoup.find_all('li')
-                for lc2 in a:
-                    if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
-                        storya = lc2
+            story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
+            # logger.debug(story_card)
+
+            ## Category
+            ## I've only seen one category per story so far, but just in case:
+            for cat in story_card.select('div.story-card-category'):
+                # remove Category:, old code suggests Located: is also
+                # possible, so removing by <strong>
+                cat.find("strong").decompose()
+                self.story.addToList('category',stripHTML(cat))
break
## otherwise go through the pages
else:
page=1
i=0
while i == 0:
##We already have the first page, so if this is the first time through, skip getting the page
if page != 1:
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
logger.debug('Getting the author page: {0}'.format(author_Url))
adata = self.get_request(author_Url)
##This will probably never be needed, since AFF doesn't seem to care what number you put as
## the page number, it will default to the last page, even if you use 1000, for an author
## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
if "The member you are looking for does not exist." in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
# we look for the li element that has the story here
asoup = self.make_soup(adata)
a = asoup.find_all('li') self.setDescription(url,story_card.select_one('div.story-card-description'))
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
i=1
storya = lc2
break
page = page + 1
if page > int(pages):
break
##Split the Metadata up into a list for tag in story_card.select('span.story-tag'):
##We have to change the soup type to a string, then remove the newlines, and double spaces, self.story.addToList('eroticatags',stripHTML(tag))
##then changes the <br/> to '-:-', which seperates the different elemeents.
##Then we strip the HTML elements from the string. ## created/updates share formatting
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'. for meta in story_card.select('div.story-card-meta-item span:last-child'):
##They are always in the same order. meta = stripHTML(meta)
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it if 'Created: ' in meta:
liMetadata = unicode(storya).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ') meta = meta.replace('Created: ','')
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-')) self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')): if 'Updated: ' in meta:
if i == 0: meta = meta.replace('Updated: ','')
# The value for the title has been manipulated, so may not be the same as gotten at the start. self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))
# I'm going to use the href from the storya retrieved from the author's page to determine if it is correct.
if storya.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
elif i == 1:
##Get the description
self.setDescription(url,stripHTML(value.strip()))
else:
# the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
if 'Located :' in value:
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Category :' in value:
# Get the Category
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Content Tags :' in value:
# Get the Erotic Tags
value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
for code in re.split(r'\s',value):
self.story.addToList('eroticatags',code)
elif 'Posted :' in value:
# Get the Posted Date
value = value.replace(r'Posted :',r'').strip()
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
elif value.startswith('0000'):
# Since the date is showing as 0000,
# I'm going to put the memberdate here
value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
elif 'Edited :' in value:
# Get the 'Updated' Edited date
# AFF has the time for the Updated date, and we only want the date,
# so we take the first 10 characters only
value = value.replace(r'Edited :',r'').strip()[0:10]
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
elif value.startswith('0000') or '-00-' in value:
# Since the date is showing as 0000,
# or there is -00- in the date,
# I'm going to put the Published date here
self.story.setMetadata('dateUpdated', self.story.getMetadata('datPublished'))
else:
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
else:
# This catches the blank elements, and the Review and Dragon Prints.
# I am not interested in these, so do nothing
zzzzzzz=0
# grab the text for an individual chapter. # grab the text for an individual chapter.
def getChapterText(self, url): def getChapterText(self, url):
@ -373,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url) logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
chaptertag = soup.find('ul',{'class':'pagination'}).parent.parent.parent.findNextSibling('li') chaptertag = soup.select_one('div.chapter-body')
if None == chaptertag: if None == chaptertag:
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url)) raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
# Change td to a div. ## chapter text includes a copy of story title, author,
chaptertag.name='div' ## chapter title, & eroticatags specific to the chapter. Did
## before, too.
return self.utf8FromSoup(url,chaptertag) return self.utf8FromSoup(url,chaptertag)
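
The chapter loop above now walks the option elements of the story page's chapter-select drop-down. A standalone sketch of the same pattern, with an invented HTML snippet and story URL (the real page's markup may carry more attributes):

    from bs4 import BeautifulSoup

    html = '''<select class="chapter-select">
    <option value="1">Chapter 1</option>
    <option value="2">Chapter 2</option>
    </select>'''
    soup = BeautifulSoup(html, 'html.parser')
    base = 'https://zone.adult-fanfiction.org/story.php?no=600012345'  # illustrative
    for option in soup.select_one('select.chapter-select').select('option'):
        # chapter URL is built from the option's value attribute, as above
        print(option.get_text(), '->', base + '&chapter=' + option['value'])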

View file

@@ -157,9 +157,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
                 self.story.addToList('warnings', warning)
-            elif key == 'Chapters':
-                self.story.setMetadata('numChapters', int(value))
             elif key == 'Words':
                 # Apparently only numChapters need to be an integer for
                 # some strange reason. Remove possible ',' characters as to

@@ -174,7 +171,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
         # ugly %p(am/pm) hack moved into makeDate so other sites can use it.
         self.story.setMetadata('dateUpdated', date)

-        if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
+        if self.story.getMetadataRaw('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
             raise exceptions.AdultCheckRequired(self.url)

     def getChapterText(self, url):
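
Several hunks in this set swap getMetadata() for getMetadataRaw() where the value is compared against literal site strings. A minimal sketch of the assumed distinction; the replacement rule here is invented, standing in for a user's replace_metadata ini settings:

    # DemoStory is hypothetical; it only mimics the assumed accessor semantics.
    class DemoStory:
        def __init__(self):
            self._meta = {'rating': 'NC-17'}            # value as scraped
            self._replacements = {'NC-17': 'Explicit'}  # user remapping

        def getMetadataRaw(self, key):
            return self._meta.get(key)

        def getMetadata(self, key):
            value = self._meta.get(key)
            return self._replacements.get(value, value)

    story = DemoStory()
    assert story.getMetadataRaw('rating') == 'NC-17'   # stable for literal checks
    assert story.getMetadata('rating') == 'Explicit'   # presentation value

Under that reading, adult-check and similar comparisons stay correct even when a reader remaps rating strings in personal.ini.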

View file

@@ -53,6 +53,9 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
         #Setting the 'Zone' for each "Site"
         self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')
+        # site change .nsns to -nsns
+        self.zone = self.zone.replace('.nsns','-nsns')

         # normalized story URL.
         self._setURL('https://{0}.{1}/{2}/'.format(
             self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))

@@ -79,7 +82,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
     @classmethod
     def getAcceptDomains(cls):
+        # need both .nsns(old) and -nsns(new) because it's a domain
+        # change, not just URL change.
         return ['aaran-st-vines.nsns.fanficauthors.net',
+                'aaran-st-vines-nsns.fanficauthors.net',
                 'abraxan.fanficauthors.net',
                 'bobmin.fanficauthors.net',
                 'canoncansodoff.fanficauthors.net',

@@ -95,9 +101,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
                 'jeconais.fanficauthors.net',
                 'kinsfire.fanficauthors.net',
                 'kokopelli.nsns.fanficauthors.net',
+                'kokopelli-nsns.fanficauthors.net',
                 'ladya.nsns.fanficauthors.net',
+                'ladya-nsns.fanficauthors.net',
                 'lorddwar.fanficauthors.net',
                 'mrintel.nsns.fanficauthors.net',
+                'mrintel-nsns.fanficauthors.net',
                 'musings-of-apathy.fanficauthors.net',
                 'ruskbyte.fanficauthors.net',
                 'seelvor.fanficauthors.net',

@@ -108,7 +117,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
     ################################################################################################
     @classmethod
     def getSiteExampleURLs(self):
-        return ("https://aaran-st-vines.nsns.fanficauthors.net/A_Story_Name/ "
+        return ("https://aaran-st-vines-nsns.fanficauthors.net/A_Story_Name/ "
                 + "https://abraxan.fanficauthors.net/A_Story_Name/ "
                 + "https://bobmin.fanficauthors.net/A_Story_Name/ "
                 + "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "

@@ -123,10 +132,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
                 + "https://jbern.fanficauthors.net/A_Story_Name/ "
                 + "https://jeconais.fanficauthors.net/A_Story_Name/ "
                 + "https://kinsfire.fanficauthors.net/A_Story_Name/ "
-                + "https://kokopelli.nsns.fanficauthors.net/A_Story_Name/ "
-                + "https://ladya.nsns.fanficauthors.net/A_Story_Name/ "
+                + "https://kokopelli-nsns.fanficauthors.net/A_Story_Name/ "
+                + "https://ladya-nsns.fanficauthors.net/A_Story_Name/ "
                 + "https://lorddwar.fanficauthors.net/A_Story_Name/ "
-                + "https://mrintel.nsns.fanficauthors.net/A_Story_Name/ "
+                + "https://mrintel-nsns.fanficauthors.net/A_Story_Name/ "
                 + "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
                 + "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
                 + "https://seelvor.fanficauthors.net/A_Story_Name/ "

@@ -136,8 +145,16 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
     ################################################################################################
     def getSiteURLPattern(self):
+        ## .nsns kept here to match both . and -
         return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'

+    @classmethod
+    def get_section_url(cls,url):
+        ## only changing .nsns to -nsns and only when part of the
+        ## domain.
+        url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
+        return url
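
The "## .nsns kept here" comment leans on the unescaped '.' in the pattern matching any character, so 'aaran-st-vines.nsns' matches both the old dotted and the new dashed domain, while get_section_url() normalizes old URLs to the new form. A quick check, with the pattern shortened to two authors for illustration:

    import re

    pat = r'https?://(aaran-st-vines.nsns|kokopelli.nsns)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
    old = 'https://kokopelli.nsns.fanficauthors.net/A_Story_Name/'
    new = old.replace('.nsns.fanficauthors.net', '-nsns.fanficauthors.net')
    # '.' in 'kokopelli.nsns' matches the '-' of the new domain too
    assert re.match(pat, old) and re.match(pat, new)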
    ################################################################################################
    def doExtractChapterUrlsAndMetadata(self, get_cover=True):

@@ -202,7 +219,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
        ## Raising AdultCheckRequired after collecting chapters gives
        ## a double chapter list.  So does genre, but it de-dups
        ## automatically.
-        if( self.story.getMetadata('rating') == 'Mature'
+        if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only']
            and not (self.is_adult or self.getConfig("is_adult")) ):
            raise exceptions.AdultCheckRequired(self.url)

@@ -226,7 +243,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
    # grab the text for an individual chapter.
    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)
-        if( self.story.getMetadata('rating') == 'Mature' and
+        if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only'] and
            (self.is_adult or self.getConfig("is_adult")) ):
            addurl = "?bypass=1"
        else:

View file

@@ -150,7 +150,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
        self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))

        ## Need to login for any rating higher than General.
-        if self.story.getMetadata('rating') != 'General' and self.needToLoginCheck(data):
+        if self.story.getMetadataRaw('rating') != 'General' and self.needToLoginCheck(data):
            self.performLogin(url)
            # reload after login.
            data = self.get_request(url,usecache=False)

View file

@@ -157,7 +157,6 @@ class FicBookNetAdapter(BaseSiteAdapter):
                update = chapterdate
        else:
            self.add_chapter(self.story.getMetadata('title'),url)
-            self.story.setMetadata('numChapters',1)
            date_str = soup.find('div', {'class' : 'part-date'}).find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
            for month_name, month_num in fullmon.items():
                date_str = date_str.replace(month_name, month_num)

View file

@@ -173,7 +173,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
        tags = data['ta'] if 'ta' in data else []

-        if (self.story.getMetadata('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
+        if (self.story.getMetadataRaw('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
           not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

View file

@@ -40,10 +40,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
        self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)

-        # Always single chapters, probably should use the Anthology feature to
-        # merge chapters of a story
-        self.story.setMetadata('numChapters', 1)

    @staticmethod
    def getSiteDomain():
        return FictionManiaTVAdapter.SITE_DOMAIN

View file

@@ -66,7 +66,8 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
                                                  params['username']))
        d = self.post_request(loginUrl,params,usecache=False)

-        if "Login attempt failed..." in d:
+        if "Login attempt failed..." in d or \
+                '<div id="error">Please enter your username and password.</div>' in d:
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['username']))
            raise exceptions.FailedToLogin(url,params['username'])

@@ -114,7 +115,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
        titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
        self.story.setMetadata('title', stripHTML(titleh4.a))
-        if 'Deleted story' in self.story.getMetadata('title'):
+        if 'Deleted story' in self.story.getMetadataRaw('title'):
            raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)

        # Find authorid and URL from... author url.

View file

@@ -151,7 +151,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
        self.story.setMetadata("authorId", author['href'].split('/')[2])
        self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
                                                                      self.story.getMetadata('authorId'),
-                                                                      self.story.getMetadata('author')))
+                                                                      # meta entry author can be changed by the user.
+                                                                      stripHTML(author)))

        #Rating text is replaced with full words for historical compatibility after the site changed
        #on 2014-10-27

@@ -183,7 +184,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
        if storyImage:
            coverurl = storyImage['data-fullsize']
            # try setting from data-fullsize, if fails, try using data-src
-            if self.setCoverImage(self.url,coverurl)[0].startswith("failedtoload"):
+            cover_set = self.setCoverImage(self.url,coverurl)[0]
+            if not cover_set or cover_set.startswith("failedtoload"):
                coverurl = storyImage['src']
                self.setCoverImage(self.url,coverurl)
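
This hunk (and the matching RoyalRoad change further down) now also guards against setCoverImage() returning a false-y first tuple element, not only a "failedtoload..." string. A generic sketch of the try-preferred-then-fall-back pattern, with a hypothetical stand-in for the adapter method:

    # set_cover_image is a stand-in; the real method returns a (status, ...) tuple.
    def set_cover_image(url):
        return (None,) if 'covers-large' in url else ('success',)

    thumb = 'https://example.com/covers-full/123.jpg'   # illustrative URL
    cover_set = set_cover_image(thumb.replace('/covers-full/', '/covers-large/'))[0]
    if not cover_set or cover_set.startswith('failedtoload'):
        cover_set = set_cover_image(thumb)[0]   # fall back to the original URL
    print(cover_set)

The extra `not cover_set` test matters because `None.startswith(...)` would raise rather than fall back.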

View file

@@ -105,7 +105,6 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
        # to download them one at a time yourself. I'm also setting the status to
        # complete
        self.add_chapter(self.story.getMetadata('title'), self.url)
-        self.story.setMetadata('numChapters', 1)
        self.story.setMetadata('status', 'Completed')

        ## some stories do not have a summary listed, so I'm setting it here.

View file

@@ -163,7 +163,7 @@ class KakuyomuJpAdapter(BaseSiteAdapter):
        titles = []
        nestingLevel = 0
        newSection = False
-        for tocNodeRef in info[workKey]['tableOfContents']:
+        for tocNodeRef in info[workKey]['tableOfContentsV2']:
            tocNode = info[tocNodeRef['__ref']]
            if tocNode['chapter'] is not None:

@@ -197,8 +197,6 @@ class KakuyomuJpAdapter(BaseSiteAdapter):
            self.add_chapter(epTitle, epUrl)
            newSection = False

-        self.story.setMetadata('numChapters', numEpisodes)

        logger.debug("Story: <%s>", self.story)
        return

View file

@@ -99,7 +99,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
    ## apply clean_chapter_titles
    def add_chapter(self,chapter_title,url,othermeta={}):
        if self.getConfig("clean_chapter_titles"):
-            storytitle = self.story.getMetadata('title').lower()
+            storytitle = self.story.getMetadataRaw('title').lower()
            chapter_name_type = None
            # strip trailing ch or pt before doing the chapter clean.
            # doesn't remove from story title metadata

@@ -241,7 +241,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
            self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div#tabpanel-tags a.av_as') ])
        if soup.select('div[class^="_widget__tags_"]'):
            # logger.debug("tags2")
-            self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div[class^="_widget__tags_"] a[class^="_tags__link_"]') ])
+            self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div[class^="_widget__tags_"] a[class^="_tag_item_"]') ])
        # logger.debug(self.story.getList('eroticatags'))

        ## look first for 'Series Introduction', then Info panel short desc

@@ -395,7 +395,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
                    ## Collect tags from series/story page if tags_from_chapters is enabled
                    if self.getConfig("tags_from_chapters"):
-                        self.story.extendList('eroticatags', [ stripHTML(t['tag']).title() for t in chap['tags'] ])
+                        self.story.extendList('eroticatags', [ unicode(t['tag']).title() for t in chap['tags'] ])
            except Exception as e:
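
The last hunk is the #1336 fix: the site's json can hand back a number where a string is expected, which an HTML-stripping helper generally won't accept, while a plain text-type conversion will. A tiny illustration, with the tag shapes assumed from the report and str() standing in for the six text_type alias:

    tags = [{'tag': 'Romance'}, {'tag': 69}]        # assumed json shapes
    print([str(t['tag']).title() for t in tags])    # -> ['Romance', '69']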

View file

@@ -162,7 +162,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
            self.story.extendList('authorId', [authorId])
            self.story.extendList('authorUrl', [authorUrl])

-        if not self.story.getMetadata('rating'):
+        if not self.story.getMetadataRaw('rating'):
            ratingTitle = chapter.getRatingTitle()
            if ratingTitle:
                self.story.setMetadata('rating', ratingTitle)

@@ -204,7 +204,6 @@ class MassEffect2InAdapter(BaseSiteAdapter):
        self.story.setMetadata('datePublished', datePublished)
        self.story.setMetadata('dateUpdated', dateUpdated)
        self.story.setMetadata('numWords', unicode(wordCount))
-        self.story.setMetadata('numChapters', len(chapters))

        # Site-specific metadata.
        self.story.setMetadata('language', self.SITE_LANGUAGE)

View file

@@ -289,7 +289,8 @@ class RoyalRoadAdapter(BaseSiteAdapter):
        if img:
            cover_url = img['src']
            # usually URL is for thumbnail.  Try expected URL for larger image, if fails fall back to the original URL
-            if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0].startswith("failedtoload"):
+            cover_set = self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0]
+            if not cover_set or cover_set.startswith("failedtoload"):
                self.setCoverImage(url,cover_url)

        # some content is show as tables, this will preserve them

View file

@@ -197,33 +197,20 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
        # Get the contents list from scribblehub, iterate through and add to chapters
        # Can be fairly certain this will not 404 - we know the story id is valid
-        contents_payload = {"action": "wi_gettocchp",
-                            "strSID": self.story.getMetadata('storyId'),
-                            "strmypostid": 0,
-                            "strFic": "yes"}
-        # 14/12/22 - Looks like it should follow this format now (below), but still returns a 400
-        # but not a 403. tested in browser getting rid of all other cookies to try and get a 400 and nopes.
-        # contents_payload = {"action": "wi_getreleases_pagination",
-        #                     "pagenum": 1,
-        #                     "mypostid": 421879}
-        # contents_payload = "action=wi_getreleases_pagination&pagenum=1&mypostid=421879"
+        contents_payload = {"action": "wi_getreleases_pagination",
+                            "pagenum": -1,
+                            "mypostid": self.story.getMetadata('storyId')}
        contents_data = self.post_request("https://www.scribblehub.com/wp-admin/admin-ajax.php", contents_payload)
+        # logger.debug(contents_data)
        contents_soup = self.make_soup(contents_data)
-        for i in range(1, int(contents_soup.find('ol',{'id':'ol_toc'}).get('count')) + 1):
-            chapter_url = contents_soup.find('li',{'cnt':str(i)}).find('a').get('href')
-            chapter_name = contents_soup.find('li',{'cnt':str(i)}).find('a').get('title')
-            # logger.debug("Found Chapter " + str(i) + ", name: " + chapter_name + ", url: " + chapter_url)
+        for toca in contents_soup.select('a.toc_a'):
+            chapter_url = toca['href']
+            chapter_name = stripHTML(toca)
+            # logger.debug("Found Chapter: " + chapter_name + ", url: " + chapter_url)
            self.add_chapter(chapter_name, chapter_url)

-    # eFiction sites don't help us out a lot with their meta data
-    # formating, so it's a little ugly.

    # utility method
    def defaultGetattr(d,k):
        try:
View file

@@ -1,144 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-import logging
-logger = logging.getLogger(__name__)
-import re
-
-from ..htmlcleanup import stripHTML
-from .. import exceptions as exceptions
-
-# py2 vs py3 transition
-from .base_adapter import BaseSiteAdapter, makeDate
-
-def getClass():
-    return SwiOrgRuAdapter
-
-logger = logging.getLogger(__name__)
-
-class SwiOrgRuAdapter(BaseSiteAdapter):
-    def __init__(self, config, url):
-        BaseSiteAdapter.__init__(self, config, url)
-        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
-        self.password = ""
-        self.is_adult=False
-        storyId = self.parsedUrl.path.split('/',)[3]
-        self.story.setMetadata('storyId', storyId)
-        # normalized story URL.
-        self._setURL('http://' + self.getSiteDomain() + '/mlp-fim/story/'+self.story.getMetadata('storyId'))
-        # Each adapter needs to have a unique site abbreviation.
-        self.story.setMetadata('siteabbrev','swiorgru')
-        # The date format will vary from site to site.
-        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-        self.dateformat = "%Y.%m.%d"
-
-    @staticmethod # must be @staticmethod, don't remove it.
-    def getSiteDomain():
-        return 'www.swi.org.ru'
-
-    @classmethod
-    def getSiteExampleURLs(cls):
-        return "http://" + cls.getSiteDomain() + "/mlp-fim/story/11341/ http://" + cls.getSiteDomain() + "/mlp-fim/story/11341/chapter1.html"
-
-    def getSiteURLPattern(self):
-        return r"http://" + re.escape(self.getSiteDomain() + "/mlp-fim/story/")+r"\d+"
-
-    def extractChapterUrlsAndMetadata(self):
-        url=self.url
-        logger.debug("URL: "+url)
-        data = self.get_request(url)
-        soup = self.make_soup(data)
-
-        title = soup.find('h1')
-        for tag in title.find_all('sup'):
-            tag.extract()
-        self.story.setMetadata('title', stripHTML(title.text))
-        logger.debug("Title: (%s)"%self.story.getMetadata('title'))
-
-        author_title = soup.find('strong', string = re.compile(u"Автор: "))
-        if author_title == None:
-            raise exceptions.FailedToDownload("Error downloading page: %s! Missing required author_title element!" % url)
-        author = author_title.next_sibling
-        self.story.setMetadata('authorId', author.text) # Author's name is unique
-        self.story.setMetadata('authorUrl','http://'+self.host + author['href'])
-        self.story.setMetadata('author', author.text)
-        logger.debug("Author: (%s)"%self.story.getMetadata('author'))
-
-        date_pub = soup.find('em', string = re.compile(r'\d{4}.\d{2}.\d{2}'))
-        if not date_pub == None:
-            self.story.setMetadata('datePublished', makeDate(date_pub.text, self.dateformat))
-
-        rating_label = soup.find('strong', string = re.compile(u"рейтинг:"))
-        if not rating_label == None:
-            rating = rating_label.next_sibling.next_sibling
-            self.story.setMetadata('rating', stripHTML(rating))
-            if not self.is_adult or self.getConfig("is_adult"):
-                if "NC-18" in rating:
-                    raise exceptions.AdultCheckRequired(self.url)
-
-        characters = soup.find_all('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png"))
-        logger.debug("numCharacters: (%s)"%str(len(characters)))
-        for x in range(0,len(characters)):
-            character=characters[x]
-            self.story.addToList('characters', character['title'])
-
-        if soup.find('font', color = r"green", string = u"завершен"):
-            self.story.setMetadata('status', 'Completed')
-        else:
-            self.story.setMetadata('status', 'In-Progress')
-
-        categories_label = soup.find('strong', string = u"категории:")
-        if not categories_label == None:
-            categories_element = categories_label.next_sibling.next_sibling
-            categories = re.findall(r'"(.+?)"', categories_element.text)
-            for x in range(0, len(categories)):
-                category=categories[x]
-                self.story.addToList('category', category)
-
-        chapters_header = soup.find('h2', string = re.compile(u"Главы:"))
-        if chapters_header==None:
-            raise exceptions.FailedToDownload("Error downloading page: %s! Missing required chapters_header element!" % url)
-        chapters_table = chapters_header.next_sibling.next_sibling
-        self.story.setMetadata('language','Russian')
-        chapters=chapters_table.find_all('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+"))
-        self.story.setMetadata('numChapters', len(chapters))
-        logger.debug("numChapters: (%s)"%str(self.story.getMetadata('numChapters')))
-        for x in range(0,len(chapters)):
-            chapter=chapters[x]
-            churl='http://'+self.host+chapter['href']
-            self.add_chapter(chapter,churl)
-
-    # grab the text for an individual chapter.
-    def getChapterText(self, url):
-        logger.debug('Getting chapter text from: %s' % url)
-        soup = self.make_soup(self.get_request(url))
-        chapter = soup.find('div', {'id' : 'content'})
-        chapter_header = chapter.find('h1', id = re.compile("chapter"))
-        if not chapter_header == None:
-            chapter_header.decompose()
-        if chapter == None:
-            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
-        return self.utf8FromSoup(url,chapter)

View file

@@ -255,7 +255,6 @@ class SyosetuComAdapter(BaseSiteAdapter):
            numChapters = int(re.sub(r'[^\d]', '', infoSoup.find('span', {'class':'p-infotop-type__allep'}).text.strip()))
            oneshot = False
        completed = True if noveltype == '完結済' else False
-        self.story.setMetadata('numChapters', numChapters)
        self.story.setMetadata('status', 'Completed' if completed else 'In-Progress')

        # Keywords

View file

@@ -149,20 +149,20 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
        # greater than 10, no language or series.
        if idnum < 10:
+            ## non-English was changing series sort order which
+            ## confuses me more often than I test other langs.
+            # langs = {
+            #     0:"English",
+            #     1:"Russian",
+            #     2:"French",
+            #     3:"German",
+            #     }
+            # self.story.setMetadata('language',langs[idnum%len(langs)])
            self.setSeries('The Great Test',idnum)
            self.story.setMetadata('seriesUrl','http://'+self.getSiteDomain()+'/seriesid=1')
        elif idnum < 20:
            self.setSeries('魔法少女まどか★マギカ',idnum)
            self.story.setMetadata('seriesUrl','http://'+self.getSiteDomain()+'/seriesid=1')
-        elif idnum < 30:
-            langs = {
-                0:"English",
-                1:"Russian",
-                2:"French",
-                3:"German",
-                }
-            self.story.setMetadata('language',langs[idnum%len(langs)])
        if idnum == 0:
            self.setSeries("A Nook Hyphen Test "+self.story.getMetadata('dateCreated'),idnum)
            self.story.setMetadata('seriesUrl','http://'+self.getSiteDomain()+'/seriesid=0')

@@ -491,6 +491,7 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
        desc = '<div><p>The Great Test Series of '+self.getSiteDomain()+'!</p><p>Now with two lines!</p></div>'
        return {'name':'The Great Test',
                'desc':desc,
+                'status':'AStatus',
                'urllist':['http://'+self.getSiteDomain()+'?sid=1',
                           'http://'+self.getSiteDomain()+'?sid=2',
                           'http://'+self.getSiteDomain()+'?sid=3',

View file

@@ -101,7 +101,6 @@ class TouchFluffyTailAdapter(BaseSiteAdapter):
        self.story.setMetadata('status', 'Completed')

        self.add_chapter(self.story.getMetadata('title'),url)
-        self.story.setMetadata('numChapters',1)

        avrrate = body.find_all('footer', class_='entry-meta')[1].find('em').span.find_all('strong')
        averrating = avrrate[1].text

View file

@@ -126,11 +126,6 @@ class TrekFanFictionNetSiteAdapter(BaseSiteAdapter):
        ## url since we can't get the chapter without this, I'm leaving it in.
        self.add_chapter(self.story.getMetadata('title'), url)

-        ## I'm going to comment this out, because there is always only one chapter for each story,
-        ## so this is really not needed
-        ## And I am uncommenting it because the rest of FFF expects
-        ## there to always be numChapters, even if it's one. --Jimm

        # getting the rest of the metadata... there isn't much here, and the summary can only be
        # gotten on the author's page... so we'll get it to get the information from
        adata = self.get_request(self.story.getMetadata('authorUrl'))

View file

@@ -199,9 +199,6 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
                self.story.setMetadata('series', a.string)
                self.story.setMetadata('seriesUrl', urlparse.urljoin(self.BASE_URL, a['href']))
-            elif key == 'Chapter':
-                self.story.setMetadata('numChapters', int(value))
            elif key == 'Completed':
                self.story.setMetadata('status', 'Completed' if value == 'Yes' else 'In-Progress')

View file

@@ -670,6 +670,7 @@ try to download.</p>
        return url in self.add_img_names

    def include_css_urls(self,parenturl,style):
+        FONT_EXTS = ('ttf','otf','woff','woff2')
        # logger.debug("include_css_urls(%s,%s)"%(parenturl,style))
        ## pass in the style string, will be returned with URLs
        ## replaced and images will be added.

@@ -680,12 +681,16 @@ try to download.</p>
        ## url('href')
        ## the pattern will also accept mismatched '/", which is broken CSS.
        for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
-            logger.debug("Adding style url(%s)"%style_url)
            ## additional_images don't get processing.  Applies
            ## only to CSS url(), that should be the only time
            ## additional_images is used.
            if self.is_additional_image(style_url):
+                logger.debug("Skipping sheet style url(%s), in additional_images"%style_url)
                continue
+            if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
+                logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
+                continue
+            logger.debug("Adding style url(%s)"%style_url)
            try:
                # longdesc(aka origurl) isn't saved anywhere in CSS.
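
The new font-extension skip keys off the last dot-separated piece of each CSS url() target. A self-contained sketch of that extraction and filter, with an invented stylesheet string:

    import re

    FONT_EXTS = ('ttf', 'otf', 'woff', 'woff2')
    style = "body { background: url('bg.png'); } @font-face { src: url(f.woff2); }"
    for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
        if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
            continue                      # fonts are skipped, not fetched
        print('would fetch', style_url)   # only bg.png survives the filter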
@@ -777,7 +782,7 @@ try to download.</p>
                (img['src'],longdesc)=self.story.addImgUrl(url,self.img_url_trans(img['src']),fetch,
                                                           coverexclusion=self.getConfig('cover_exclusion_regexp'))
                if longdesc:
-                    logger.debug("---set longdesc:%s"%longdesc)
+                    # logger.debug("---set longdesc:%s"%longdesc)
                    img['longdesc'] = longdesc
            except AttributeError as ae:
                logger.info("Parsing for img tags failed--probably poor input HTML. Skipping img(%s)"%img)

@@ -828,7 +833,9 @@ try to download.</p>
            ## handle identifiers that otherwise appear to be
            ## selectors themselves. #966
            try:
-                if href[0] == "#" and soup.select_one("[id='%s']"%href[1:]):
+                # logger.debug("Search for internal link anchor href:(%s)"%href)
+                if href[0] == "#" and soup.select_one("[id='%s'], [name='%s']"%(href[1:],href[1:])):
+                    # logger.debug("Found internal link anchor href:(%s)"%href)
                    hrefurl = href
            except Exception as e:
                logger.debug("Search for internal link anchor failed href:(%s)"%href)

View file

@@ -317,8 +317,6 @@ class BaseEfictionAdapter(BaseSiteAdapter):
            for val in re.split(r"\s*,\s*", value):
                # TODO this should be an official field I guess
                self.story.addToList('challenge', val)
-        elif key == 'Chapters':
-            self.story.setMetadata('numChapters', int(value))
        elif key == 'Rating' or key == 'Rated':
            self.story.setMetadata('rating', value)
        elif key == 'Word count':

@@ -446,7 +444,7 @@ class BaseEfictionAdapter(BaseSiteAdapter):
        if sn:
            self.story.setMetadata('storynotes', stripHTML(sn))

-        if not self.story.getMetadata('rating'):
+        if not self.story.getMetadataRaw('rating'):
            self.getRatingFromTOC();

        ## Chapter URLs

View file

@@ -320,7 +320,6 @@ class BaseOTWAdapter(BaseSiteAdapter):
        #   break epub update.
        # Find the chapters:
        chapters=soup.find_all('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
-        self.story.setMetadata('numChapters',len(chapters))
        logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
        if len(chapters)==1:
            self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+chapters[0]['href'])

View file

@@ -22,6 +22,7 @@ from .base_browsercache import BaseBrowserCache, CACHE_DIR_CONFIG
 from .browsercache_simple import SimpleCache
 from .browsercache_blockfile import BlockfileCache
 from .browsercache_firefox2 import FirefoxCache2
+from .browsercache_sqldb import SqldbCache

 import logging
 logger = logging.getLogger(__name__)

@@ -34,12 +35,13 @@ class BrowserCache(object):
     def __init__(self, site, getConfig_fn, getConfigList_fn):
         """Constructor for BrowserCache"""
         # import of child classes have to be inside the def to avoid circular import error
-        for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2]:
+        for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2, SqldbCache]:
             self.browser_cache_impl = browser_cache_class.new_browser_cache(site,
                                                                             getConfig_fn,
                                                                             getConfigList_fn)
             if self.browser_cache_impl is not None:
                 break
+            logger.debug("Not using Browser Cache Class %s"%browser_cache_class)
         if self.browser_cache_impl is None:
             raise BrowserCacheException("%s is not set, or directory does not contain a known browser cache type: '%s'"%
                                         (CACHE_DIR_CONFIG,getConfig_fn(CACHE_DIR_CONFIG)))
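
The constructor probes each cache implementation in order and keeps the first one that recognizes the configured directory. A self-contained sketch of that first-match pattern; the demo classes and their directory checks are invented, standing in for new_browser_cache() returning None on a format mismatch:

    import os

    class SimpleCacheDemo:
        @classmethod
        def new_browser_cache(cls, cache_dir):
            return cls() if os.path.isfile(os.path.join(cache_dir, 'index')) else None

    class SqldbCacheDemo:
        @classmethod
        def new_browser_cache(cls, cache_dir):
            return cls() if os.path.isfile(os.path.join(cache_dir, 'sqldb0')) else None

    impl = None
    for cache_class in [SimpleCacheDemo, SqldbCacheDemo]:
        impl = cache_class.new_browser_cache('/tmp/cache')   # path illustrative
        if impl is not None:
            break
        print('Not using', cache_class.__name__)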

View file

@@ -90,18 +90,23 @@ class BlockfileCache(BaseChromiumCache):
    def is_cache_dir(cache_dir):
        """Return True only if a directory is a valid Cache for this class"""
        if not os.path.isdir(cache_dir):
+            logger.debug("Cache dir not found")
            return False
        index_path = os.path.join(cache_dir, "index")
        if not os.path.isfile(index_path):
+            logger.debug("index file not found")
            return False
        with share_open(index_path, 'rb') as index_file:
            if struct.unpack('I', index_file.read(4))[0] != INDEX_MAGIC_NUMBER:
+                logger.debug("index file failed magic number check")
                return False
        data0_path = os.path.join(cache_dir, "data_0")
        if not os.path.isfile(data0_path):
+            logger.debug("data_0 file not found")
            return False
        with share_open(data0_path, 'rb') as data0_file:
            if struct.unpack('I', data0_file.read(4))[0] != BLOCK_MAGIC_NUMBER:
+                logger.debug("data_0 failed magic number check")
                return False
        return True
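
Each magic-number check reads the first four bytes of a file as an unsigned 32-bit integer and compares it against a known constant. A round-trip illustration with an in-memory file; the constant is illustrative, not necessarily Chrome's real value:

    import io
    import struct

    MAGIC = 0xC103CAC3                     # illustrative 32-bit magic
    fake_index = io.BytesIO(struct.pack('I', MAGIC))
    assert struct.unpack('I', fake_index.read(4))[0] == MAGIC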

View file

@@ -68,6 +68,7 @@ class FirefoxCache2(BaseBrowserCache):
        """Return True only if a directory is a valid Cache for this class"""
        # logger.debug("\n\n1Starting cache check\n\n")
        if not os.path.isdir(cache_dir):
+            logger.debug("Cache dir not found")
            return False
        ## check at least one entry file exists.
        for en_fl in glob.iglob(os.path.join(cache_dir, 'entries', '????????????????????????????????????????')):

@@ -75,6 +76,7 @@ class FirefoxCache2(BaseBrowserCache):
            k = _validate_entry_file(en_fl)
            if k is not None:
                return True
+        logger.debug("No valid cache files found")
        return False

    def make_keys(self,url):
def make_keys(self,url): def make_keys(self,url):

View file

@@ -76,15 +76,19 @@ class SimpleCache(BaseChromiumCache):
    def is_cache_dir(cache_dir):
        """Return True only if a directory is a valid Cache for this class"""
        if not os.path.isdir(cache_dir):
+            logger.debug("Cache dir not found")
            return False
        index_file = os.path.join(cache_dir, "index")
-        if not (os.path.isfile(index_file) and os.path.getsize(index_file) == 24):
+        if not os.path.isfile(index_file) or os.path.getsize(index_file) > 24:
+            logger.debug("index file not found or too big(%s)"%os.path.getsize(index_file))
            return False
        real_index_file = os.path.join(cache_dir, "index-dir", "the-real-index")
        if not os.path.isfile(real_index_file):
+            logger.debug("real_index_file not found")
            return False
        with share_open(real_index_file, 'rb') as index_file:
            if struct.unpack('QQ', index_file.read(16))[1] != THE_REAL_INDEX_MAGIC_NUMBER:
+                logger.debug("real_index_file failed magic number check")
                return False
        try:
            # logger.debug("\n\nStarting cache check\n\n")

@@ -92,9 +96,11 @@ class SimpleCache(BaseChromiumCache):
                k = _validate_entry_file(en_fl)
                if k is not None:
                    return True
-        except SimpleCacheException:
+        except SimpleCacheException as sce:
            # raise
+            logger.debug(sce)
            return False
+        logger.debug("No valid cache files found")
        return False

    def get_data_key_impl(self, url, key):
def get_data_key_impl(self, url, key): def get_data_key_impl(self, url, key):

View file

@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2026 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import absolute_import
+
+import os
+import apsw
+import ctypes
+
+# note share_open (on windows CLI) is implicitly readonly.
+from .share_open import share_open
+from .base_chromium import BaseChromiumCache
+from .chromagnon import SuperFastHash
+
+import logging
+logger = logging.getLogger(__name__)
+
+class SqldbCache(BaseChromiumCache):
+    """Class to access data stream in Chrome Disk Sqldb Cache format cache files"""
+
+    def __init__(self, *args, **kargs):
+        """Constructor for SqldbCache"""
+        super(SqldbCache,self).__init__(*args, **kargs)
+        logger.debug("Using SqldbCache")
+
+    # def scan_cache_keys(self):
+    ## XXX will impl a scan if and when needed.  It's a lot easier
+    ## to peek inside an sqlite
+
+    @staticmethod
+    def is_cache_dir(cache_dir):
+        """Return True only if a directory is a valid Cache for this class"""
+        if not os.path.isdir(cache_dir):
+            logger.debug("Cache dir not found")
+            return False
+        index_path = os.path.join(cache_dir, "index")
+        if not os.path.isfile(index_path):
+            logger.debug("index file not found")
+            return False
+        sqldb0_path = os.path.join(cache_dir, "sqldb0")
+        if not os.path.isfile(sqldb0_path):
+            logger.debug("sqldb0 file not found")
+            return False
+        ## XXX check schema of db?
+        return True
+
+    ## XXX others use share_open() - will sqlite open work concurrently?
+    def get_data_key_impl(self, url, key):
+        """
+        returns location, entry age(unix epoch), content-encoding and
+        raw(compressed) data
+        """
+        location, age, encoding, data = '', None, None, None
+        qstr = 'SELECT last_used, head, blob FROM resources as r join blobs as b on b.res_id=r.res_id where cache_key_hash=?'
+        cache_key_hash = _key_hash(key)
+        logger.debug("           key:%s"%key)
+        logger.debug("cache_key_hash:%s"%cache_key_hash)
+        ## XXX worth optimizing to keep sql conn open?
+        from ..six.moves.urllib.request import pathname2url
+        fileuri = os.path.join(self.cache_dir, "sqldb0") # pathname2url()
+        logger.debug(fileuri)
+        shareopenVFS = ShareOpenVFS()
+        logger.debug("VFS available %s"% apsw.vfs_names())
+        with apsw.Connection("file:"+fileuri+"?immutable=1",
+                             flags=apsw.SQLITE_OPEN_READONLY | apsw.SQLITE_OPEN_URI,
+                             vfs=shareopenVFS.vfs_name
+                             ) as db:
+            logger.debug("db flags:%xd"%db.open_flags)
+            logger.debug("db vfs:%s"%db.open_vfs)
+            for last, head, blob in db.execute(qstr,[cache_key_hash]):
+                row_age = self.make_age(last)
+                if age and row_age < age:
+                    logger.debug("skipping an older row for same hash")
+                    break
+                age = row_age
+                logger.debug("age from last_used:%s"%age)
+                ## cheesy way to pull out the http headers, inspired
+                ## by equal cheese in chromagnon/cacheData.py.  Only
+                ## actually care about location & content-encoding,
+                ## ignore the rest.
+                head = head[head.index(b'HTTP'):]
+                head = head[:head.index(b'\x00\x00')]
+                # logger.debug(head)
+                for line in head.split(b'\0'):
+                    logger.debug(line)
+                    if b'content-encoding' in line.lower():
+                        encoding = line.split(b':')[1].strip().lower()
+                        logger.debug("encoding from header:%s"%encoding)
+                    if b'location' in line.lower():
+                        location = b':'.join(line.split(b':')[1:]).strip()
+                        logger.debug("location from header:%s"%location)
+                ## XXX might need entry age from header, too.
+                ## Hoping db last_used is equiv.
+                data = blob
+        if data:
+            return (location, age, encoding, data)
+        else:
+            return None
+
+## calculate SuperFastHash, but the sql saved it signed.
+def _key_hash(key):
+    unsigned_hash = SuperFastHash.superFastHash(key)
+    number = unsigned_hash & 0xFFFFFFFF
+    return ctypes.c_int32(number).value
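
_key_hash() reinterprets an unsigned 32-bit hash as the signed value SQLite stored. A quick worked check with an invented hash value:

    import ctypes

    unsigned_hash = 0xDEADBEEF                               # example 32-bit hash
    signed = ctypes.c_int32(unsigned_hash & 0xFFFFFFFF).value
    assert signed == -559038737                              # same bits, signed view

The mask keeps only the low 32 bits; c_int32 then gives the two's-complement reading, so values at or above 0x80000000 come out negative, matching the signed column in the database.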
+class ShareOpenVFS(apsw.VFS):
+    def __init__(self):
+        self.vfs_name = 'shareopen'
+        super().__init__(name=self.vfs_name, base='')
+
+    def xAccess(self, pathname, flags):
+        return True
+
+    def xFullPathname(self, filename):
+        return filename
+
+    def xDelete(self, filename, syncdir):
+        logger.debug("xDelete NOT DELETING")
+        pass
+
+    def xOpen(self, name, flags):
+        return ShareOpenVFSFile(name, flags)
+
+class ShareOpenVFSFile:
+    def __init__(self, name, flags):
+        self.filename = name.filename() if isinstance(name, apsw.URIFilename) else name
+        self.filename = os.path.normpath(self.filename)
+        logger.debug("Doing share open(%s)"%self.filename)
+        self.file = share_open(self.filename, 'rb')
+
+    def xRead(self, amount, offset):
+        self.file.seek(offset, 0)
+        return self.file.read(amount)
+
+    def xFileSize(self):
+        return os.stat(self.filename).st_size
+
+    def xClose(self):
+        self.file.close()
+
+    def xSectorSize(self):
+        return 0
+
+    def xFileControl(self, *args):
+        return False
+
+    def xCheckReservedLock(self):
+        return False
+
+    def xLock(self, level):
+        pass
+
+    def xUnlock(self, level):
+        pass
+
+    def xSync(self, flags):
+        return True
+
+    def xTruncate(self, newsize):
+        logger.debug("xTruncate NOT TRUNCING")
+        pass
+
+    def xWrite(self, data, offset):
+        logger.debug("xWrite NOT WRITING")
+        pass
View file

@@ -27,8 +27,7 @@ import pprint
 import string
 import os, sys, platform

-version="4.54.0"
+version="4.57.7"
 os.environ['CURRENT_VERSION_ID']=version

 global_cache = 'global_cache'

@@ -51,6 +50,8 @@ from fanficfare.geturls import get_urls_from_page, get_urls_from_imap
 from fanficfare.six.moves import configparser
 from fanficfare.six import text_type as unicode

+from fanficfare.fff_profile import do_cprofile
+
 def write_story(config, adapter, writeformat,
                 metaonly=False, nooutput=False,
                 outstream=None):

@@ -346,6 +347,7 @@ def main(argv=None,
     dispatch(options, urls, passed_defaultsini, passed_personalini, warn, fail)

 # make rest a function and loop on it.
+@do_cprofile
 def do_download(arg,
                 options,
                 passed_defaultsini,

View file

@ -139,19 +139,6 @@ def get_valid_sections():
allowedsections.append('%s:%s'%(section,f)) allowedsections.append('%s:%s'%(section,f))
return allowedsections return allowedsections
def get_valid_list_entries():
return list(['category',
'genre',
'characters',
'ships',
'warnings',
'extratags',
'author',
'authorId',
'authorUrl',
'lastupdate',
])
boollist=['true','false'] boollist=['true','false']
base_xenforo2_list=['base_xenforo2forum', base_xenforo2_list=['base_xenforo2forum',
'forums.sufficientvelocity.com', 'forums.sufficientvelocity.com',
@ -188,7 +175,7 @@ def get_valid_set_options():
This is to further restrict keywords to certain sections and/or This is to further restrict keywords to certain sections and/or
values. get_valid_keywords() below is the list of allowed values. get_valid_keywords() below is the list of allowed
keywords. Any keyword listed here must also be listed there. keywords. Any keyword not listed here must be listed there.
This is what's used by the code when you save personal.ini in This is what's used by the code when you save personal.ini in
plugin that stops and points out possible errors in keyword plugin that stops and points out possible errors in keyword
@ -343,6 +330,158 @@ def get_valid_set_options():
return dict(valdict) return dict(valdict)
# *known* keywords -- or rather regexps for them.
def get_valid_keywords():
'''
Among other things, this list is used by the color highlighting in
personal.ini editing in plugin. Note that entries in
get_valid_set_options() do not need to be duplicated here anymore.
'''
return list(get_valid_set_options().keys())+\
['(in|ex)clude_metadata_(pre|post)',
'add_category_when_multi_category',
'add_genre_when_multi_category',
'adult_ratings',
'allow_unsafe_filename',
'always_overwrite',
'anthology_merge_keepsingletocs',
'anthology_tags',
'anthology_title_pattern',
'background_color',
'browser_cache_age_limit',
'chapter_end',
'chapter_start',
'chapter_title_add_pattern',
'chapter_title_addnew_pattern',
'chapter_title_def_pattern',
'chapter_title_error_mark',
'chapter_title_new_pattern',
'chapter_title_strip_pattern',
'chardet_confidence_limit',
'comma_entries',
'connect_timeout',
'continue_on_chapter_error_try_limit',
'convert_images_to',
'cover_content',
'cover_exclusion_regexp',
'cover_min_size',
'custom_columns_settings',
'dateCreated_format',
'datePublished_format',
'dateUpdated_format',
'datethreadmark_format',
'default_cover_image',
'description_limit',
'epub_version',
'exclude_editor_signature',
'exclude_notes',
'extra_logpage_entries',
'extra_subject_tags',
'extra_titlepage_entries',
'extra_valid_entries',
'extracategories',
'extracharacters',
'extragenres',
'extraships',
'extratags',
'extrawarnings',
'fail_on_password',
'file_end',
'file_start',
'fileformat',
'find_chapters',
'fix_pseudo_html',
'flaresolverr_proxy_address',
'flaresolverr_proxy_port',
'flaresolverr_proxy_protocol',
'flaresolverr_proxy_timeout',
'flaresolverr_session',
'force_cover_image',
'force_img_self_referer_regexp',
'force_login',
'generate_cover_settings',
'http_proxy',
'https_proxy',
'ignore_chapter_url_list',
'image_max_size',
'include_subject_tags',
'join_string_authorHTML',
'keep_empty_tags',
'keep_html_attrs',
'keep_summary_html',
'logpage_end',
'logpage_entries',
'logpage_entry',
'logpage_start',
'logpage_update_end',
'logpage_update_start',
'make_directories',
'make_linkhtml_entries',
'max_fg_sleep',
'max_fg_sleep_at_downloads',
'max_zalgo',
'min_fg_sleep',
'no_image_processing_regexp',
'nsapa_proxy_address',
'nsapa_proxy_port',
'order_threadmarks_by_date_categories',
'output_css',
'output_filename',
'output_filename_safepattern',
'password',
'post_process_cmd',
'rating_titles',
'reader_posts_per_page',
'remove_tags',
'remove_transparency',
'replace_chapter_text',
'replace_metadata',
'replace_tags_with_spans',
'replace_xbr_with_hr',
'show_spoiler_tags',
'skip_threadmarks_categories',
'slow_down_sleep_time',
'sort_ships_splits',
'strip_chapter_numeral',
'threadmark_category_order',
'threadmarks_per_page',
'title_chapter_range_pattern',
'titlepage_end',
'titlepage_entries',
'titlepage_entry',
'titlepage_no_title_entry',
'titlepage_start',
'titlepage_wide_entry',
'tocpage_end',
'tocpage_entry',
'tocpage_start',
'user_agent',
'username',
'website_encodings',
'wide_titlepage_entries',
'wrap_width',
'zip_filename',
'zip_output'
]
# *known* entry keywords -- or rather regexps for them.
def get_valid_entry_keywords():
return list(['%s_(label|format)',
'(default_value|include_in|join_string|keep_in_order)_%s',])
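## (added illustration) with %s filled in per entry, these patterns match
## keywords such as 'eroticatags_label' and 'include_in_category', as seen
## in the defaults.ini sections further below.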
def get_valid_list_entries():
return list(['category',
'genre',
'characters',
'ships',
'warnings',
'extratags',
'author',
'authorId',
'authorUrl',
'lastupdate',
])
def get_valid_scalar_entries(): def get_valid_scalar_entries():
return list(['series', return list(['series',
'seriesUrl', 'seriesUrl',
@ -382,263 +521,20 @@ def get_valid_scalar_entries():
def get_valid_entries(): def get_valid_entries():
return get_valid_list_entries() + get_valid_scalar_entries() return get_valid_list_entries() + get_valid_scalar_entries()
# *known* keywords -- or rather regexps for them. ## Metadata entries that are not allowed to be changed.
def get_valid_keywords(): def get_immutable_entries():
''' return list([
Among other things, this list is used by the color highlighting in 'authorId',
personal.ini editing in plugin. Note that it's separate from 'authorUrl',
value checking and most keywords need to be added to both. 'storyId',
''' 'storyUrl',
return list(['(in|ex)clude_metadata_(pre|post)', 'langcode',
'add_chapter_numbers', 'numChapters',
'add_genre_when_multi_category', 'site',
'add_category_when_multi_category', 'anthology',
'adult_ratings', 'newforanthology',
'allow_unsafe_filename', 'cover_image',
'always_overwrite', ])
'anthology_tags',
'anthology_title_pattern',
'anthology_merge_keepsingletocs',
'background_color',
'bulk_load',
'chapter_end',
'chapter_start',
'chapter_title_strip_pattern',
'chapter_title_def_pattern',
'chapter_title_add_pattern',
'chapter_title_new_pattern',
'chapter_title_addnew_pattern',
'title_chapter_range_pattern',
'mark_new_chapters',
'check_next_chapter',
'meta_from_last_chapter',
'skip_author_cover',
'try_shortened_title_urls',
'collect_series',
'comma_entries',
'connect_timeout',
'convert_images_to',
'cover_content',
'cover_exclusion_regexp',
'custom_columns_settings',
'dateCreated_format',
'datePublished_format',
'dateUpdated_format',
'default_cover_image',
'force_cover_image',
'force_img_self_referer_regexp',
'description_limit',
'do_update_hook',
'use_archived_author',
'use_view_full_work',
'use_workskin',
'always_login',
'exclude_notes',
'remove_authorfootnotes_on_update',
'use_archive_transformativeworks_org',
'use_archiveofourown_gay',
'exclude_editor_signature',
'extra_logpage_entries',
'extra_subject_tags',
'extra_titlepage_entries',
'extra_valid_entries',
'extratags',
'extracategories',
'extragenres',
'extracharacters',
'extraships',
'extrawarnings',
'fail_on_password',
'file_end',
'file_start',
'fileformat',
'find_chapters',
'fix_fimf_blockquotes',
'keep_prequel_in_description',
'scrape_bookshelf',
'include_author_notes',
'force_login',
'generate_cover_settings',
'grayscale_images',
'image_max_size',
'include_images',
'jpg_quality',
'additional_images',
'include_logpage',
'logpage_at_end',
'calibre_series_meta',
'force_update_epub_always',
'page_progression_direction_rtl',
'include_subject_tags',
'include_titlepage',
'include_tocpage',
'chardet_confidence_limit',
'is_adult',
'join_string_authorHTML',
'keep_style_attr',
'keep_title_attr',
'keep_html_attrs',
'remove_class_chapter',
'replace_tags_with_spans',
'keep_empty_tags',
'remove_tags',
'keep_summary_html',
'logpage_end',
'logpage_entries',
'logpage_entry',
'logpage_start',
'logpage_update_end',
'logpage_update_start',
'make_directories',
'make_firstimage_cover',
'use_old_cover',
'make_linkhtml_entries',
'max_fg_sleep',
'max_fg_sleep_at_downloads',
'min_fg_sleep',
'never_make_cover',
'cover_min_size',
'no_image_processing',
'no_image_processing_regexp',
'dedup_img_files',
'convert_inline_images',
'non_breaking_spaces',
'download_text_version',
'nook_img_fix',
'output_css',
'output_filename',
'output_filename_safepattern',
'password',
'post_process_cmd',
'rating_titles',
'remove_transparency',
'replace_br_with_p',
'replace_chapter_text',
'replace_hr',
'remove_empty_p',
'replace_xbr_with_hr',
'replace_metadata',
'slow_down_sleep_time',
'sort_ships',
'sort_ships_splits',
'strip_chapter_numbers',
'strip_chapter_numeral',
'strip_text_links',
'centeredcat_to_characters',
'pairingcat_to_characters_ships',
'romancecat_to_characters_ships',
'use_meta_keywords',
'clean_chapter_titles',
'conditionals_use_lists',
'description_in_chapter',
'order_chapters_by_date',
'fetch_stories_from_api',
'tags_from_chapters',
'dates_from_chapters',
'include_chapter_descriptions_in_summary',
'inject_chapter_title',
'inject_chapter_image',
'append_datepublished_to_storyurl',
'auto_sub',
'titlepage_end',
'titlepage_entries',
'titlepage_entry',
'titlepage_no_title_entry',
'titlepage_start',
'titlepage_use_table',
'titlepage_wide_entry',
'tocpage_end',
'tocpage_entry',
'tocpage_start',
'tweak_fg_sleep',
'universe_as_series',
'use_ssl_unverified_context',
'use_ssl_default_seclevelone',
'http_proxy',
'https_proxy',
'use_cloudscraper',
'use_basic_cache',
'use_browser_cache',
'use_browser_cache_only',
'open_pages_in_browser',
'use_nsapa_proxy',
'nsapa_proxy_address',
'nsapa_proxy_port',
'use_flaresolverr_proxy',
'flaresolverr_proxy_address',
'flaresolverr_proxy_port',
'flaresolverr_proxy_protocol',
'flaresolverr_proxy_timeout',
'use_flaresolverr_session',
'flaresolverr_session',
'browser_cache_path',
'browser_cache_age_limit',
'user_agent',
'username',
'website_encodings',
'wide_titlepage_entries',
'windows_eol',
'wrap_width',
'zip_filename',
'zip_output',
'capitalize_forumtags',
'continue_on_chapter_error',
'chapter_title_error_mark',
'continue_on_chapter_error_try_limit',
'minimum_threadmarks',
'first_post_title',
'always_include_first_post',
'always_reload_first_chapter',
'always_use_forumtags',
'use_reader_mode',
'author_avatar_cover',
'reader_posts_per_page',
'threadmarks_per_page',
'remove_spoilers',
'legend_spoilers',
'details_spoilers',
'apocrypha_to_omake',
'skip_threadmarks_categories',
'fix_relative_text_links',
'normalize_text_links',
'internalize_text_links',
'replace_failed_smilies_with_alt_text',
'use_threadmark_wordcounts',
'always_include_first_post_chapters',
'threadmark_category_order',
'order_threadmarks_by_date',
'order_threadmarks_by_date_categories',
'reveal_invisible_text',
'use_threadmarks_description',
'use_threadmarks_status',
'use_threadmarks_cover',
'skip_sticky_first_posts',
'include_dice_rolls',
'include_nonauthor_poster',
'link_embedded_media',
'include_chapter_banner_images',
'dateUpdated_method',
'datethreadmark_format',
'fix_pseudo_html',
'fix_excess_space',
'dedup_order_chapter_list',
'ignore_chapter_url_list',
'include_appendices',
'dedup_chapter_list',
'show_timestamps',
'show_nsfw_cover_images',
'show_spoiler_tags',
'max_zalgo',
'decode_emails',
'epub_version',
'prepend_section_titles',
'replace_text_formatting',
])
# *known* entry keywords -- or rather regexps for them.
def get_valid_entry_keywords():
return list(['%s_(label|format)',
'(default_value|include_in|join_string|keep_in_order)_%s',])
# Moved here for test_config. # Moved here for test_config.
def make_generate_cover_settings(param): def make_generate_cover_settings(param):
@ -706,9 +602,13 @@ class Configuration(ConfigParser):
self.listTypeEntries = get_valid_list_entries() self.listTypeEntries = get_valid_list_entries()
self.validEntries = get_valid_entries() self.validEntries = get_valid_entries()
self.immutableEntries = get_immutable_entries()
self.url_config_set = False self.url_config_set = False
## to improve performance, cache config values.
self.cached_config = {}
def section_url_names(self,domain,section_url_f): def section_url_names(self,domain,section_url_f):
## domain is passed as a method to limit the damage if/when an ## domain is passed as a method to limit the damage if/when an
## adapter screws up _section_url ## adapter screws up _section_url
@ -750,6 +650,12 @@ class Configuration(ConfigParser):
def getValidMetaList(self): def getValidMetaList(self):
return self.validEntries + self.getConfigList("extra_valid_entries") return self.validEntries + self.getConfigList("extra_valid_entries")
def isImmutableMetaEntry(self, key):
return key in self.getImmutableMetaList()
def getImmutableMetaList(self):
return self.immutableEntries
# used by adapters & writers, non-convention naming style # used by adapters & writers, non-convention naming style
def hasConfig(self, key): def hasConfig(self, key):
return self.has_config(self.sectionslist, key) return self.has_config(self.sectionslist, key)
@ -780,6 +686,10 @@ class Configuration(ConfigParser):
return self.get_config(self.sectionslist,key,default) return self.get_config(self.sectionslist,key,default)
def get_config(self, sections, key, default=""): def get_config(self, sections, key, default=""):
try:
return self.cached_config[(tuple(sections),key)]
except KeyError as ke:
pass
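## (added note) the cache key pairs the effective section list with the
## keyword, e.g. (('www.example.com','defaults'), 'output_css') --
## hypothetical values for illustration.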
val = default val = default
val_files = [] val_files = []
@ -824,6 +734,7 @@ class Configuration(ConfigParser):
except (configparser.NoOptionError, configparser.NoSectionError) as e: except (configparser.NoOptionError, configparser.NoSectionError) as e:
pass pass
self.cached_config[(tuple(sections),key)] = val
return val return val
# split and strip each. # split and strip each.
@ -1218,6 +1129,9 @@ class Configurable(object):
def isValidMetaEntry(self, key): def isValidMetaEntry(self, key):
return self.configuration.isValidMetaEntry(key) return self.configuration.isValidMetaEntry(key)
def isImmutableMetaEntry(self, key):
return self.configuration.isImmutableMetaEntry(key)
def getValidMetaList(self): def getValidMetaList(self):
return self.configuration.getValidMetaList() return self.configuration.getValidMetaList()

View file

@ -1592,18 +1592,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti
[adult-fanfiction.org] [adult-fanfiction.org]
use_basic_cache:true
extra_valid_entries:eroticatags,disclaimer extra_valid_entries:eroticatags,disclaimer
eroticatags_label:Erotica Tags eroticatags_label:Erotica Tags
disclaimer_label:Disclaimer disclaimer_label:Disclaimer
extra_titlepage_entries:eroticatags,disclaimer extra_titlepage_entries:eroticatags,disclaimer
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
[althistory.com] [althistory.com]
## Note this is NOT the same as www.alternatehistory.com ## Note this is NOT the same as www.alternatehistory.com
## see [base_xenforoforum] ## see [base_xenforoforum]
@ -1717,13 +1712,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags & ## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it ## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_genre: freeformtags, ao3categories include_in_genre: genre, freeformtags, ao3categories
## AO3 uses the word 'category' differently than most sites. The ## AO3 uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata ## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it ## fandom in the standard metadata field category. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_category:fandoms include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way, ## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it. ## freefromtags will still work for people who've used it.
@ -1932,7 +1927,7 @@ make_linkhtml_entries:translators,betas
## For most sites, 'category' is the fandom, but fanfics.me has ## For most sites, 'category' is the fandom, but fanfics.me has
## fandoms and a separate category. By making it configurable, users ## fandoms and a separate category. By making it configurable, users
## can change it. ## can change it.
include_in_category:fandoms include_in_category:category,fandoms
[fanfictalk.com] [fanfictalk.com]
use_basic_cache:true use_basic_cache:true
@ -2708,13 +2703,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags & ## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it ## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_genre: freeformtags, ao3categories include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The ## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata ## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it ## fandom in the standard metadata field category. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_category:fandoms include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way, ## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it. ## freefromtags will still work for people who've used it.
@ -3015,13 +3010,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags & ## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it ## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_genre: freeformtags, ao3categories include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The ## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata ## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it ## fandom in the standard metadata field category. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_category:fandoms include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way, ## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it. ## freefromtags will still work for people who've used it.
@ -3150,8 +3145,8 @@ bookmarkmemo_label:ブックマークメモ
bookmarkprivate_label:非公開ブックマーク bookmarkprivate_label:非公開ブックマーク
subscribed_label:更新通知 subscribed_label:更新通知
include_in_genre: fullgenre include_in_genre: genre, fullgenre
#include_in_genre: biggenre, smallgenre #include_in_genre: genre, biggenre, smallgenre
## adds to titlepage_entries instead of replacing it. ## adds to titlepage_entries instead of replacing it.
#extra_titlepage_entries: fullgenre,biggenre,smallgenre,imprint,freeformtags,comments,reviews,bookmarks,ratingpoints,overallpoints,bookmarked,bookmarkcategory,bookmarkmemo,bookmarkprivate,subscribed #extra_titlepage_entries: fullgenre,biggenre,smallgenre,imprint,freeformtags,comments,reviews,bookmarks,ratingpoints,overallpoints,bookmarked,bookmarkcategory,bookmarkmemo,bookmarkprivate,subscribed
@ -3394,13 +3389,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags & ## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it ## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_genre: freeformtags, ao3categories include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The ## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata ## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it ## fandom in the standard metadata field category. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_category:fandoms include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way, ## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it. ## freefromtags will still work for people who've used it.
@ -3531,7 +3526,7 @@ upvotes_label:Upvotes
subscribers_label:Subscribers subscribers_label:Subscribers
views_label:Views views_label:Views
include_in_category:tags include_in_category:category,tags
#extra_titlepage_entries:upvotes,subscribers,views #extra_titlepage_entries:upvotes,subscribers,views
@ -3667,13 +3662,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags & ## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it ## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_genre: freeformtags, ao3categories include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The ## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata ## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it ## fandom in the standard metadata field category. By making it
## configurable, users can change it. ## configurable, users can change it.
include_in_category:fandoms include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way, ## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it. ## freefromtags will still work for people who've used it.
@ -4406,9 +4401,6 @@ extracategories:Buffy: The Vampire Slayer
extracharacters:Buffy, Spike extracharacters:Buffy, Spike
extraships:Spike/Buffy extraships:Spike/Buffy
[www.swi.org.ru]
use_basic_cache:true
[www.the-sietch.com] [www.the-sietch.com]
## see [base_xenforoforum] ## see [base_xenforoforum]

View file

@ -20,25 +20,9 @@ from .six import ensure_text, text_type as unicode
from .six import string_types as basestring from .six import string_types as basestring
from io import BytesIO from io import BytesIO
# from io import StringIO FONT_EXTS = ('ttf','otf','woff','woff2')
# import cProfile, pstats
# from pstats import SortKey from fanficfare.fff_profile import do_cprofile
# def do_cprofile(func):
# def profiled_func(*args, **kwargs):
# profile = cProfile.Profile()
# try:
# profile.enable()
# result = func(*args, **kwargs)
# profile.disable()
# return result
# finally:
# # profile.sort_stats(SortKey.CUMULATIVE).print_stats(20)
# s = StringIO()
# sortby = SortKey.CUMULATIVE
# ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
# ps.print_stats(20)
# print(s.getvalue())
# return profiled_func
import bs4 import bs4
@ -49,9 +33,52 @@ def get_dcsource_chaptercount(inputio):
## getsoups=True to check for continue_on_chapter_error chapters. ## getsoups=True to check for continue_on_chapter_error chapters.
return get_update_data(inputio,getfilecount=True,getsoups=True)[:2] # (source,filecount) return get_update_data(inputio,getfilecount=True,getsoups=True)[:2] # (source,filecount)
def get_cover_data(inputio): ## only finds and returns cover image type and data, not cover page.
# (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata) ## should work on any epub. Added for anthology cover issues.
return get_update_data(inputio,getfilecount=True,getsoups=False)[4] def get_cover_img(inputio):
# (oldcoverimgtype,oldcoverimgdata)
epub = ZipFile(inputio, 'r') # works equally well with inputio as a path or a blob
## Find the .opf file.
container = epub.read("META-INF/container.xml")
containerdom = parseString(container)
rootfilenodelist = containerdom.getElementsByTagName("rootfile")
rootfilename = rootfilenodelist[0].getAttribute("full-path")
contentdom = parseString(epub.read(rootfilename))
firstmetadom = contentdom.getElementsByTagName("metadata")[0]
## Save the path to the .opf file--hrefs inside it are relative to it.
relpath = get_path_part(rootfilename)
# logger.debug("relpath:%s"%relpath)
# <meta name="cover" content="cover"/>
coverid = None
covertype = None
coverdata = None
for metatag in firstmetadom.getElementsByTagName("meta"):
if metatag.getAttribute('name') == 'cover':
coverid = metatag.getAttribute('content')
# logger.debug("coverid:%s"%coverid)
break
if coverid:
for item in contentdom.getElementsByTagName("item"):
if item.getAttribute('id') == coverid:
coverhref = relpath+item.getAttribute("href")
## remove .. and the part it obviates
coverhref = re.sub(r"([^/]+/\.\./)","",coverhref)
covertype = item.getAttribute('media-type')
# logger.debug("covertype:%s coverhref:%s"%(covertype,coverhref))
try:
coverdata = epub.read(coverhref)
# logger.debug("coverdatalen:%s"%len(coverdata))
except Exception as e:
logger.info("Failed to read cover (%s): %s"%(coverhref,e))
covertype, coverdata = None, None
break
return covertype, coverdata
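## Hedged usage sketch (added; not in the original diff): inputio can be a
## path or a blob, mirroring the ZipFile call above; the filename is
## hypothetical:
# covertype, coverdata = get_cover_img('anthology.epub')
# if coverdata:
#     pass # reuse the existing cover instead of generating a new one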
def get_oldcover(epub,relpath,contentdom,item): def get_oldcover(epub,relpath,contentdom,item):
href=relpath+item.getAttribute("href") href=relpath+item.getAttribute("href")
@ -156,7 +183,11 @@ def get_update_data(inputio,
# (_u\d+)? is from calibre convert naming files # (_u\d+)? is from calibre convert naming files
# 3/OEBPS/file0005_u3.xhtml etc. # 3/OEBPS/file0005_u3.xhtml etc.
if getsoups: if getsoups:
soup = make_soup(epub.read(href).decode("utf-8")) try:
soup = make_soup(epub.read(href).decode("utf-8"))
except:
logger.warning("Listed chapter file(%s) not found in epub, skipping."%href)
continue
for img in soup.find_all('img'): for img in soup.find_all('img'):
newsrc='' newsrc=''
longdesc='' longdesc=''
@ -191,6 +222,9 @@ def get_update_data(inputio,
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style): for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
if style_url.startswith('failedtoload'): if style_url.startswith('failedtoload'):
continue continue
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Updating inline/embedded style url(%s)"%style_url) logger.debug("Updating inline/embedded style url(%s)"%style_url)
newsrc='' newsrc=''
longdesc='' longdesc=''
@ -257,11 +291,18 @@ def get_update_data(inputio,
## update. output_css is configured, but 'extra_css' like ## update. output_css is configured, but 'extra_css' like
## otw workskin might vary. ## otw workskin might vary.
if item.getAttribute("media-type") == "text/css" and getsoups: if item.getAttribute("media-type") == "text/css" and getsoups:
style = epub.read(href).decode("utf-8") try:
style = epub.read(href).decode("utf-8")
except:
logger.warning("Listed CSS file(%s) not found in epub, skipping."%href)
continue
if 'url(' in style: if 'url(' in style:
# logger.debug("%s CSS url:%s"%(href,style)) # logger.debug("%s CSS url:%s"%(href,style))
## the pattern will also accept mismatched '/", which is broken CSS. ## the pattern will also accept mismatched '/", which is broken CSS.
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style): for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Updating sheet style url(%s)"%style_url) logger.debug("Updating sheet style url(%s)"%style_url)
newsrc='' newsrc=''
longdesc='' longdesc=''
@ -288,10 +329,18 @@ def get_update_data(inputio,
for item in contentdom.getElementsByTagName("item"): for item in contentdom.getElementsByTagName("item"):
href=relpath+item.getAttribute("href") href=relpath+item.getAttribute("href")
if item.getAttribute("media-type").startswith("image/") and getsoups: if item.getAttribute("media-type").startswith("image/") and getsoups:
if oldcover and href == oldcover[3]:
# don't include cover image, already handled by
# oldcover code and can trip de-dup unintentionally.
continue
img_url = href.replace("OEBPS/","") img_url = href.replace("OEBPS/","")
# logger.debug("-->img img:%s"%img_url) # logger.debug("-->img img:%s"%img_url)
if img_url not in images: if img_url not in images:
data = epub.read(href) try:
data = epub.read(href)
except:
logger.warning("Listed image file(%s) not found in epub, skipping."%href)
continue
# logger.debug("-->img Add oldimages:%s"%href) # logger.debug("-->img Add oldimages:%s"%href)
images[img_url] = (img_url, data) images[img_url] = (img_url, data)
try: try:
@ -383,7 +432,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
return ahref return ahref
return None return None
# @do_cprofile @do_cprofile
def reset_orig_chapters_epub(inputio,outfile): def reset_orig_chapters_epub(inputio,outfile):
inputepub = ZipFile(inputio, 'r') # works equally well with a path or a blob inputepub = ZipFile(inputio, 'r') # works equally well with a path or a blob
@ -436,28 +485,50 @@ def reset_orig_chapters_epub(inputio,outfile):
if re.match(r'.*/file\d+\.xhtml',zf): if re.match(r'.*/file\d+\.xhtml',zf):
#logger.debug("zf:%s"%zf) #logger.debug("zf:%s"%zf)
data = data.decode('utf-8') data = data.decode('utf-8')
# should be re-reading an FFF file, single soup should
# be good enough and halve processing time.
soup = make_soup(data,dblsoup=False)
chapterorigtitle = None ## For higher performance checking, don't need to
tag = soup.find('meta',{'name':'chapterorigtitle'}) ## make_soup if not different
if tag: header = data[0:data.find("</head>")]
chapterorigtitle = tag['content'] '''
<meta name="chapterorigtitle" content="8. Chapter 7" />
<meta name="chaptertoctitle" content="8. Chapter 7" />
<meta name="chaptertitle" content="8. (new) Chapter 7" />
'''
# logger.debug(header)
def get_meta_content(n,d):
m = re.match(r'.*<meta( name="%s"| content="(?P<found>[^"]+))+".*'%n,d,re.DOTALL)
if m:
# logger.debug("%s -> %s"%(n,m.groupdict().get('found',None)))
return m.groupdict().get('found',None)
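## (added illustration) given the sample header above,
## get_meta_content('chaptertitle', header) returns '8. (new) Chapter 7';
## it returns None when the meta tag is absent.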
# toctitle is separate for add_chapter_numbers:toconly users. chapterorigtitle = get_meta_content('chapterorigtitle',header)
chaptertoctitle = None chaptertoctitle = get_meta_content('chaptertoctitle',header)
tag = soup.find('meta',{'name':'chaptertoctitle'}) chaptertitle = get_meta_content('chaptertitle',header)
if tag:
chaptertoctitle = tag['content']
else:
chaptertoctitle = chapterorigtitle
chaptertitle = None if not (chapterorigtitle and chaptertoctitle and chaptertitle \
tag = soup.find('meta',{'name':'chaptertitle'}) and chapterorigtitle == chaptertitle):
if tag: # should be re-reading an FFF file, single soup should
chaptertitle = tag['content'] # be good enough and halve processing time.
chaptertitle_tag = tag soup = make_soup(data,dblsoup=False)
chapterorigtitle = None
tag = soup.find('meta',{'name':'chapterorigtitle'})
if tag:
chapterorigtitle = tag['content']
# toctitle is separate for add_chapter_numbers:toconly users.
chaptertoctitle = None
tag = soup.find('meta',{'name':'chaptertoctitle'})
if tag:
chaptertoctitle = tag['content']
else:
chaptertoctitle = chapterorigtitle
chaptertitle = None
tag = soup.find('meta',{'name':'chaptertitle'})
if tag:
chaptertitle = tag['content']
chaptertitle_tag = tag
#logger.debug("chaptertitle:(%s) chapterorigtitle:(%s)"%(chaptertitle, chapterorigtitle)) #logger.debug("chaptertitle:(%s) chapterorigtitle:(%s)"%(chaptertitle, chapterorigtitle))
if chaptertitle and chapterorigtitle and chapterorigtitle != chaptertitle: if chaptertitle and chapterorigtitle and chapterorigtitle != chaptertitle:

View file

@ -148,3 +148,12 @@ class HTTPErrorFFF(Exception):
class BrowserCacheException(Exception): class BrowserCacheException(Exception):
pass pass
class NotGoingToDownload(Exception):
def __init__(self,error,icon='dialog_error.png',showerror=True):
self.error=error
self.icon=icon
self.showerror=showerror
def __str__(self):
return self.error
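## Hedged usage sketch (added; not in the original diff); the message text
## is hypothetical:
# raise NotGoingToDownload("Reason shown to the user", showerror=False)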

fanficfare/fff_profile.py Normal file (44 lines added)
View file

@ -0,0 +1,44 @@
# Copyright 2026 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
## not compatible with py2; SortKey not available.
import sys
DO_PROFILING = False
if DO_PROFILING and sys.version_info >= (3, 7):
from io import StringIO
import cProfile, pstats
from pstats import SortKey
def do_cprofile(func):
def profiled_func(*args, **kwargs):
profile = cProfile.Profile()
try:
profile.enable()
result = func(*args, **kwargs)
profile.disable()
return result
finally:
# profile.print_stats()
s = StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
ps.print_stats(20)
print(s.getvalue())
return profiled_func
else:
## do-nothing for py2
def do_cprofile(func):
def profiled_func(*args, **kwargs):
return func(*args, **kwargs)
return profiled_func
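## Hedged usage sketch (added; not part of the new file): with DO_PROFILING
## set True on py3.7+, decorating a function prints its top 20
## cumulative-time stats after each call, as applied to do_download in
## cli.py above:
# from fanficfare.fff_profile import do_cprofile
# @do_cprofile
# def some_slow_function():
#     ...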

View file

@ -80,8 +80,10 @@ try:
def convert_image(url,data,sizes,grayscale, def convert_image(url,data,sizes,grayscale,
removetrans,imgtype="jpg",background='#ffffff',jpg_quality=95): removetrans,imgtype="jpg",background='#ffffff',jpg_quality=95):
# logger.debug("calibre convert_image called") # logger.debug("calibre convert_image called")
## I can just see somebody doing logo_svg.jpg
if url.lower().endswith('.svg') or '.svg?' in url.lower(): if url.lower().endswith('.svg') or '.svg?' in url.lower() \
or ensure_binary('<svg ') in data[:1000] \
or ensure_binary('xmlns="http://www.w3.org/2000/svg"') in data[:1000]:
raise exceptions.RejectImage("Calibre image processing chokes on SVG images.") raise exceptions.RejectImage("Calibre image processing chokes on SVG images.")
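## (added note) besides the URL check, the first 1000 bytes are sniffed for
## '<svg ' or the SVG xmlns, so a mis-named file like 'logo_svg.jpg' is
## still rejected.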
export = False export = False
img, format = image_and_format_from_data(data) img, format = image_and_format_from_data(data)
@ -656,7 +658,7 @@ class ImageStore:
if failure: if failure:
info['newsrc'] = 'failedtoload' info['newsrc'] = 'failedtoload'
info['actuallyused'] = False info['actuallyused'] = False
logger.debug("add_img(%s,%s,%s,%s,%s,used:%s)"%(url,ext,mime,uuid,info['newsrc'],info['actuallyused'])) # logger.debug("add_img(%s,%s,%s,%s,%s,used:%s)"%(url,ext,mime,uuid,info['newsrc'],info['actuallyused']))
return info return info
def cache_failed_url(self,url): def cache_failed_url(self,url):
@ -914,7 +916,7 @@ class Story(Requestable):
if key == "language": if key == "language":
try: try:
# getMetadata not just self.metadata[] to do replace_metadata. # getMetadata not just self.metadata[] to do replace_metadata.
self.setMetadata('langcode',langs[self.getMetadata(key)]) self.setMetadata('langcode',langs[self.getMetadataRaw(key)])
except: except:
self.setMetadata('langcode','en') self.setMetadata('langcode','en')
@ -1138,6 +1140,9 @@ class Story(Requestable):
removeallentities=False, removeallentities=False,
doreplacements=True, doreplacements=True,
seen_list={}): seen_list={}):
if self.isImmutableMetaEntry(key):
doreplacements = False
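## (added note) immutable entries -- storyUrl, authorId, etc., see
## get_immutable_entries() above -- bypass user replace_metadata processing.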
# check for a cached value to speed processing # check for a cached value to speed processing
if self.metadata_cache.is_cached_scalar(key,removeallentities,doreplacements): if self.metadata_cache.is_cached_scalar(key,removeallentities,doreplacements):
return self.metadata_cache.get_cached_scalar(key,removeallentities,doreplacements) return self.metadata_cache.get_cached_scalar(key,removeallentities,doreplacements)
@ -1306,6 +1311,9 @@ class Story(Requestable):
#print("getList(%s,%s)"%(listname,includelist)) #print("getList(%s,%s)"%(listname,includelist))
retlist = [] retlist = []
if self.isImmutableMetaEntry(listname):
doreplacements = False
# check for a cached value to speed processing # check for a cached value to speed processing
if not skip_cache and self.metadata_cache.is_cached_list(listname,removeallentities,doreplacements): if not skip_cache and self.metadata_cache.is_cached_list(listname,removeallentities,doreplacements):
return self.metadata_cache.get_cached_list(listname,removeallentities,doreplacements) return self.metadata_cache.get_cached_list(listname,removeallentities,doreplacements)
@ -1631,7 +1639,7 @@ class Story(Requestable):
## likely changed to jpg. ## likely changed to jpg.
(src,data)=oldimgs[url] (src,data)=oldimgs[url]
ext = src.split('.')[-1] ext = src.split('.')[-1]
logger.debug("load_oldimgs:(%s,%s,%s)"%(url,ext,imagetypes[ext])) # logger.debug("load_oldimgs:(%s,%s,%s)"%(url,ext,imagetypes[ext]))
self.img_store.add_img(url, self.img_store.add_img(url,
ext, ext,
imagetypes[ext], imagetypes[ext],
@ -1738,7 +1746,7 @@ class Story(Requestable):
(data,ext,mime) = no_convert_image(imgurl, (data,ext,mime) = no_convert_image(imgurl,
imgdata) imgdata)
else: else:
logger.debug("Doing image processing on (%s)"%imgurl) # logger.debug("Doing image processing on (%s)"%imgurl)
try: try:
sizes = [ int(x) for x in self.getConfigList('image_max_size',['580', '725']) ] sizes = [ int(x) for x in self.getConfigList('image_max_size',['580', '725']) ]
except Exception as e: except Exception as e:

View file

@ -16,7 +16,7 @@ name = "FanFicFare" # Required
# #
# For a discussion on single-sourcing the version, see # For a discussion on single-sourcing the version, see
# https://packaging.python.org/guides/single-sourcing-package-version/ # https://packaging.python.org/guides/single-sourcing-package-version/
version = "4.54.0" version = "4.57.7"
# This is a one-line description or tagline of what your project does. This # This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field: # corresponds to the "Summary" metadata field: