Compare commits

...

78 commits

Author SHA1 Message Date
Jim Miller
a172a7bd2b Bump Test Version 4.57.7 2026-05-07 13:54:08 -05:00
Jim Miller
ab103dce6e browsercache_sqldb: Better share_open and read-only. #1341 2026-05-07 13:54:02 -05:00
Jim Miller
892e9207f0 Bump Test Version 4.57.6 2026-05-06 19:53:58 -05:00
Jim Miller
b4e392fae1 browsercache_sqldb: Use share_open for windows file locking. #1341 2026-05-06 19:53:44 -05:00
Jim Miller
d9525d9726 Bump Test Version 4.57.5 2026-05-06 13:22:28 -05:00
Jim Miller
cb77b12754 Adding browsercache_sqldb for Yet Another caching scheme in Chrome. #1341 2026-05-06 13:22:22 -05:00
Jim Miller
b41a633821 Bump Test Version 4.57.4 2026-05-05 08:11:07 -05:00
Jim Miller
50c8db2992 browsercache_simple: Tweak index file size check. #1341 2026-05-05 08:10:59 -05:00
Jim Miller
ef6dd99bfe Bump Test Version 4.57.3 2026-05-04 15:05:25 -05:00
Jim Miller
59796ff537 Add debug out to Browser Cache cache dir checking #1341 2026-05-04 15:05:13 -05:00
Jim Miller
8ee0a6e898 Bump Test Version 4.57.2 2026-05-03 09:06:51 -05:00
Jim Miller
c53fc362bd Include genre/category in defaults.ini when include_in_X for extragenres/extracategories 2026-05-03 09:06:44 -05:00
Jim Miller
c87cfc1057 adapter_fanficauthorsnet: Domains changed from .nsns to -nsns 2026-05-01 10:10:37 -05:00
Jim Miller
6ee151c90a Bump Release Version 4.57.0 2026-05-01 09:38:27 -05:00
Jim Miller
db01c828a0 Update translations. 2026-05-01 09:37:13 -05:00
Jim Miller
4d03874f06 Fix a bad comment-out 2026-04-29 15:42:59 -05:00
Jim Miller
36f56483e6 Bump Test Version 4.56.10 2026-04-29 13:01:28 -05:00
Jim Miller
18e45a403b PI Anthology: Reuse epub cover if there is one. 2026-04-29 13:01:22 -05:00
Jim Miller
2e25172ba3 adapter_scribblehubcom: Update ajax call for chapters data. Didn't fix #1339 but change noted 3+ years ago 2026-04-29 10:15:26 -05:00
Jim Miller
65e3fd562b Update translations. 2026-04-27 16:53:06 -05:00
Jim Miller
7089bf6689 Bump Test Version 4.56.9 2026-04-21 15:02:05 -05:00
Jim Miller
061dc1333f PI: Correct Series field url link when setanthologyseries 2026-04-21 15:01:58 -05:00
Jim Miller
0a7fb5c090 Bump Test Version 4.56.8 2026-04-19 14:08:29 -05:00
Jim Miller
cf02f729ae adapter_literotica: Fix for numeric tag value from json. #1336 2026-04-19 14:08:21 -05:00
Jim Miller
730c4f77f9 Bump Test Version 4.56.7 2026-04-19 09:33:07 -05:00
Jim Miller
c02da29cbd Added strings for translation 2026-04-19 09:33:00 -05:00
Jim Miller
b87d796221 PI: Add Fix Series Case setting for #1338 2026-04-19 09:30:15 -05:00
Jim Miller
436370fe5b Done profiling for now 2026-04-19 09:03:10 -05:00
Jim Miller
ac77f31bc2 Move NotGoingToDownload to exceptions.py #1337 2026-04-19 09:02:32 -05:00
Jim Miller
16f2c74e4b Bump Test Version 4.56.6 2026-04-18 13:47:51 -05:00
praschke
af5c2aa0bc adapter_kakuyomujp: site update 2026-04-18 13:47:14 -05:00
Jim Miller
31dec5b62d Bump Test Version 4.56.5 2026-04-18 12:58:56 -05:00
Jim Miller
97d37fcfc1 fix_relative_text_links: Allow hrefs to name anchors as well as id. 2026-04-18 12:58:46 -05:00
Jim Miller
c730aa2f68 Bump Test Version 4.56.4 2026-04-17 10:22:20 -05:00
Jim Miller
4e2e359dee PI Anthologies: Only put status in tags if in include_subject_tags. Closes #1332 2026-04-17 10:22:13 -05:00
Jim Miller
bb96049934 Remove some debug 2026-04-16 14:27:48 -05:00
Jim Miller
84965ef25f Bump Test Version 4.56.3 2026-04-12 21:20:09 -05:00
Jim Miller
348d129a1e adapter_ficwadcom: Detect missing username as well as failed login #1330 2026-04-12 21:05:42 -05:00
Jim Miller
4794e9bc51 Bump Test Version 4.56.2 2026-04-10 21:56:43 -05:00
Jim Miller
d46dc76ae1 Somewhat better consolidated perf profiling 2026-04-10 21:56:43 -05:00
Jim Miller
08bae8d9be Imperfect, but working perf profiling 2026-04-10 16:49:17 -05:00
Jim Miller
405c37aeb5 Remove some dead code. 2026-04-10 16:43:49 -05:00
Jim Miller
270e01c3c7 Cache config values for performance improvement. 2026-04-10 16:24:37 -05:00
Jim Miller
12d57f5950 Bump Test Version 4.56.1 2026-04-06 12:07:14 -05:00
Jim Miller
562b3a4ecd Unnew Perf Improvement w/profiling 2026-04-06 12:07:05 -05:00
Jim Miller
e69045fd98 Bump Release Version 4.56.0 2026-04-02 10:03:42 -05:00
Jim Miller
747bde3394 Update (commented out) profiling code. 2026-04-02 10:02:58 -05:00
Jim Miller
aa00c7ae03 Bump Test Version 4.55.4 2026-03-27 11:54:50 -05:00
Jim Miller
0539f818f3 Add top menu items for Add/Edit Reject URLs. 2026-03-27 11:54:44 -05:00
Jim Miller
41a6f56f44 Remove fanficfare_macmenuhack. 2026-03-27 11:43:53 -05:00
Jim Miller
e3832245e6 Add Reject URLs: Accept story URLs drag/drop & paste like Add Stories by URL 2026-03-27 10:52:30 -05:00
Jim Miller
909b64c83c Remove some image processing debug output 2026-03-27 10:51:29 -05:00
Jim Miller
732f5e2571 Bump Test Version 4.55.3 2026-03-19 13:03:11 -05:00
Jim Miller
d9dd04396e Epub Update: Don't cache cover image with others, trips dedup. 2026-03-19 13:03:03 -05:00
Jim Miller
36e2183d45 Bump Test Version 4.55.2 2026-03-12 15:13:01 -05:00
Jim Miller
040b7205b8 adapter_literotica: Fix for site change (#1318) 2026-03-12 15:11:26 -05:00
Jim Miller
d8ed180eb1 Bump Test Version 4.55.1 2026-03-09 13:04:56 -05:00
Jim Miller
2a6c1e74db Make seriesUrl mutable again. 2026-03-09 13:04:50 -05:00
Jim Miller
b7c8c96153 Put download list at start of BG job too 2026-03-09 13:04:24 -05:00
Jim Miller
a16096592c Bump Release Version 4.55.0 2026-03-01 09:25:11 -06:00
Jim Miller
bb34eecc7c Remove a line of unused code. 2026-02-23 13:08:57 -06:00
Jim Miller
ceed7ef1a8 Bump Test Version 4.54.5 2026-02-10 08:45:34 -06:00
Jim Miller
1d2a887c2d Epub Update: Skip missing chapter, image and css files instead of failing. 2026-02-10 08:45:20 -06:00
Jim Miller
a3f3302312 Plugin only: In Skip mode, don't do initial metadata fetch if already matched in library. #1309 2026-02-10 08:30:02 -06:00
Jim Miller
ecf005b145 Bump Test Version 4.54.4 2026-02-05 16:09:00 -06:00
Jim Miller
3bd074fa2c Additional checks for svg images to reject--Calibre only. Related to #1298 2026-02-05 16:08:54 -06:00
Jim Miller
0fd95daa8e Bump Test Version 4.54.3 2026-02-05 13:46:42 -06:00
Jim Miller
1b57e49d98 Ignore CSS url() when ttf/otf/woff/woff2 font files 2026-02-05 13:46:24 -06:00
Jim Miller
db0d39c9cd Bump Test Version 4.54.2 2026-02-02 13:12:56 -06:00
Jim Miller
cbde66cf41 adapter_fimfictionnet/adapter_royalroadcom: Better handling of cover image size fall back #1306 2026-02-02 13:12:42 -06:00
Jim Miller
17331e9eb3 Bump Test Version 4.54.1 2026-02-01 13:51:23 -06:00
Jim Miller
9b96c151a5 adapter_adultfanfictionorg: Fixes for site changes #1305 2026-02-01 13:51:22 -06:00
Jim Miller
1b65a30798 Making some metadata entries immutable 2026-02-01 13:51:22 -06:00
Jim Miller
c9a47877f7 Allow for language getting changed by replace_metadata not breaking langcode 2026-02-01 09:15:31 -06:00
Jim Miller
bdc77ad0f6 Remove Site: swi.org.ru No DNS for site. 2026-02-01 09:15:31 -06:00
Jim Miller
719971c76c Don't set numChapters--it's done automatically. 2026-02-01 09:15:31 -06:00
Jim Miller
c74dba472a Fixes for mutable metadata entries used in code 2026-02-01 09:15:31 -06:00
Jim Miller
c1fb7f0fc5 Refactor metadata entry and settings name code a bit 2026-02-01 09:15:31 -06:00
65 changed files with 12400 additions and 12244 deletions

View file

@ -33,7 +33,7 @@ except NameError:
from calibre.customize import InterfaceActionBase
# pulled out from FanFicFareBase for saving in prefs.py
__version__ = (4, 54, 0)
__version__ = (4, 57, 7)
## Apparently the name for this class doesn't matter--it was still
## 'demo' for the first few versions.

View file

@ -371,6 +371,7 @@ class ConfigWidget(QWidget):
prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked()
prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked()
prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked()
prefs['seriescase'] = self.std_columns_tab.seriescase.isChecked()
prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked()
prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked()
@ -760,6 +761,7 @@ class BasicTab(QWidget):
tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
rejectreasons=rejecturllist.get_reject_reasons(),
reasonslabel=_('Add this reason to all URLs added:'),
accept_storyurls=True,
save_size_name='fff:Add Reject List')
d.exec_()
if d.result() == d.Accepted:
@ -1636,6 +1638,11 @@ class StandardColumnsTab(QWidget):
self.setanthologyseries.setChecked(prefs['setanthologyseries'])
row.append(self.setanthologyseries)
self.seriescase = QCheckBox(_('Fix Series Case?'),self)
self.seriescase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of title will be applied.")
+"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
self.seriescase.setChecked(prefs['seriescase'])
row.append(self.seriescase)
grid = QGridLayout()
for rownum, row in enumerate(rows):
for colnum, col in enumerate(row):

View file

@ -38,6 +38,7 @@ from calibre.gui2 import gprefs
show_download_options = 'fff:add new/update dialogs:show_download_options'
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.complete2 import EditWithComplete
from fanficfare.exceptions import NotGoingToDownload
from fanficfare.six import text_type as unicode, ensure_text
# pulls in translation files for _() strings
@ -155,15 +156,6 @@ class RejectUrlEntry:
return retval
class NotGoingToDownload(Exception):
def __init__(self,error,icon='dialog_error.png',showerror=True):
self.error=error
self.icon=icon
self.showerror=showerror
def __str__(self):
return self.error
class DroppableQTextEdit(QTextEdit):
def __init__(self,parent):
QTextEdit.__init__(self,parent)
@ -1328,6 +1320,7 @@ class EditTextDialog(SizePersistedDialog):
icon=None, title=None, label=None, tooltip=None,
read_only=False,
rejectreasons=[],reasonslabel=None,
accept_storyurls=False,
save_size_name='fff:edit text dialog',
):
SizePersistedDialog.__init__(self, parent, save_size_name)
@ -1341,7 +1334,10 @@ class EditTextDialog(SizePersistedDialog):
self.setWindowIcon(icon)
self.l.addWidget(self.label)
self.textedit = QTextEdit(self)
if accept_storyurls:
self.textedit = DroppableQTextEdit(self)
else:
self.textedit = QTextEdit(self)
self.textedit.setLineWrapMode(QTextEditNoWrap)
self.textedit.setReadOnly(read_only)
self.textedit.setText(text)

View file

@ -10,20 +10,6 @@ __docformat__ = 'restructuredtext en'
import fanficfare.six as six
from fanficfare.six import ensure_text, string_types, text_type as unicode
# import cProfile
# def do_cprofile(func):
# def profiled_func(*args, **kwargs):
# profile = cProfile.Profile()
# try:
# profile.enable()
# result = func(*args, **kwargs)
# profile.disable()
# return result
# finally:
# profile.print_stats()
# return profiled_func
import logging
logger = logging.getLogger(__name__)
@ -78,12 +64,14 @@ from fanficfare import adapters, exceptions
from fanficfare.epubutils import (
get_dcsource, get_dcsource_chaptercount, get_story_url_from_epub_html,
get_story_url_from_zip_html, reset_orig_chapters_epub, get_cover_data)
get_story_url_from_zip_html, reset_orig_chapters_epub, get_cover_img)
from fanficfare.geturls import (
get_urls_from_page, get_urls_from_text,get_urls_from_imap,
get_urls_from_mime)
from fanficfare.fff_profile import do_cprofile
from calibre_plugins.fanficfare_plugin.fff_util import (
get_fff_adapter, get_fff_config, get_fff_personalini,
get_common_elements)
@ -111,7 +99,8 @@ from calibre_plugins.fanficfare_plugin.dialogs import (
LoopProgressDialog, UserPassDialog, AboutDialog, CollectURLDialog,
RejectListDialog, EmailPassDialog, TOTPDialog,
save_collisions, question_dialog_all,
NotGoingToDownload, RejectUrlEntry, IniTextDialog)
RejectUrlEntry, IniTextDialog,
EditTextDialog)
# because calibre immediately transforms html into zip and don't want
# to have an 'if html'. db.has_format is cool with the case mismatch,
@ -205,20 +194,6 @@ class FanFicFarePlugin(InterfaceAction):
prefs,
self.qaction.icon())
## Kludgey, yes, but with the real configuration inside the
## library now, how else would a user be able to change this
## setting if it's crashing calibre?
def check_macmenuhack(self):
try:
return self.macmenuhack
except:
file_path = os.path.join(calibre_config_dir,
*("plugins/fanficfare_macmenuhack.txt".split('/')))
file_path = os.path.abspath(file_path)
logger.debug("Plugin %s macmenuhack file_path:%s"%(self.name,file_path))
self.macmenuhack = os.access(file_path, os.F_OK)
return self.macmenuhack
accepts_drops = True
def accept_enter_event(self, event, mime_data):
@ -443,30 +418,38 @@ class FanFicFarePlugin(InterfaceAction):
self.reject_list_action = self.create_menu_item_ex(self.menu, _('Reject Selected Books'),
unique_name='Reject Selected Books', image='rotate-right.png',
triggered=self.reject_list_urls)
# self.menu.addSeparator()
# print("platform.system():%s"%platform.system())
# print("platform.mac_ver()[0]:%s"%platform.mac_ver()[0])
if not self.check_macmenuhack(): # not platform.mac_ver()[0]: # Some macs crash on these menu items for unknown reasons.
self.menu.addSeparator()
self.editpersonalini_action = self.create_menu_item_ex(self.menu, _('Edit personal.ini'),
image= 'config.png',
unique_name='Edit personal.ini',
shortcut_name=_('Edit personal.ini'),
triggered=self.editpersonalini)
self.add_reject_urls_action = self.create_menu_item_ex(self.menu, _('Add Reject URLs'),
image='rotate-right.png',
unique_name='Add Reject URLs',
shortcut_name=_('Add Reject URLs'),
triggered=self.add_reject_urls)
self.config_action = self.create_menu_item_ex(self.menu, _('&Configure FanFicFare'),
image= 'config.png',
unique_name='Configure FanFicFare',
shortcut_name=_('Configure FanFicFare'),
triggered=do_user_config)
self.edit_reject_urls_action = self.create_menu_item_ex(self.menu, _('Edit Reject URLs'),
image='rotate-right.png',
unique_name='Edit Reject URLs',
shortcut_name=_('Edit Reject URLs'),
triggered=self.edit_reject_urls)
self.about_action = self.create_menu_item_ex(self.menu, _('About FanFicFare'),
image= 'images/icon.png',
unique_name='About FanFicFare',
shortcut_name=_('About FanFicFare'),
triggered=self.about)
self.menu.addSeparator()
self.editpersonalini_action = self.create_menu_item_ex(self.menu, _('Edit personal.ini'),
image= 'config.png',
unique_name='Edit personal.ini',
shortcut_name=_('Edit personal.ini'),
triggered=self.editpersonalini)
self.config_action = self.create_menu_item_ex(self.menu, _('&Configure FanFicFare'),
image= 'config.png',
unique_name='Configure FanFicFare',
shortcut_name=_('Configure FanFicFare'),
triggered=do_user_config)
self.about_action = self.create_menu_item_ex(self.menu, _('About FanFicFare'),
image= 'images/icon.png',
unique_name='About FanFicFare',
shortcut_name=_('About FanFicFare'),
triggered=self.about)
self.gui.keyboard.finalize()
def about(self,checked):
@ -502,6 +485,35 @@ class FanFicFarePlugin(InterfaceAction):
prefs['personal.ini'] = get_resources('plugin-example.ini')
prefs.save_to_db()
def add_reject_urls(self):
d = EditTextDialog(self.gui,
"http://example.com/story.php?sid=5,"+_("Reason why I rejected it")+"\nhttp://example.com/story.php?sid=6,"+_("Title by Author")+" - "+_("Reason why I rejected it"),
# icon=self.windowIcon(),
title=_("FanFicFare"),
label=_("Add Reject URLs. Use: <b>http://...,note</b> or <b>http://...,title by author - note</b><br>Invalid story URLs will be ignored."),
tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
rejectreasons=rejecturllist.get_reject_reasons(),
reasonslabel=_('Add this reason to all URLs added:'),
accept_storyurls=True,
save_size_name='fff:Add Reject List')
d.exec_()
if d.result() == d.Accepted:
rejecturllist.add_text(d.get_plain_text(),d.get_reason_text())
def edit_reject_urls(self):
with busy_cursor():
d = RejectListDialog(self.gui,
rejecturllist.get_list(),
rejectreasons=rejecturllist.get_reject_reasons(),
header=_("Edit Reject URLs List"),
show_delete=False,
show_all_reasons=False)
d.exec_()
if d.result() != d.Accepted:
return
with busy_cursor():
rejecturllist.add(d.get_reject_list(),clear=True)
def create_menu_item_ex(self, parent_menu, menu_text, image=None, tooltip=None,
shortcut=None, triggered=None, is_checked=None, shortcut_name=None,
unique_name=None):
@ -1141,9 +1153,9 @@ class FanFicFarePlugin(InterfaceAction):
## Aug2024 moved site specific search changes to adapters as
## classmethod
regexp = adapters.get_url_search(url)
logger.debug(regexp)
# logger.debug(regexp)
retval = self.gui.current_db.search_getting_ids(regexp,None,use_virtual_library=False)
logger.debug(retval)
# logger.debug(retval)
return retval
def prep_downloads(self, options, books, merge=False, extrapayload=None):
@ -1273,7 +1285,7 @@ class FanFicFarePlugin(InterfaceAction):
# let other exceptions percolate up.
return adapter.getStoryMetadataOnly(get_cover=False)
# @do_cprofile
@do_cprofile
def prep_download_loop(self,book,
options={'fileform':'epub',
'collision':ADDNEW,
@ -1307,9 +1319,16 @@ class FanFicFarePlugin(InterfaceAction):
if self.reject_url(merge,book):
return
## Check existing for SKIP mode. Again, redundant with below
## for when story URL changes, but also kept here to avoid
## network hit.
identicalbooks = self.do_id_search(url)
if collision == SKIP and identicalbooks:
raise exceptions.NotGoingToDownload(_("Skipping duplicate story."),"list_remove.png")
# Dialogs should prevent this case now.
if collision in (UPDATE,UPDATEALWAYS) and fileform != 'epub':
raise NotGoingToDownload(_("Cannot update non-epub format."))
raise exceptions.NotGoingToDownload(_("Cannot update non-epub format."))
if not book['good']:
# book has already been flagged bad for whatever reason.
@ -1503,7 +1522,7 @@ class FanFicFarePlugin(InterfaceAction):
logger.debug("existing found by identifier URL")
if collision == SKIP and identicalbooks:
raise NotGoingToDownload(_("Skipping duplicate story."),"list_remove.png")
raise exceptions.NotGoingToDownload(_("Skipping duplicate story."),"list_remove.png")
if len(identicalbooks) > 1:
identicalbooks_msg = _("More than one identical book by Identifier URL or title/author(s)--can't tell which book to update/overwrite.")
@ -1534,7 +1553,7 @@ class FanFicFarePlugin(InterfaceAction):
identicalbooks = []
collision = book['collision'] = ADDNEW
else:
raise NotGoingToDownload(identicalbooks_msg,"minusminus.png")
raise exceptions.NotGoingToDownload(identicalbooks_msg,"minusminus.png")
## changed: add new book when CALIBREONLY if none found.
if collision in (CALIBREONLY, CALIBREONLYSAVECOL) and not identicalbooks:
@ -1621,11 +1640,11 @@ class FanFicFarePlugin(InterfaceAction):
# returns int adjusted for start-end range.
urlchaptercount = story.getChapterCount()
if chaptercount == urlchaptercount and collision == UPDATE:
raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
raise exceptions.NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
elif chaptercount > urlchaptercount and not (collision == UPDATEALWAYS and adapter.getConfig('force_update_epub_always')):
raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
raise exceptions.NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
elif chaptercount == 0:
raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')
raise exceptions.NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')
if collision == OVERWRITE and \
db.has_format(book_id,formmapping[fileform],index_is_id=True):
@ -1642,7 +1661,7 @@ class FanFicFarePlugin(InterfaceAction):
# updated does have time, use full timestamps.
if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
(lastupdated.time() != time.min and fileupdated > lastupdated):
raise NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png',showerror=False)
raise exceptions.NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png',showerror=False)
# For update, provide a tmp file copy of the existing epub so
# it can't change underneath us. Now also overwrite for logpage preserve.
@ -1862,6 +1881,7 @@ class FanFicFarePlugin(InterfaceAction):
else:
return None
@do_cprofile
def update_books_loop(self,book,db=None,
options={'fileform':'epub',
'collision':ADDNEW,
@ -2193,30 +2213,45 @@ class FanFicFarePlugin(InterfaceAction):
## start with None. If no subbook covers, don't force one
## here. User can configure FFF to always create/polish a
## cover if they want. This is about when we force it.
coverpath = None
coverimgpath = None
coverimgtype = None
had_cover = False
## first, look for covers inside the subbooks. Stop at the
## first one, which will be used if there isn't a pre-existing
# epubmerge wants a path to cover img on disk
def write_image(imgtype,imgdata):
tmp = PersistentTemporaryFile(prefix='cover_',
suffix='.'+imagetypes[imgtype],
dir=options['tdir'])
tmp.write(imgdata)
tmp.flush()
tmp.close()
return tmp.name
## if prior epub had a cover, we should use it again.
if mergebook['calibre_id'] and db.has_format(mergebook['calibre_id'],'EPUB',index_is_id=True):
(covertype,coverdata) = get_cover_img(db.format(mergebook['calibre_id'],'EPUB',index_is_id=True,as_file=True))
if coverdata:
had_cover = True
coverimgpath = write_image(covertype,coverdata)
coverimgtype = covertype
logger.debug("prior anthology cover found")
## look for covers inside the subbooks. Stop at the first
## one, which will be used if there isn't a pre-existing
## calibre cover.
if not coverpath:
if not coverimgpath:
for book in good_list:
coverdata = get_cover_data(book['outfile'])
(covertype,coverdata) = get_cover_img(book['outfile'])
if coverdata: # found a cover.
(coverimgtype,coverimgdata) = coverdata[4:6]
# logger.debug('coverimgtype:%s [%s]'%(coverimgtype,imagetypes[coverimgtype]))
tmpcover = PersistentTemporaryFile(suffix='.'+imagetypes[coverimgtype],
dir=options['tdir'])
tmpcover.write(coverimgdata)
tmpcover.flush()
tmpcover.close()
coverpath = tmpcover.name
coverimgpath = write_image(covertype,coverdata)
coverimgtype = covertype
logger.debug('from subbook coverimgpath:%s'%coverimgpath)
break
# logger.debug('coverpath:%s'%coverpath)
## if updating an existing book and there is at least one
## subbook cover:
if coverpath and mergebook['calibre_id']:
if not had_cover and coverimgpath and mergebook['calibre_id']:
logger.debug("anth cover: using cal cover")
# Couldn't find a better way to get the cover path.
calcoverpath = os.path.join(db.library_path,
db.path(mergebook['calibre_id'], index_is_id=True),
@ -2224,9 +2259,11 @@ class FanFicFarePlugin(InterfaceAction):
## if there's an existing cover, use it. Calibre will set
## it for us during lots of different actions anyway.
if os.path.exists(calcoverpath):
coverpath = calcoverpath
coverimgpath = calcoverpath
# logger.debug('coverpath:%s'%coverpath)
## Note that this cover will be replaced if 'inject
## generated' cover is on
logger.debug('coverimgpath:%s'%coverimgpath)
mrg_args = [tmp.name,
[ x['outfile'] for x in good_list ],]
mrg_kwargs = {
@ -2234,7 +2271,7 @@ class FanFicFarePlugin(InterfaceAction):
'titleopt':mergebook['title'],
'keepmetadatafiles':True,
'source':mergebook['url'],
'coverjpgpath':coverpath
'coverjpgpath':coverimgpath
}
logger.debug('anthology_merge_keepsingletocs:%s'%
mergebook['anthology_merge_keepsingletocs'])
@ -2267,7 +2304,6 @@ class FanFicFarePlugin(InterfaceAction):
errorcol_label = self.get_custom_col_label(prefs['errorcol'])
lastcheckedcol_label = self.get_custom_col_label(prefs['lastcheckedcol'])
columns = self.gui.library_view.model().custom_columns
if good_list or prefs['mark'] or (bad_list and errorcol_label) or lastcheckedcol_label:
LoopProgressDialog(self.gui,
good_list+bad_list,
@ -2613,7 +2649,6 @@ class FanFicFarePlugin(InterfaceAction):
db.new_api.set_link_for_authors(author_id_to_link_map)
# set series link if found.
logger.debug("has link_map:%s"%(hasattr(db.new_api,'set_link_map')))
## new_api.set_link_map added in Calibre v6.15
if hasattr(db.new_api,'set_link_map') and \
prefs['set_series_url'] and \
@ -2622,6 +2657,7 @@ class FanFicFarePlugin(InterfaceAction):
series = book['series']
if '[' in series: # a few can have a series w/o number
series = series[:series.rindex(' [')]
logger.debug("Setting series link:%s"%book['all_metadata']['seriesUrl'])
db.new_api.set_link_map('series',{series:
book['all_metadata']['seriesUrl']})
@ -2811,6 +2847,9 @@ class FanFicFarePlugin(InterfaceAction):
mi.pubdate = book['pubdate']
mi.timestamp = book['timestamp']
mi.comments = book['comments']
if prefs['seriescase']:
from calibre.ebooks.metadata.sources.base import fixcase
book['series'] = fixcase(book['series'])
mi.series = book['series']
return mi
@ -3162,6 +3201,7 @@ The previously downloaded book is still in the anthology, but FFF doesn't have t
if prefs['setanthologyseries'] and book['title'] == series:
book['series'] = series+' [0]'
book['all_metadata']['seriesUrl'] = options.get('anthology_url','')
# logger.debug("anthology_title_pattern:%s"%configuration.getConfig('anthology_title_pattern'))
if configuration.getConfig('anthology_title_pattern'):
@ -3182,7 +3222,9 @@ The previously downloaded book is still in the anthology, but FFF doesn't have t
s = options.get('frompage',{}).get('status','')
if s:
book['all_metadata']['status'] = s
book['tags'].append(s)
## status into tags only if in include_subject_tags
if 'status' in configuration.getConfigList('include_subject_tags'):
book['tags'].append(s)
book['tags'].extend(configuration.getConfigList('anthology_tags'))
book['all_metadata']['anthology'] = "true"

View file

@ -44,33 +44,44 @@ def do_download_worker_single(site,
print_basic_debug_info(sys.stderr)
notification(0.01, _('Downloading FanFiction Stories'))
from calibre_plugins.fanficfare_plugin import FanFicFareBase
fffbase = FanFicFareBase(options['plugin_path'])
with fffbase: # so the sys.path was modified while loading the
# plug impl.
from fanficfare.fff_profile import do_cprofile
count = 0
totals = {}
# can't do direct assignment in list comprehension? I'm sure it
# makes sense to some pythonista.
# [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
[ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
# logger.debug(sites_lists.keys())
## extra function just so I can easily use the same
## @do_cprofile decorator
@do_cprofile
def profiled_func():
count = 0
totals = {}
# can't do direct assignment in list comprehension? I'm sure it
# makes sense to some pythonista.
# [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
[ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
# logger.debug(sites_lists.keys())
def do_indiv_notif(percent,msg):
totals[msg] = percent/len(totals)
notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
def do_indiv_notif(percent,msg):
totals[msg] = percent/len(totals)
notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
do_list = []
done_list = []
## pass failures from metadata through bg job so all results are
## together.
for book in book_list:
if book['good']:
do_list.append(book)
else:
done_list.append(book)
for book in do_list:
# logger.info("%s"%book['url'])
done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
count += 1
return finish_download(done_list)
do_list = []
done_list = []
logger.info("\n\n"+_("Downloading FanFiction Stories")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
## pass failures from metadata through bg job so all results are
## together.
for book in book_list:
if book['good']:
do_list.append(book)
else:
done_list.append(book)
for book in do_list:
# logger.info("%s"%book['url'])
done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
count += 1
return finish_download(done_list)
return profiled_func()
def finish_download(donelist):
book_list = sorted(donelist,key=lambda x : x['listorder'])
@ -113,15 +124,6 @@ def finish_download(donelist):
# return the book list as the job result
return book_list
def do_download_site(site,book_list,options,merge,notification=lambda x,y:x):
# logger.info(_("Started job for %s")%site)
retval = []
for book in book_list:
# logger.info("%s"%book['url'])
retval.append(do_download_for_worker(book,options,merge,notification))
notification(10.0,book['url'])
return retval
def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
'''
Child job, to download story when run as a worker job
@ -131,13 +133,13 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
fffbase = FanFicFareBase(options['plugin_path'])
with fffbase: # so the sys.path was modified while loading the
# plug impl.
from calibre_plugins.fanficfare_plugin.dialogs import NotGoingToDownload
from calibre_plugins.fanficfare_plugin.prefs import (
SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
from fanficfare import adapters, writers
from fanficfare.epubutils import get_update_data
from fanficfare.exceptions import NotGoingToDownload
from fanficfare.six import text_type as unicode
from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config

View file

@ -1599,18 +1599,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti
[adult-fanfiction.org]
use_basic_cache:true
extra_valid_entries:eroticatags,disclaimer
eroticatags_label:Erotica Tags
disclaimer_label:Disclaimer
extra_titlepage_entries:eroticatags,disclaimer
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
[althistory.com]
## Note this is NOT the same as www.alternatehistory.com
## see [base_xenforoforum]
@ -1724,13 +1719,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## AO3 uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -1939,7 +1934,7 @@ make_linkhtml_entries:translators,betas
## For most sites, 'category' is the fandom, but fanfics.me has
## fandoms and a separate category. By making it configurable, users
## can change it.
include_in_category:fandoms
include_in_category:category,fandoms
[fanfictalk.com]
use_basic_cache:true
@ -2715,13 +2710,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -3022,13 +3017,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -3157,8 +3152,8 @@ bookmarkmemo_label:ブックマークメモ
bookmarkprivate_label:非公開ブックマーク
subscribed_label:更新通知
include_in_genre: fullgenre
#include_in_genre: biggenre, smallgenre
include_in_genre: genre, fullgenre
#include_in_genre: genre, biggenre, smallgenre
## adds to titlepage_entries instead of replacing it.
#extra_titlepage_entries: fullgenre,biggenre,smallgenre,imprint,freeformtags,comments,reviews,bookmarks,ratingpoints,overallpoints,bookmarked,bookmarkcategory,bookmarkmemo,bookmarkprivate,subscribed
@ -3401,13 +3396,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -3538,7 +3533,7 @@ upvotes_label:Upvotes
subscribers_label:Subscribers
views_label:Views
include_in_category:tags
include_in_category:category,tags
#extra_titlepage_entries:upvotes,subscribers,views
@ -3674,13 +3669,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -4433,9 +4428,6 @@ extracategories:Buffy: The Vampire Slayer
extracharacters:Buffy, Spike
extraships:Spike/Buffy
[www.swi.org.ru]
use_basic_cache:true
[www.the-sietch.com]
## see [base_xenforoforum]

View file

@ -126,6 +126,7 @@ default_prefs['suppressauthorsort'] = False
default_prefs['suppresstitlesort'] = False
default_prefs['authorcase'] = False
default_prefs['titlecase'] = False
default_prefs['seriescase'] = False
default_prefs['setanthologyseries'] = False
default_prefs['mark'] = False
default_prefs['mark_success'] = True

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -120,7 +120,6 @@ from . import adapter_novelonlinefullcom
from . import adapter_wwwnovelallcom
from . import adapter_hentaifoundrycom
from . import adapter_mugglenetfanfictioncom
from . import adapter_swiorgru
from . import adapter_fanficsme
from . import adapter_fanfictalkcom
from . import adapter_scifistoriescom

View file

@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%Y-%m-%d"
self.dateformat = "%B %d, %Y"
## Added because adult-fanfiction.org does send you to
## www.adult-fanfiction.org when you go to it and it also moves
@ -139,91 +137,45 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
##This is not working right now, so I'm commenting it out, but leaving it for future testing
## Login seems to be reasonably standard across eFiction sites.
#def needToLoginCheck(self, data):
##This adapter will always require a login
# return True
# <form name="login" method="post" action="">
# <div class="top">E-mail: <span id="sprytextfield1">
# <input name="email" type="text" id="email" size="20" maxlength="255" />
# <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
# <div class="top">Password: <span id="sprytextfield2">
# <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
# <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
# <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
# <input type="submit" value="Login" />
# </div>
# </form>
##This is not working right now, so I'm commenting it out, but leaving it for future testing
#def performLogin(self, url, soup):
# params = {}
# if self.password:
# params['email'] = self.username
# params['pass1'] = self.password
# else:
# params['email'] = self.getConfig("username")
# params['pass1'] = self.getConfig("password")
# params['submit'] = 'Login'
# # copy all hidden input tags to pick up appropriate tokens.
# for tag in soup.find_all('input',{'type':'hidden'}):
# params[tag['name']] = tag['value']
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
# d = self.post_request(url, params, usecache=False)
# d = self.post_request(url, params, usecache=False)
# soup = self.make_soup(d)
#if not (soup.find('form', {'name' : 'login'}) == None):
# logger.info("Failed to login to URL %s as %s" % (url, params['email']))
# raise exceptions.FailedToLogin(url,params['email'])
# return False
#else:
# return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
## You need to have your is_adult set to true to get this story
if not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url)
else:
d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
if "Age verified successfully" not in d:
raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))
url = self.url
logger.debug("URL: "+url)
data = self.get_request(url)
# logger.debug(data)
if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))
soup = self.make_soup(data)
##This is not working right now, so I'm commenting it out, but leaving it for future testing
#self.performLogin(url, soup)
## Title
## Some of the titles have a backslash on the story page, but not on the Author's page
## So I am removing it from the title, so it can be found on the Author's page further in the code.
## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',stripHTML(a).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
h1 = soup.find('h1')
# logger.debug("Title:%s"%h1)
self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
# Find the chapters:
chapters = soup.find('ul',{'class':'dropdown-content'})
for i, chapter in enumerate(chapters.find_all('a')):
self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
# Find the chapters from first list only
chapters = soup.select_one('select.chapter-select').select('option')
for chapter in chapters:
self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
if a == None:
# I know that the original author of fanficfare wants to always have metadata,
# but I posit that if the story is there, even if we can't get the metadata from the
@ -232,140 +184,56 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
self.story.setMetadata('author','Unknown')
logger.warning('There was no author found for the story... Metadata will not be retreived.')
self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
else:
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl',a['href'])
self.story.setMetadata('author',stripHTML(a))
##The story page does not give much Metadata, so we go to the Author's page
## The story page does not give much Metadata, so we go to
## the Author's page. Except it's actually a sub-req for
## list of author's stories for that subdomain
author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
self.getBaseDomain(),
self.zone,
self.story.getMetadata('authorId'))
##Get the first Author page to see if there are multiple pages.
##AFF doesn't care if the page number is larger than the actual pages,
##it will continue to show the last page even if the variable is larger than the actual page
author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
#author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
##I'm resetting the author page to the zone for this story
self.story.setMetadata('authorUrl',author_Url)
logger.debug('Getting the author page: {0}'.format(author_Url))
logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
adata = self.get_request(author_Url)
if "The member you are looking for does not exist." in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
#raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
none_found = "No stories found in this category."
if none_found in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))
asoup = self.make_soup(adata)
# logger.debug(asoup)
##Getting the number of author pages
pages = 0
pagination=asoup.find('ul',{'class' : 'pagination'})
if pagination:
pages = pagination.find_all('li')[-1].find('a')
if not pages == None:
pages = pages['href'].split('=')[-1]
else:
pages = 0
story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
# logger.debug(story_card)
storya = None
##If there is only 1 page of stories, check it to get the Metadata,
if pages == 0:
a = asoup.find_all('li')
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
storya = lc2
break
## otherwise go through the pages
else:
page=1
i=0
while i == 0:
##We already have the first page, so if this is the first time through, skip getting the page
if page != 1:
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
logger.debug('Getting the author page: {0}'.format(author_Url))
adata = self.get_request(author_Url)
##This will probably never be needed, since AFF doesn't seem to care what number you put as
## the page number, it will default to the last page, even if you use 1000, for an author
## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
if "The member you are looking for does not exist." in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
# we look for the li element that has the story here
asoup = self.make_soup(adata)
## Category
## I've only seen one category per story so far, but just in case:
for cat in story_card.select('div.story-card-category'):
# remove Category:, old code suggests Located: is also
# possible, so removing by <strong>
cat.find("strong").decompose()
self.story.addToList('category',stripHTML(cat))
a = asoup.find_all('li')
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
i=1
storya = lc2
break
page = page + 1
if page > int(pages):
break
self.setDescription(url,story_card.select_one('div.story-card-description'))
##Split the Metadata up into a list
##We have to change the soup type to a string, then remove the newlines, and double spaces,
##then changes the <br/> to '-:-', which seperates the different elemeents.
##Then we strip the HTML elements from the string.
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
##They are always in the same order.
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
liMetadata = unicode(storya).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ')
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
if i == 0:
# The value for the title has been manipulated, so may not be the same as gotten at the start.
# I'm going to use the href from the storya retrieved from the author's page to determine if it is correct.
if storya.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
elif i == 1:
##Get the description
self.setDescription(url,stripHTML(value.strip()))
else:
# the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
if 'Located :' in value:
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Category :' in value:
# Get the Category
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Content Tags :' in value:
# Get the Erotic Tags
value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
for code in re.split(r'\s',value):
self.story.addToList('eroticatags',code)
elif 'Posted :' in value:
# Get the Posted Date
value = value.replace(r'Posted :',r'').strip()
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
elif value.startswith('0000'):
# Since the date is showing as 0000,
# I'm going to put the memberdate here
value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
elif 'Edited :' in value:
# Get the 'Updated' Edited date
# AFF has the time for the Updated date, and we only want the date,
# so we take the first 10 characters only
value = value.replace(r'Edited :',r'').strip()[0:10]
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
elif value.startswith('0000') or '-00-' in value:
# Since the date is showing as 0000,
# or there is -00- in the date,
# I'm going to put the Published date here
self.story.setMetadata('dateUpdated', self.story.getMetadata('datPublished'))
else:
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
else:
# This catches the blank elements, and the Review and Dragon Prints.
# I am not interested in these, so do nothing
zzzzzzz=0
for tag in story_card.select('span.story-tag'):
self.story.addToList('eroticatags',stripHTML(tag))
## created/updates share formatting
for meta in story_card.select('div.story-card-meta-item span:last-child'):
meta = stripHTML(meta)
if 'Created: ' in meta:
meta = meta.replace('Created: ','')
self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
if 'Updated: ' in meta:
meta = meta.replace('Updated: ','')
self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))
# grab the text for an individual chapter.
def getChapterText(self, url):
@ -373,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self.get_request(url))
chaptertag = soup.find('ul',{'class':'pagination'}).parent.parent.parent.findNextSibling('li')
chaptertag = soup.select_one('div.chapter-body')
if None == chaptertag:
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
# Change td to a div.
chaptertag.name='div'
## chapter text includes a copy of story title, author,
## chapter title, & eroticatags specific to the chapter. Did
## before, too.
return self.utf8FromSoup(url,chaptertag)

View file

@ -157,9 +157,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
self.story.addToList('warnings', warning)
elif key == 'Chapters':
self.story.setMetadata('numChapters', int(value))
elif key == 'Words':
# Apparently only numChapters need to be an integer for
# some strange reason. Remove possible ',' characters as to
@ -174,7 +171,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
# ugly %p(am/pm) hack moved into makeDate so other sites can use it.
self.story.setMetadata('dateUpdated', date)
if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
if self.story.getMetadataRaw('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
raise exceptions.AdultCheckRequired(self.url)
def getChapterText(self, url):

View file

@ -53,6 +53,9 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
#Setting the 'Zone' for each "Site"
self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')
# site change .nsns to -nsns
self.zone = self.zone.replace('.nsns','-nsns')
# normalized story URL.
self._setURL('https://{0}.{1}/{2}/'.format(
self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
@ -79,7 +82,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
@classmethod
def getAcceptDomains(cls):
# need both .nsns(old) and -nsns(new) because it's a domain
# change, not just URL change.
return ['aaran-st-vines.nsns.fanficauthors.net',
'aaran-st-vines-nsns.fanficauthors.net',
'abraxan.fanficauthors.net',
'bobmin.fanficauthors.net',
'canoncansodoff.fanficauthors.net',
@ -95,9 +101,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
'jeconais.fanficauthors.net',
'kinsfire.fanficauthors.net',
'kokopelli.nsns.fanficauthors.net',
'kokopelli-nsns.fanficauthors.net',
'ladya.nsns.fanficauthors.net',
'ladya-nsns.fanficauthors.net',
'lorddwar.fanficauthors.net',
'mrintel.nsns.fanficauthors.net',
'mrintel-nsns.fanficauthors.net',
'musings-of-apathy.fanficauthors.net',
'ruskbyte.fanficauthors.net',
'seelvor.fanficauthors.net',
@ -108,7 +117,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
################################################################################################
@classmethod
def getSiteExampleURLs(self):
return ("https://aaran-st-vines.nsns.fanficauthors.net/A_Story_Name/ "
return ("https://aaran-st-vines-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://abraxan.fanficauthors.net/A_Story_Name/ "
+ "https://bobmin.fanficauthors.net/A_Story_Name/ "
+ "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "
@ -123,10 +132,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
+ "https://jbern.fanficauthors.net/A_Story_Name/ "
+ "https://jeconais.fanficauthors.net/A_Story_Name/ "
+ "https://kinsfire.fanficauthors.net/A_Story_Name/ "
+ "https://kokopelli.nsns.fanficauthors.net/A_Story_Name/ "
+ "https://ladya.nsns.fanficauthors.net/A_Story_Name/ "
+ "https://kokopelli-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://ladya-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://lorddwar.fanficauthors.net/A_Story_Name/ "
+ "https://mrintel.nsns.fanficauthors.net/A_Story_Name/ "
+ "https://mrintel-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
+ "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
+ "https://seelvor.fanficauthors.net/A_Story_Name/ "
@ -136,8 +145,16 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
################################################################################################
def getSiteURLPattern(self):
## .nsns kept here to match both . and -
return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
@classmethod
def get_section_url(cls,url):
## only changing .nsns to -nsns and only when part of the
## domain.
url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
return url
################################################################################################
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
@ -202,7 +219,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
## Raising AdultCheckRequired after collecting chapters gives
## a double chapter list. So does genre, but it de-dups
## automatically.
if( self.story.getMetadata('rating') == 'Mature'
if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only']
and not (self.is_adult or self.getConfig("is_adult")) ):
raise exceptions.AdultCheckRequired(self.url)
@ -226,7 +243,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
if( self.story.getMetadata('rating') == 'Mature' and
if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only'] and
(self.is_adult or self.getConfig("is_adult")) ):
addurl = "?bypass=1"
else:

View file

@ -150,7 +150,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))
## Need to login for any rating higher than General.
if self.story.getMetadata('rating') != 'General' and self.needToLoginCheck(data):
if self.story.getMetadataRaw('rating') != 'General' and self.needToLoginCheck(data):
self.performLogin(url)
# reload after login.
data = self.get_request(url,usecache=False)

View file

@ -157,7 +157,6 @@ class FicBookNetAdapter(BaseSiteAdapter):
update = chapterdate
else:
self.add_chapter(self.story.getMetadata('title'),url)
self.story.setMetadata('numChapters',1)
date_str = soup.find('div', {'class' : 'part-date'}).find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
for month_name, month_num in fullmon.items():
date_str = date_str.replace(month_name, month_num)

View file

@ -173,7 +173,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
tags = data['ta'] if 'ta' in data else []
if (self.story.getMetadata('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
if (self.story.getMetadataRaw('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url)

View file

@ -40,10 +40,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
# Always single chapters, probably should use the Anthology feature to
# merge chapters of a story
self.story.setMetadata('numChapters', 1)
@staticmethod
def getSiteDomain():
return FictionManiaTVAdapter.SITE_DOMAIN

View file

@ -66,7 +66,8 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
params['username']))
d = self.post_request(loginUrl,params,usecache=False)
if "Login attempt failed..." in d:
if "Login attempt failed..." in d or \
'<div id="error">Please enter your username and password.</div>' in d:
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['username']))
raise exceptions.FailedToLogin(url,params['username'])
@ -114,7 +115,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
self.story.setMetadata('title', stripHTML(titleh4.a))
if 'Deleted story' in self.story.getMetadata('title'):
if 'Deleted story' in self.story.getMetadataRaw('title'):
raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)
# Find authorid and URL from... author url.

View file

@ -151,7 +151,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("authorId", author['href'].split('/')[2])
self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
self.story.getMetadata('authorId'),
self.story.getMetadata('author')))
# meta entry author can be changed by the user.
stripHTML(author)))
#Rating text is replaced with full words for historical compatibility after the site changed
#on 2014-10-27
@ -183,7 +184,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
if storyImage:
coverurl = storyImage['data-fullsize']
# try setting from data-fullsize, if fails, try using data-src
if self.setCoverImage(self.url,coverurl)[0].startswith("failedtoload"):
cover_set = self.setCoverImage(self.url,coverurl)[0]
if not cover_set or cover_set.startswith("failedtoload"):
coverurl = storyImage['src']
self.setCoverImage(self.url,coverurl)

View file

@ -105,7 +105,6 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
# to download them one at a time yourself. I'm also setting the status to
# complete
self.add_chapter(self.story.getMetadata('title'), self.url)
self.story.setMetadata('numChapters', 1)
self.story.setMetadata('status', 'Completed')
## some stories do not have a summary listed, so I'm setting it here.

View file

@ -163,7 +163,7 @@ class KakuyomuJpAdapter(BaseSiteAdapter):
titles = []
nestingLevel = 0
newSection = False
for tocNodeRef in info[workKey]['tableOfContents']:
for tocNodeRef in info[workKey]['tableOfContentsV2']:
tocNode = info[tocNodeRef['__ref']]
if tocNode['chapter'] is not None:
@ -197,8 +197,6 @@ class KakuyomuJpAdapter(BaseSiteAdapter):
self.add_chapter(epTitle, epUrl)
newSection = False
self.story.setMetadata('numChapters', numEpisodes)
logger.debug("Story: <%s>", self.story)
return

View file

@ -99,7 +99,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## apply clean_chapter_titles
def add_chapter(self,chapter_title,url,othermeta={}):
if self.getConfig("clean_chapter_titles"):
storytitle = self.story.getMetadata('title').lower()
storytitle = self.story.getMetadataRaw('title').lower()
chapter_name_type = None
# strip trailing ch or pt before doing the chapter clean.
# doesn't remove from story title metadata
@ -241,7 +241,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div#tabpanel-tags a.av_as') ])
if soup.select('div[class^="_widget__tags_"]'):
# logger.debug("tags2")
self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div[class^="_widget__tags_"] a[class^="_tags__link_"]') ])
self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div[class^="_widget__tags_"] a[class^="_tag_item_"]') ])
# logger.debug(self.story.getList('eroticatags'))
## look first for 'Series Introduction', then Info panel short desc
@ -395,7 +395,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## Collect tags from series/story page if tags_from_chapters is enabled
if self.getConfig("tags_from_chapters"):
self.story.extendList('eroticatags', [ stripHTML(t['tag']).title() for t in chap['tags'] ])
self.story.extendList('eroticatags', [ unicode(t['tag']).title() for t in chap['tags'] ])
except Exception as e:

View file

@ -162,7 +162,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
self.story.extendList('authorId', [authorId])
self.story.extendList('authorUrl', [authorUrl])
if not self.story.getMetadata('rating'):
if not self.story.getMetadataRaw('rating'):
ratingTitle = chapter.getRatingTitle()
if ratingTitle:
self.story.setMetadata('rating', ratingTitle)
@ -204,7 +204,6 @@ class MassEffect2InAdapter(BaseSiteAdapter):
self.story.setMetadata('datePublished', datePublished)
self.story.setMetadata('dateUpdated', dateUpdated)
self.story.setMetadata('numWords', unicode(wordCount))
self.story.setMetadata('numChapters', len(chapters))
# Site-specific metadata.
self.story.setMetadata('language', self.SITE_LANGUAGE)

View file

@ -289,7 +289,8 @@ class RoyalRoadAdapter(BaseSiteAdapter):
if img:
cover_url = img['src']
# usually URL is for thumbnail. Try expected URL for larger image, if fails fall back to the original URL
if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0].startswith("failedtoload"):
cover_set = self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0]
if not cover_set or cover_set.startswith("failedtoload"):
self.setCoverImage(url,cover_url)
# some content is show as tables, this will preserve them

View file

@ -197,33 +197,20 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
# Get the contents list from scribblehub, iterate through and add to chapters
# Can be fairly certain this will not 404 - we know the story id is valid
contents_payload = {"action": "wi_gettocchp",
"strSID": self.story.getMetadata('storyId'),
"strmypostid": 0,
"strFic": "yes"}
# 14/12/22 - Looks like it should follow this format now (below), but still returns a 400
# but not a 403. tested in browser getting rid of all other cookies to try and get a 400 and nopes.
# contents_payload = {"action": "wi_getreleases_pagination",
# "pagenum": 1,
# "mypostid": 421879}
# contents_payload = "action=wi_getreleases_pagination&pagenum=1&mypostid=421879"
contents_payload = {"action": "wi_getreleases_pagination",
"pagenum": -1,
"mypostid": self.story.getMetadata('storyId')}
contents_data = self.post_request("https://www.scribblehub.com/wp-admin/admin-ajax.php", contents_payload)
# logger.debug(contents_data)
contents_soup = self.make_soup(contents_data)
for i in range(1, int(contents_soup.find('ol',{'id':'ol_toc'}).get('count')) + 1):
chapter_url = contents_soup.find('li',{'cnt':str(i)}).find('a').get('href')
chapter_name = contents_soup.find('li',{'cnt':str(i)}).find('a').get('title')
# logger.debug("Found Chapter " + str(i) + ", name: " + chapter_name + ", url: " + chapter_url)
for toca in contents_soup.select('a.toc_a'):
chapter_url = toca['href']
chapter_name = stripHTML(toca)
# logger.debug("Found Chapter: " + chapter_name + ", url: " + chapter_url)
self.add_chapter(chapter_name, chapter_url)
# eFiction sites don't help us out a lot with their meta data
# formating, so it's a little ugly.
# utility method
def defaultGetattr(d,k):
try:

View file

@ -1,144 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Adapter discovery hook: return the adapter class this module exports."""
    return SwiOrgRuAdapter
# NOTE: the redundant second `logger = logging.getLogger(__name__)` was
# removed; the module-level logger is already defined near the imports.
class SwiOrgRuAdapter(BaseSiteAdapter):
    """Adapter for MLP:FiM stories hosted on www.swi.org.ru."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # story id is the 4th path component: /mlp-fim/story/<id>[/...]
        storyId = self.parsedUrl.path.split('/',)[3]
        self.story.setMetadata('storyId', storyId)

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/mlp-fim/story/'+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','swiorgru')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y.%m.%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        return 'www.swi.org.ru'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://" + cls.getSiteDomain() + "/mlp-fim/story/11341/ http://" + cls.getSiteDomain() + "/mlp-fim/story/11341/chapter1.html"

    def getSiteURLPattern(self):
        return r"http://" + re.escape(self.getSiteDomain() + "/mlp-fim/story/")+r"\d+"

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story page and populate story metadata + chapter list.

        Raises FailedToDownload when required page elements are missing,
        AdultCheckRequired for NC-18 stories without adult confirmation.
        """
        url=self.url
        logger.debug("URL: "+url)
        data = self.get_request(url)
        soup = self.make_soup(data)

        # Title is the page <h1>; drop footnote markers (<sup>) first.
        title = soup.find('h1')
        for tag in title.find_all('sup'):
            tag.extract()
        self.story.setMetadata('title', stripHTML(title.text))
        logger.debug("Title: (%s)"%self.story.getMetadata('title'))

        author_title = soup.find('strong', string = re.compile(u"Автор: "))
        if author_title is None:
            raise exceptions.FailedToDownload("Error downloading page: %s! Missing required author_title element!" % url)
        author = author_title.next_sibling
        self.story.setMetadata('authorId', author.text) # Author's name is unique
        self.story.setMetadata('authorUrl','http://'+self.host + author['href'])
        self.story.setMetadata('author', author.text)
        logger.debug("Author: (%s)"%self.story.getMetadata('author'))

        date_pub = soup.find('em', string = re.compile(r'\d{4}.\d{2}.\d{2}'))
        if date_pub is not None:
            self.story.setMetadata('datePublished', makeDate(date_pub.text, self.dateformat))

        rating_label = soup.find('strong', string = re.compile(u"рейтинг:"))
        if rating_label is not None:
            rating = rating_label.next_sibling.next_sibling
            self.story.setMetadata('rating', stripHTML(rating))
            # Only raise the adult check when the reader has NOT confirmed
            # adulthood.  (Was `if not self.is_adult or self.getConfig(...)`,
            # which by operator precedence raised even with is_adult set.)
            if not (self.is_adult or self.getConfig("is_adult")):
                if "NC-18" in rating:
                    raise exceptions.AdultCheckRequired(self.url)

        # character portraits double as the character list
        characters = soup.find_all('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png"))
        logger.debug("numCharacters: (%s)"%str(len(characters)))
        for character in characters:
            self.story.addToList('characters', character['title'])

        # green "завершен" marker == completed
        if soup.find('font', color = r"green", string = u"завершен"):
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        categories_label = soup.find('strong', string = u"категории:")
        if categories_label is not None:
            categories_element = categories_label.next_sibling.next_sibling
            # categories appear quoted: "a", "b", ...
            for category in re.findall(r'"(.+?)"', categories_element.text):
                self.story.addToList('category', category)

        chapters_header = soup.find('h2', string = re.compile(u"Главы:"))
        if chapters_header is None:
            raise exceptions.FailedToDownload("Error downloading page: %s! Missing required chapters_header element!" % url)
        chapters_table = chapters_header.next_sibling.next_sibling
        self.story.setMetadata('language','Russian')
        chapters=chapters_table.find_all('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+"))
        # numChapters is now an immutable entry maintained by add_chapter();
        # explicit setMetadata removed for consistency with other adapters.
        logger.debug("numChapters: (%s)"%str(len(chapters)))
        for chapter in chapters:
            churl='http://'+self.host+chapter['href']
            # NOTE(review): passes the <a> Tag as the chapter title;
            # presumably add_chapter() stringifies it -- confirm; other
            # adapters pass stripHTML(chapter).
            self.add_chapter(chapter,churl)

    def getChapterText(self, url):
        """Grab the text for an individual chapter."""
        logger.debug('Getting chapter text from: %s' % url)
        soup = self.make_soup(self.get_request(url))
        chapter = soup.find('div', {'id' : 'content'})
        # Check for a missing container *before* dereferencing it (the
        # original checked after chapter.find(), which would have raised
        # AttributeError instead of FailedToDownload).
        if chapter is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
        chapter_header = chapter.find('h1', id = re.compile("chapter"))
        if chapter_header is not None:
            chapter_header.decompose()
        return self.utf8FromSoup(url,chapter)

View file

@ -255,7 +255,6 @@ class SyosetuComAdapter(BaseSiteAdapter):
numChapters = int(re.sub(r'[^\d]', '', infoSoup.find('span', {'class':'p-infotop-type__allep'}).text.strip()))
oneshot = False
completed = True if noveltype == '完結済' else False
self.story.setMetadata('numChapters', numChapters)
self.story.setMetadata('status', 'Completed' if completed else 'In-Progress')
# Keywords

View file

@ -149,20 +149,20 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
# greater than 10, no language or series.
if idnum < 10:
## non-English was changing series sort order which
## confuses me more often than I test other langs.
# langs = {
# 0:"English",
# 1:"Russian",
# 2:"French",
# 3:"German",
# }
# self.story.setMetadata('language',langs[idnum%len(langs)])
self.setSeries('The Great Test',idnum)
self.story.setMetadata('seriesUrl','http://'+self.getSiteDomain()+'/seriesid=1')
elif idnum < 20:
self.setSeries('魔法少女まどか★マギカ',idnum)
self.story.setMetadata('seriesUrl','http://'+self.getSiteDomain()+'/seriesid=1')
elif idnum < 30:
langs = {
0:"English",
1:"Russian",
2:"French",
3:"German",
}
self.story.setMetadata('language',langs[idnum%len(langs)])
if idnum == 0:
self.setSeries("A Nook Hyphen Test "+self.story.getMetadata('dateCreated'),idnum)
self.story.setMetadata('seriesUrl','http://'+self.getSiteDomain()+'/seriesid=0')
@ -491,6 +491,7 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
desc = '<div><p>The Great Test Series of '+self.getSiteDomain()+'!</p><p>Now with two lines!</p></div>'
return {'name':'The Great Test',
'desc':desc,
'status':'AStatus',
'urllist':['http://'+self.getSiteDomain()+'?sid=1',
'http://'+self.getSiteDomain()+'?sid=2',
'http://'+self.getSiteDomain()+'?sid=3',

View file

@ -101,7 +101,6 @@ class TouchFluffyTailAdapter(BaseSiteAdapter):
self.story.setMetadata('status', 'Completed')
self.add_chapter(self.story.getMetadata('title'),url)
self.story.setMetadata('numChapters',1)
avrrate = body.find_all('footer', class_='entry-meta')[1].find('em').span.find_all('strong')
averrating = avrrate[1].text

View file

@ -126,11 +126,6 @@ class TrekFanFictionNetSiteAdapter(BaseSiteAdapter):
## url since we can't get the chapter without this, I'm leaving it in.
self.add_chapter(self.story.getMetadata('title'), url)
## I'm going to comment this out, because there is always only one chapter for each story,
## so this is really not needed
## And I am uncommenting it because the rest of FFF expects
## there to always be numChapters, even if it's one. --Jimm
# getting the rest of the metadata... there isn't much here, and the summary can only be
# gotten on the author's page... so we'll get it to get the information from
adata = self.get_request(self.story.getMetadata('authorUrl'))

View file

@ -199,9 +199,6 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
self.story.setMetadata('series', a.string)
self.story.setMetadata('seriesUrl', urlparse.urljoin(self.BASE_URL, a['href']))
elif key == 'Chapter':
self.story.setMetadata('numChapters', int(value))
elif key == 'Completed':
self.story.setMetadata('status', 'Completed' if value == 'Yes' else 'In-Progress')

View file

@ -670,6 +670,7 @@ try to download.</p>
return url in self.add_img_names
def include_css_urls(self,parenturl,style):
FONT_EXTS = ('ttf','otf','woff','woff2')
# logger.debug("include_css_urls(%s,%s)"%(parenturl,style))
## pass in the style string, will be returned with URLs
## replaced and images will be added.
@ -680,12 +681,16 @@ try to download.</p>
## url('href')
## the pattern will also accept mismatched '/", which is broken CSS.
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
logger.debug("Adding style url(%s)"%style_url)
## additional_images don't get processing. Applies
## only to CSS url(), that should be the only time
## additional_images is used.
if self.is_additional_image(style_url):
logger.debug("Skipping sheet style url(%s), in additional_images"%style_url)
continue
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Adding style url(%s)"%style_url)
try:
# longdesc(aka origurl) isn't saved anywhere in CSS.
@ -777,7 +782,7 @@ try to download.</p>
(img['src'],longdesc)=self.story.addImgUrl(url,self.img_url_trans(img['src']),fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp'))
if longdesc:
logger.debug("---set longdesc:%s"%longdesc)
# logger.debug("---set longdesc:%s"%longdesc)
img['longdesc'] = longdesc
except AttributeError as ae:
logger.info("Parsing for img tags failed--probably poor input HTML. Skipping img(%s)"%img)
@ -828,7 +833,9 @@ try to download.</p>
## handle identifiers that otherwise appear to be
## selectors themselves. #966
try:
if href[0] == "#" and soup.select_one("[id='%s']"%href[1:]):
# logger.debug("Search for internal link anchor href:(%s)"%href)
if href[0] == "#" and soup.select_one("[id='%s'], [name='%s']"%(href[1:],href[1:])):
# logger.debug("Found internal link anchor href:(%s)"%href)
hrefurl = href
except Exception as e:
logger.debug("Search for internal link anchor failed href:(%s)"%href)

View file

@ -317,8 +317,6 @@ class BaseEfictionAdapter(BaseSiteAdapter):
for val in re.split(r"\s*,\s*", value):
# TODO this should be an official field I guess
self.story.addToList('challenge', val)
elif key == 'Chapters':
self.story.setMetadata('numChapters', int(value))
elif key == 'Rating' or key == 'Rated':
self.story.setMetadata('rating', value)
elif key == 'Word count':
@ -446,7 +444,7 @@ class BaseEfictionAdapter(BaseSiteAdapter):
if sn:
self.story.setMetadata('storynotes', stripHTML(sn))
if not self.story.getMetadata('rating'):
if not self.story.getMetadataRaw('rating'):
self.getRatingFromTOC();
## Chapter URLs

View file

@ -320,7 +320,6 @@ class BaseOTWAdapter(BaseSiteAdapter):
# break epub update.
# Find the chapters:
chapters=soup.find_all('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
self.story.setMetadata('numChapters',len(chapters))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
if len(chapters)==1:
self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+chapters[0]['href'])

View file

@ -22,6 +22,7 @@ from .base_browsercache import BaseBrowserCache, CACHE_DIR_CONFIG
from .browsercache_simple import SimpleCache
from .browsercache_blockfile import BlockfileCache
from .browsercache_firefox2 import FirefoxCache2
from .browsercache_sqldb import SqldbCache
import logging
logger = logging.getLogger(__name__)
@ -34,12 +35,13 @@ class BrowserCache(object):
def __init__(self, site, getConfig_fn, getConfigList_fn):
"""Constructor for BrowserCache"""
# import of child classes have to be inside the def to avoid circular import error
for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2]:
for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2, SqldbCache]:
self.browser_cache_impl = browser_cache_class.new_browser_cache(site,
getConfig_fn,
getConfigList_fn)
if self.browser_cache_impl is not None:
break
logger.debug("Not using Browser Cache Class %s"%browser_cache_class)
if self.browser_cache_impl is None:
raise BrowserCacheException("%s is not set, or directory does not contain a known browser cache type: '%s'"%
(CACHE_DIR_CONFIG,getConfig_fn(CACHE_DIR_CONFIG)))

View file

@ -90,18 +90,23 @@ class BlockfileCache(BaseChromiumCache):
def is_cache_dir(cache_dir):
    """Return True only if a directory is a valid Cache for this class"""
    def _starts_with_magic(path, magic, missing_msg, bad_magic_msg):
        # Every blockfile cache file opens with a 4-byte LE magic number.
        if not os.path.isfile(path):
            logger.debug(missing_msg)
            return False
        with share_open(path, 'rb') as fh:
            (value,) = struct.unpack('I', fh.read(4))
        if value != magic:
            logger.debug(bad_magic_msg)
            return False
        return True

    if not os.path.isdir(cache_dir):
        logger.debug("Cache dir not found")
        return False
    # Both the index and the first block file must check out.
    return (_starts_with_magic(os.path.join(cache_dir, "index"),
                               INDEX_MAGIC_NUMBER,
                               "index file not found",
                               "index file failed magic number check")
            and _starts_with_magic(os.path.join(cache_dir, "data_0"),
                                   BLOCK_MAGIC_NUMBER,
                                   "data_0 file not found",
                                   "data_0 failed magic number check"))

View file

@ -68,6 +68,7 @@ class FirefoxCache2(BaseBrowserCache):
"""Return True only if a directory is a valid Cache for this class"""
# logger.debug("\n\n1Starting cache check\n\n")
if not os.path.isdir(cache_dir):
logger.debug("Cache dir not found")
return False
## check at least one entry file exists.
for en_fl in glob.iglob(os.path.join(cache_dir, 'entries', '????????????????????????????????????????')):
@ -75,6 +76,7 @@ class FirefoxCache2(BaseBrowserCache):
k = _validate_entry_file(en_fl)
if k is not None:
return True
logger.debug("No valid cache files found")
return False
def make_keys(self,url):

View file

@ -76,15 +76,19 @@ class SimpleCache(BaseChromiumCache):
def is_cache_dir(cache_dir):
"""Return True only if a directory is a valid Cache for this class"""
if not os.path.isdir(cache_dir):
logger.debug("Cache dir not found")
return False
index_file = os.path.join(cache_dir, "index")
if not (os.path.isfile(index_file) and os.path.getsize(index_file) == 24):
if not os.path.isfile(index_file) or os.path.getsize(index_file) > 24:
logger.debug("index file not found or too big(%s)"%os.path.getsize(index_file))
return False
real_index_file = os.path.join(cache_dir, "index-dir", "the-real-index")
if not os.path.isfile(real_index_file):
logger.debug("real_index_file not found")
return False
with share_open(real_index_file, 'rb') as index_file:
if struct.unpack('QQ', index_file.read(16))[1] != THE_REAL_INDEX_MAGIC_NUMBER:
logger.debug("real_index_file failed magic number check")
return False
try:
# logger.debug("\n\nStarting cache check\n\n")
@ -92,9 +96,11 @@ class SimpleCache(BaseChromiumCache):
k = _validate_entry_file(en_fl)
if k is not None:
return True
except SimpleCacheException:
except SimpleCacheException as sce:
# raise
logger.debug(sce)
return False
logger.debug("No valid cache files found")
return False
def get_data_key_impl(self, url, key):

View file

@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
# Copyright 2026 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import os
import apsw
import ctypes
# note share_open (on windows CLI) is implicitly readonly.
from .share_open import share_open
from .base_chromium import BaseChromiumCache
from .chromagnon import SuperFastHash
import logging
logger = logging.getLogger(__name__)
class SqldbCache(BaseChromiumCache):
    """Class to access data stream in Chrome Disk Sqldb Cache format cache files"""

    def __init__(self, *args, **kargs):
        """Constructor for SqldbCache"""
        super(SqldbCache,self).__init__(*args, **kargs)
        logger.debug("Using SqldbCache")

    # def scan_cache_keys(self):
    ## XXX will impl a scan if and when needed. It's a lot easier
    ## to peek inside an sqlite

    @staticmethod
    def is_cache_dir(cache_dir):
        """Return True only if a directory is a valid Cache for this class"""
        if not os.path.isdir(cache_dir):
            logger.debug("Cache dir not found")
            return False
        index_path = os.path.join(cache_dir, "index")
        if not os.path.isfile(index_path):
            logger.debug("index file not found")
            return False
        sqldb0_path = os.path.join(cache_dir, "sqldb0")
        if not os.path.isfile(sqldb0_path):
            logger.debug("sqldb0 file not found")
            return False
        ## XXX check schema of db?
        return True

    ## XXX others uses share_open() - will sqlite open work concurrently?
    def get_data_key_impl(self, url, key):
        """
        returns location, entry age(unix epoch), content-encoding and
        raw(compressed) data
        """
        location, age, encoding, data = '', None, None, None
        qstr = 'SELECT last_used, head, blob FROM resources as r join blobs as b on b.res_id=r.res_id where cache_key_hash=?'
        cache_key_hash = _key_hash(key)
        logger.debug(" key:%s"%key)
        logger.debug("cache_key_hash:%s"%cache_key_hash)
        ## XXX worth optimizing to keep sql conn open?
        # (unused `pathname2url` import removed; plain path works in the URI)
        fileuri = os.path.join(self.cache_dir, "sqldb0")# pathname2url()
        logger.debug(fileuri)
        # custom VFS so the db can be opened while the browser holds it
        shareopenVFS = ShareOpenVFS()
        logger.debug("VFS available %s"% apsw.vfs_names())
        with apsw.Connection("file:"+fileuri+"?immutable=1",
                             flags=apsw.SQLITE_OPEN_READONLY | apsw.SQLITE_OPEN_URI,
                             vfs=shareopenVFS.vfs_name
                             ) as db:
            # was "%xd" -- printed a literal 'd' after the hex value
            logger.debug("db flags:%x"%db.open_flags)
            logger.debug("db vfs:%s"%db.open_vfs)
            for last, head, blob in db.execute(qstr,[cache_key_hash]):
                row_age = self.make_age(last)
                # NOTE(review): `break` keeps the newest row seen so far;
                # assumes no newer row follows an older one -- confirm.
                if age and row_age < age:
                    logger.debug("skipping an older row for same hash")
                    break
                age = row_age
                logger.debug("age from last_used:%s"%age)
                ## cheesy way to pull out the http headers, inspired
                ## by equal cheese in chromagnon/cacheData.py. Only
                ## actually care about location &content-encoding,
                ## ignore the rest.
                head = head[head.index(b'HTTP'):]
                head = head[:head.index(b'\x00\x00')]
                # logger.debug(head)
                for line in head.split(b'\0'):
                    logger.debug(line)
                    if b'content-encoding' in line.lower():
                        encoding = line.split(b':')[1].strip().lower()
                        logger.debug("encoding from header:%s"%encoding)
                    if b'location' in line.lower():
                        # re-join in case the URL itself contains ':'
                        location = b':'.join(line.split(b':')[1:]).strip()
                        # was logging `encoding` here -- copy/paste bug
                        logger.debug("location from header:%s"%location)
                ## XXX might need entry age from header, too.
                ## Hoping db last_used is equiv.
                data = blob
        if data:
            return (location, age, encoding, data)
        else:
            return None
## calculate SuperFashHash, but the sql saved it signed.
## calculate SuperFashHash, but the sql saved it signed.
def _key_hash(key):
    """Return the SuperFastHash of *key* folded to the signed 32-bit value stored in the db."""
    masked = SuperFastHash.superFastHash(key) & 0xFFFFFFFF
    return ctypes.c_int32(masked).value
class ShareOpenVFS(apsw.VFS):
    """Custom sqlite VFS that routes file access through share_open()
    (via ShareOpenVFSFile), for read-only access to a db another
    process may hold open."""
    def __init__(self):
        self.vfs_name = 'shareopen'
        # base='' delegates anything not overridden to the default VFS.
        super().__init__(name=self.vfs_name, base='')
    def xAccess(self, pathname, flags):
        # claim every path is accessible; xOpen will fail if it truly isn't
        return True
    def xFullPathname(self, filename):
        # paths are passed through unchanged
        return filename
    def xDelete(self, filename, syncdir):
        # never delete anything from the browser's cache
        logger.debug("xDelete NOT DELETING")
        pass
    def xOpen(self, name, flags):
        return ShareOpenVFSFile(name, flags)
class ShareOpenVFSFile:
    """File object for ShareOpenVFS: read-only, opened with share_open();
    all write/lock/delete callbacks are inert stubs."""
    def __init__(self, name, flags):
        # name may be an apsw.URIFilename (when opened via a file: URI)
        self.filename = name.filename() if isinstance(name, apsw.URIFilename) else name
        self.filename = os.path.normpath(self.filename)
        logger.debug("Doing share open(%s)"%self.filename)
        self.file = share_open(self.filename, 'rb')
    def xRead(self, amount, offset):
        self.file.seek(offset, 0)
        return self.file.read(amount)
    def xFileSize(self):
        return os.stat(self.filename).st_size
    def xClose(self):
        self.file.close()
    def xSectorSize(self):
        # 0 == let sqlite use its default sector size
        return 0
    def xFileControl(self, *args):
        # no file-control opcodes handled
        return False
    def xCheckReservedLock(self):
        # read-only: never report a reserved lock
        return False
    def xLock(self, level):
        # locking is a no-op; share_open handles concurrent access
        pass
    def xUnlock(self, level):
        pass
    def xSync(self, flags):
        # nothing is ever written, so sync trivially succeeds
        return True
    def xTruncate(self, newsize):
        logger.debug("xTruncate NOT TRUNCING")
        pass
    def xWrite(self, data, offset):
        logger.debug("xWrite NOT WRITING")
        pass

View file

@ -27,8 +27,7 @@ import pprint
import string
import os, sys, platform
version="4.54.0"
version="4.57.7"
os.environ['CURRENT_VERSION_ID']=version
global_cache = 'global_cache'
@ -51,6 +50,8 @@ from fanficfare.geturls import get_urls_from_page, get_urls_from_imap
from fanficfare.six.moves import configparser
from fanficfare.six import text_type as unicode
from fanficfare.fff_profile import do_cprofile
def write_story(config, adapter, writeformat,
metaonly=False, nooutput=False,
outstream=None):
@ -346,6 +347,7 @@ def main(argv=None,
dispatch(options, urls, passed_defaultsini, passed_personalini, warn, fail)
# make rest a function and loop on it.
@do_cprofile
def do_download(arg,
options,
passed_defaultsini,

View file

@ -139,19 +139,6 @@ def get_valid_sections():
allowedsections.append('%s:%s'%(section,f))
return allowedsections
def get_valid_list_entries():
return list(['category',
'genre',
'characters',
'ships',
'warnings',
'extratags',
'author',
'authorId',
'authorUrl',
'lastupdate',
])
boollist=['true','false']
base_xenforo2_list=['base_xenforo2forum',
'forums.sufficientvelocity.com',
@ -188,7 +175,7 @@ def get_valid_set_options():
This is to further restrict keywords to certain sections and/or
values. get_valid_keywords() below is the list of allowed
keywords. Any keyword listed here must also be listed there.
keywords. Any keyword not listed here must be listed there.
This is what's used by the code when you save personal.ini in
plugin that stops and points out possible errors in keyword
@ -343,6 +330,158 @@ def get_valid_set_options():
return dict(valdict)
# *known* keywords -- or rather regexps for them.
def get_valid_keywords():
    '''
    Return the full list of *known* keyword regexps: every keyword that
    has a value check in get_valid_set_options(), plus the free-form
    keywords listed below.

    Among other things, this list is used by the color highlighting in
    personal.ini editing in plugin.  Note that entries in
    get_valid_set_options() do not need to be duplicated here anymore.
    '''
    # keep the literal list alphabetical to ease manual scanning
    return list(get_valid_set_options().keys())+\
        ['(in|ex)clude_metadata_(pre|post)',
         'add_category_when_multi_category',
         'add_genre_when_multi_category',
         'adult_ratings',
         'allow_unsafe_filename',
         'always_overwrite',
         'anthology_merge_keepsingletocs',
         'anthology_tags',
         'anthology_title_pattern',
         'background_color',
         'browser_cache_age_limit',
         'chapter_end',
         'chapter_start',
         'chapter_title_add_pattern',
         'chapter_title_addnew_pattern',
         'chapter_title_def_pattern',
         'chapter_title_error_mark',
         'chapter_title_new_pattern',
         'chapter_title_strip_pattern',
         'chardet_confidence_limit',
         'comma_entries',
         'connect_timeout',
         'continue_on_chapter_error_try_limit',
         'convert_images_to',
         'cover_content',
         'cover_exclusion_regexp',
         'cover_min_size',
         'custom_columns_settings',
         'dateCreated_format',
         'datePublished_format',
         'dateUpdated_format',
         'datethreadmark_format',
         'default_cover_image',
         'description_limit',
         'epub_version',
         'exclude_editor_signature',
         'exclude_notes',
         'extra_logpage_entries',
         'extra_subject_tags',
         'extra_titlepage_entries',
         'extra_valid_entries',
         'extracategories',
         'extracharacters',
         'extragenres',
         'extraships',
         'extratags',
         'extrawarnings',
         'fail_on_password',
         'file_end',
         'file_start',
         'fileformat',
         'find_chapters',
         'fix_pseudo_html',
         'flaresolverr_proxy_address',
         'flaresolverr_proxy_port',
         'flaresolverr_proxy_protocol',
         'flaresolverr_proxy_timeout',
         'flaresolverr_session',
         'force_cover_image',
         'force_img_self_referer_regexp',
         'force_login',
         'generate_cover_settings',
         'http_proxy',
         'https_proxy',
         'ignore_chapter_url_list',
         'image_max_size',
         'include_subject_tags',
         'join_string_authorHTML',
         'keep_empty_tags',
         'keep_html_attrs',
         'keep_summary_html',
         'logpage_end',
         'logpage_entries',
         'logpage_entry',
         'logpage_start',
         'logpage_update_end',
         'logpage_update_start',
         'make_directories',
         'make_linkhtml_entries',
         'max_fg_sleep',
         'max_fg_sleep_at_downloads',
         'max_zalgo',
         'min_fg_sleep',
         'no_image_processing_regexp',
         'nsapa_proxy_address',
         'nsapa_proxy_port',
         'order_threadmarks_by_date_categories',
         'output_css',
         'output_filename',
         'output_filename_safepattern',
         'password',
         'post_process_cmd',
         'rating_titles',
         'reader_posts_per_page',
         'remove_tags',
         'remove_transparency',
         'replace_chapter_text',
         'replace_metadata',
         'replace_tags_with_spans',
         'replace_xbr_with_hr',
         'show_spoiler_tags',
         'skip_threadmarks_categories',
         'slow_down_sleep_time',
         'sort_ships_splits',
         'strip_chapter_numeral',
         'threadmark_category_order',
         'threadmarks_per_page',
         'title_chapter_range_pattern',
         'titlepage_end',
         'titlepage_entries',
         'titlepage_entry',
         'titlepage_no_title_entry',
         'titlepage_start',
         'titlepage_wide_entry',
         'tocpage_end',
         'tocpage_entry',
         'tocpage_start',
         'user_agent',
         'username',
         'website_encodings',
         'wide_titlepage_entries',
         'wrap_width',
         'zip_filename',
         'zip_output'
         ]
# *known* entry keywords -- or rather regexps for them.
# *known* entry keywords -- or rather regexps for them.
def get_valid_entry_keywords():
    """Regexp templates for per-entry keywords; %s is filled with the entry name."""
    # (redundant list() wrapper around the list literal removed)
    return ['%s_(label|format)',
            '(default_value|include_in|join_string|keep_in_order)_%s',]
def get_valid_list_entries():
    """Metadata entries treated as lists (vs scalars) by the config layer."""
    # (redundant list() wrapper around the list literal removed)
    return ['category',
            'genre',
            'characters',
            'ships',
            'warnings',
            'extratags',
            'author',
            'authorId',
            'authorUrl',
            'lastupdate',
            ]
def get_valid_scalar_entries():
return list(['series',
'seriesUrl',
@ -382,263 +521,20 @@ def get_valid_scalar_entries():
def get_valid_entries():
return get_valid_list_entries() + get_valid_scalar_entries()
# *known* keywords -- or rather regexps for them.
def get_valid_keywords():
'''
Among other things, this list is used by the color highlighting in
personal.ini editing in plugin. Note that it's separate from
value checking and most keywords need to be added to both.
'''
return list(['(in|ex)clude_metadata_(pre|post)',
'add_chapter_numbers',
'add_genre_when_multi_category',
'add_category_when_multi_category',
'adult_ratings',
'allow_unsafe_filename',
'always_overwrite',
'anthology_tags',
'anthology_title_pattern',
'anthology_merge_keepsingletocs',
'background_color',
'bulk_load',
'chapter_end',
'chapter_start',
'chapter_title_strip_pattern',
'chapter_title_def_pattern',
'chapter_title_add_pattern',
'chapter_title_new_pattern',
'chapter_title_addnew_pattern',
'title_chapter_range_pattern',
'mark_new_chapters',
'check_next_chapter',
'meta_from_last_chapter',
'skip_author_cover',
'try_shortened_title_urls',
'collect_series',
'comma_entries',
'connect_timeout',
'convert_images_to',
'cover_content',
'cover_exclusion_regexp',
'custom_columns_settings',
'dateCreated_format',
'datePublished_format',
'dateUpdated_format',
'default_cover_image',
'force_cover_image',
'force_img_self_referer_regexp',
'description_limit',
'do_update_hook',
'use_archived_author',
'use_view_full_work',
'use_workskin',
'always_login',
'exclude_notes',
'remove_authorfootnotes_on_update',
'use_archive_transformativeworks_org',
'use_archiveofourown_gay',
'exclude_editor_signature',
'extra_logpage_entries',
'extra_subject_tags',
'extra_titlepage_entries',
'extra_valid_entries',
'extratags',
'extracategories',
'extragenres',
'extracharacters',
'extraships',
'extrawarnings',
'fail_on_password',
'file_end',
'file_start',
'fileformat',
'find_chapters',
'fix_fimf_blockquotes',
'keep_prequel_in_description',
'scrape_bookshelf',
'include_author_notes',
'force_login',
'generate_cover_settings',
'grayscale_images',
'image_max_size',
'include_images',
'jpg_quality',
'additional_images',
'include_logpage',
'logpage_at_end',
'calibre_series_meta',
'force_update_epub_always',
'page_progression_direction_rtl',
'include_subject_tags',
'include_titlepage',
'include_tocpage',
'chardet_confidence_limit',
'is_adult',
'join_string_authorHTML',
'keep_style_attr',
'keep_title_attr',
'keep_html_attrs',
'remove_class_chapter',
'replace_tags_with_spans',
'keep_empty_tags',
'remove_tags',
'keep_summary_html',
'logpage_end',
'logpage_entries',
'logpage_entry',
'logpage_start',
'logpage_update_end',
'logpage_update_start',
'make_directories',
'make_firstimage_cover',
'use_old_cover',
'make_linkhtml_entries',
'max_fg_sleep',
'max_fg_sleep_at_downloads',
'min_fg_sleep',
'never_make_cover',
'cover_min_size',
'no_image_processing',
'no_image_processing_regexp',
'dedup_img_files',
'convert_inline_images',
'non_breaking_spaces',
'download_text_version',
'nook_img_fix',
'output_css',
'output_filename',
'output_filename_safepattern',
'password',
'post_process_cmd',
'rating_titles',
'remove_transparency',
'replace_br_with_p',
'replace_chapter_text',
'replace_hr',
'remove_empty_p',
'replace_xbr_with_hr',
'replace_metadata',
'slow_down_sleep_time',
'sort_ships',
'sort_ships_splits',
'strip_chapter_numbers',
'strip_chapter_numeral',
'strip_text_links',
'centeredcat_to_characters',
'pairingcat_to_characters_ships',
'romancecat_to_characters_ships',
'use_meta_keywords',
'clean_chapter_titles',
'conditionals_use_lists',
'description_in_chapter',
'order_chapters_by_date',
'fetch_stories_from_api',
'tags_from_chapters',
'dates_from_chapters',
'include_chapter_descriptions_in_summary',
'inject_chapter_title',
'inject_chapter_image',
'append_datepublished_to_storyurl',
'auto_sub',
'titlepage_end',
'titlepage_entries',
'titlepage_entry',
'titlepage_no_title_entry',
'titlepage_start',
'titlepage_use_table',
'titlepage_wide_entry',
'tocpage_end',
'tocpage_entry',
'tocpage_start',
'tweak_fg_sleep',
'universe_as_series',
'use_ssl_unverified_context',
'use_ssl_default_seclevelone',
'http_proxy',
'https_proxy',
'use_cloudscraper',
'use_basic_cache',
'use_browser_cache',
'use_browser_cache_only',
'open_pages_in_browser',
'use_nsapa_proxy',
'nsapa_proxy_address',
'nsapa_proxy_port',
'use_flaresolverr_proxy',
'flaresolverr_proxy_address',
'flaresolverr_proxy_port',
'flaresolverr_proxy_protocol',
'flaresolverr_proxy_timeout',
'use_flaresolverr_session',
'flaresolverr_session',
'browser_cache_path',
'browser_cache_age_limit',
'user_agent',
'username',
'website_encodings',
'wide_titlepage_entries',
'windows_eol',
'wrap_width',
'zip_filename',
'zip_output',
'capitalize_forumtags',
'continue_on_chapter_error',
'chapter_title_error_mark',
'continue_on_chapter_error_try_limit',
'minimum_threadmarks',
'first_post_title',
'always_include_first_post',
'always_reload_first_chapter',
'always_use_forumtags',
'use_reader_mode',
'author_avatar_cover',
'reader_posts_per_page',
'threadmarks_per_page',
'remove_spoilers',
'legend_spoilers',
'details_spoilers',
'apocrypha_to_omake',
'skip_threadmarks_categories',
'fix_relative_text_links',
'normalize_text_links',
'internalize_text_links',
'replace_failed_smilies_with_alt_text',
'use_threadmark_wordcounts',
'always_include_first_post_chapters',
'threadmark_category_order',
'order_threadmarks_by_date',
'order_threadmarks_by_date_categories',
'reveal_invisible_text',
'use_threadmarks_description',
'use_threadmarks_status',
'use_threadmarks_cover',
'skip_sticky_first_posts',
'include_dice_rolls',
'include_nonauthor_poster',
'link_embedded_media',
'include_chapter_banner_images',
'dateUpdated_method',
'datethreadmark_format',
'fix_pseudo_html',
'fix_excess_space',
'dedup_order_chapter_list',
'ignore_chapter_url_list',
'include_appendices',
'dedup_chapter_list',
'show_timestamps',
'show_nsfw_cover_images',
'show_spoiler_tags',
'max_zalgo',
'decode_emails',
'epub_version',
'prepend_section_titles',
'replace_text_formatting',
])
# *known* entry keywords -- or rather regexps for them.
def get_valid_entry_keywords():
    """Return regexp templates for *known* entry keywords.

    Each template contains a ``%s`` placeholder that callers fill in
    with a metadata entry name before matching.
    """
    # A plain list literal is returned; list([...]) was redundant.
    return ['%s_(label|format)',
            '(default_value|include_in|join_string|keep_in_order)_%s',
            ]
## Metadata entries that are not allowed to be changed.
def get_immutable_entries():
    """Return metadata entry names that are not allowed to be changed.

    These are set by the adapters/downloader and must not be overridden
    by user replace_metadata or similar config machinery.
    """
    # A plain list literal is returned; list([...]) was redundant.
    return [
        'authorId',
        'authorUrl',
        'storyId',
        'storyUrl',
        'langcode',
        'numChapters',
        'site',
        'anthology',
        'newforanthology',
        'cover_image',
        ]
# Moved here for test_config.
def make_generate_cover_settings(param):
@ -706,9 +602,13 @@ class Configuration(ConfigParser):
self.listTypeEntries = get_valid_list_entries()
self.validEntries = get_valid_entries()
self.immutableEntries = get_immutable_entries()
self.url_config_set = False
## to improve performance, cache config values.
self.cached_config = {}
def section_url_names(self,domain,section_url_f):
## domain is passed as a method to limit the damage if/when an
## adapter screws up _section_url
@ -750,6 +650,12 @@ class Configuration(ConfigParser):
def getValidMetaList(self):
return self.validEntries + self.getConfigList("extra_valid_entries")
def isImmutableMetaEntry(self, key):
return key in self.getImmutableMetaList()
def getImmutableMetaList(self):
return self.immutableEntries
# used by adapters & writers, non-convention naming style
def hasConfig(self, key):
return self.has_config(self.sectionslist, key)
@ -780,6 +686,10 @@ class Configuration(ConfigParser):
return self.get_config(self.sectionslist,key,default)
def get_config(self, sections, key, default=""):
try:
return self.cached_config[(tuple(sections),key)]
except KeyError as ke:
pass
val = default
val_files = []
@ -824,6 +734,7 @@ class Configuration(ConfigParser):
except (configparser.NoOptionError, configparser.NoSectionError) as e:
pass
self.cached_config[(tuple(sections),key)] = val
return val
# split and strip each.
@ -1218,6 +1129,9 @@ class Configurable(object):
def isValidMetaEntry(self, key):
return self.configuration.isValidMetaEntry(key)
def isImmutableMetaEntry(self, key):
return self.configuration.isImmutableMetaEntry(key)
def getValidMetaList(self):
return self.configuration.getValidMetaList()

View file

@ -1592,18 +1592,13 @@ chaptertitles:Prologue,Chapter 1\, Xenos on Cinnabar,Chapter 2\, Sinmay on Kinti
[adult-fanfiction.org]
use_basic_cache:true
extra_valid_entries:eroticatags,disclaimer
eroticatags_label:Erotica Tags
disclaimer_label:Disclaimer
extra_titlepage_entries:eroticatags,disclaimer
## Some sites require login (or login for some rated stories). The
## program can prompt you, or you can save it in config. In the
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
[althistory.com]
## Note this is NOT the same as www.alternatehistory.com
## see [base_xenforoforum]
@ -1717,13 +1712,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## AO3 uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -1932,7 +1927,7 @@ make_linkhtml_entries:translators,betas
## For most sites, 'category' is the fandom, but fanfics.me has
## fandoms and a separate category. By making it configurable, users
## can change it.
include_in_category:fandoms
include_in_category:category,fandoms
[fanfictalk.com]
use_basic_cache:true
@ -2708,13 +2703,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -3015,13 +3010,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -3150,8 +3145,8 @@ bookmarkmemo_label:ブックマークメモ
bookmarkprivate_label:非公開ブックマーク
subscribed_label:更新通知
include_in_genre: fullgenre
#include_in_genre: biggenre, smallgenre
include_in_genre: genre, fullgenre
#include_in_genre: genre, biggenre, smallgenre
## adds to titlepage_entries instead of replacing it.
#extra_titlepage_entries: fullgenre,biggenre,smallgenre,imprint,freeformtags,comments,reviews,bookmarks,ratingpoints,overallpoints,bookmarked,bookmarkcategory,bookmarkmemo,bookmarkprivate,subscribed
@ -3394,13 +3389,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -3531,7 +3526,7 @@ upvotes_label:Upvotes
subscribers_label:Subscribers
views_label:Views
include_in_category:tags
include_in_category:category,tags
#extra_titlepage_entries:upvotes,subscribers,views
@ -3667,13 +3662,13 @@ make_linkhtml_entries:series00,series01,series02,series03,collections
## hardcoded to include the site specific metadata freeformtags &
## ao3categories in the standard metadata field genre. By making it
## configurable, users can change it.
include_in_genre: freeformtags, ao3categories
include_in_genre: genre, freeformtags, ao3categories
## OTW uses the word 'category' differently than most sites. The
## adapter used to be hardcoded to include the site specific metadata
## fandom in the standard metadata field category. By making it
## configurable, users can change it.
include_in_category:fandoms
include_in_category:category,fandoms
## freeformtags was previously typo'ed as freefromtags. This way,
## freefromtags will still work for people who've used it.
@ -4406,9 +4401,6 @@ extracategories:Buffy: The Vampire Slayer
extracharacters:Buffy, Spike
extraships:Spike/Buffy
[www.swi.org.ru]
use_basic_cache:true
[www.the-sietch.com]
## see [base_xenforoforum]

View file

@ -20,25 +20,9 @@ from .six import ensure_text, text_type as unicode
from .six import string_types as basestring
from io import BytesIO
# from io import StringIO
# import cProfile, pstats
# from pstats import SortKey
# def do_cprofile(func):
# def profiled_func(*args, **kwargs):
# profile = cProfile.Profile()
# try:
# profile.enable()
# result = func(*args, **kwargs)
# profile.disable()
# return result
# finally:
# # profile.sort_stats(SortKey.CUMULATIVE).print_stats(20)
# s = StringIO()
# sortby = SortKey.CUMULATIVE
# ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
# ps.print_stats(20)
# print(s.getvalue())
# return profiled_func
FONT_EXTS = ('ttf','otf','woff','woff2')
from fanficfare.fff_profile import do_cprofile
import bs4
@ -49,9 +33,52 @@ def get_dcsource_chaptercount(inputio):
## getsoups=True to check for continue_on_chapter_error chapters.
return get_update_data(inputio,getfilecount=True,getsoups=True)[:2] # (source,filecount)
def get_cover_data(inputio):
    """Return the old-cover tuple for the epub given by *inputio*.

    Delegates to get_update_data(); index 4 of its result is
    (oldcoverhtmlhref, oldcoverhtmltype, oldcoverhtmldata,
     oldcoverimghref, oldcoverimgtype, oldcoverimgdata).
    """
    update_data = get_update_data(inputio, getfilecount=True, getsoups=False)
    return update_data[4]
## Only finds and returns the cover image type and data, not the cover page.
## Should work on any epub; added for anthology cover issues.
def get_cover_img(inputio):
    """Find and return only the cover image (type, data) from an epub.

    Returns (covertype, coverdata) where covertype is the item's
    media-type string and coverdata the raw image bytes, or
    (None, None) when no cover is declared or it cannot be read.
    Should work on any epub; does not look for a cover *page*.
    """
    # (oldcoverimgtype,oldcoverimgdata)
    epub = ZipFile(inputio, 'r') # works equally well with inputio as a path or a blob

    ## Find the .opf file.
    container = epub.read("META-INF/container.xml")
    containerdom = parseString(container)
    rootfilenodelist = containerdom.getElementsByTagName("rootfile")
    rootfilename = rootfilenodelist[0].getAttribute("full-path")
    contentdom = parseString(epub.read(rootfilename))
    firstmetadom = contentdom.getElementsByTagName("metadata")[0]

    ## Save the path to the .opf file--hrefs inside it are relative to it.
    relpath = get_path_part(rootfilename)
    # logger.debug("relpath:%s"%relpath)

    # The cover is declared in metadata as:
    # <meta name="cover" content="cover"/>
    # where content is the manifest id of the image item.
    coverid = None
    covertype = None
    coverdata = None
    for metatag in firstmetadom.getElementsByTagName("meta"):
        if metatag.getAttribute('name') == 'cover':
            coverid = metatag.getAttribute('content')
            # logger.debug("coverid:%s"%coverid)
            break
    if coverid:
        # Look the id up in the manifest to get the image's href/type.
        for item in contentdom.getElementsByTagName("item"):
            if item.getAttribute('id') == coverid:
                coverhref = relpath+item.getAttribute("href")
                ## remove .. and the part it obviates
                # NOTE(review): single-pass sub; deeply nested "a/b/../../"
                # paths would not fully resolve -- assumed not to occur.
                coverhref = re.sub(r"([^/]+/\.\./)","",coverhref)
                covertype = item.getAttribute('media-type')
                # logger.debug("covertype:%s coverhref:%s"%(covertype,coverhref))
                try:
                    coverdata = epub.read(coverhref)
                    # logger.debug("coverdatalen:%s"%len(coverdata))
                except Exception as e:
                    # Manifest listed a file missing from the zip; report
                    # and fall back to "no cover" rather than raising.
                    logger.info("Failed to read cover (%s): %s"%(coverhref,e))
                    covertype, coverdata = None, None
                break
    return covertype, coverdata
def get_oldcover(epub,relpath,contentdom,item):
href=relpath+item.getAttribute("href")
@ -156,7 +183,11 @@ def get_update_data(inputio,
# (_u\d+)? is from calibre convert naming files
# 3/OEBPS/file0005_u3.xhtml etc.
if getsoups:
soup = make_soup(epub.read(href).decode("utf-8"))
try:
soup = make_soup(epub.read(href).decode("utf-8"))
except:
logger.warning("Listed chapter file(%s) not found in epub, skipping."%href)
continue
for img in soup.find_all('img'):
newsrc=''
longdesc=''
@ -191,6 +222,9 @@ def get_update_data(inputio,
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
if style_url.startswith('failedtoload'):
continue
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Updating inline/embedded style url(%s)"%style_url)
newsrc=''
longdesc=''
@ -257,11 +291,18 @@ def get_update_data(inputio,
## update. output_css is configured, but 'extra_css' like
## otw workskin might vary.
if item.getAttribute("media-type") == "text/css" and getsoups:
style = epub.read(href).decode("utf-8")
try:
style = epub.read(href).decode("utf-8")
except:
logger.warning("Listed CSS file(%s) not found in epub, skipping."%href)
continue
if 'url(' in style:
# logger.debug("%s CSS url:%s"%(href,style))
## the pattern will also accept mismatched '/", which is broken CSS.
for style_url in re.findall(r'url\([\'"]?(.*?)[\'"]?\)', style):
if style_url.rsplit('.')[-1].lower() in FONT_EXTS:
logger.debug("Skipping sheet style url(%s), assumed font"%style_url)
continue
logger.debug("Updating sheet style url(%s)"%style_url)
newsrc=''
longdesc=''
@ -288,10 +329,18 @@ def get_update_data(inputio,
for item in contentdom.getElementsByTagName("item"):
href=relpath+item.getAttribute("href")
if item.getAttribute("media-type").startswith("image/") and getsoups:
if oldcover and href == oldcover[3]:
# don't include cover image, already handled by
# oldcover code and can trip de-dup unintentionally.
continue
img_url = href.replace("OEBPS/","")
# logger.debug("-->img img:%s"%img_url)
if img_url not in images:
data = epub.read(href)
try:
data = epub.read(href)
except:
logger.warning("Listed image file(%s) not found in epub, skipping."%href)
continue
# logger.debug("-->img Add oldimages:%s"%href)
images[img_url] = (img_url, data)
try:
@ -383,7 +432,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
return ahref
return None
# @do_cprofile
@do_cprofile
def reset_orig_chapters_epub(inputio,outfile):
inputepub = ZipFile(inputio, 'r') # works equally well with a path or a blob
@ -436,28 +485,50 @@ def reset_orig_chapters_epub(inputio,outfile):
if re.match(r'.*/file\d+\.xhtml',zf):
#logger.debug("zf:%s"%zf)
data = data.decode('utf-8')
# should be re-reading an FFF file, single soup should
# be good enough and halve processing time.
soup = make_soup(data,dblsoup=False)
chapterorigtitle = None
tag = soup.find('meta',{'name':'chapterorigtitle'})
if tag:
chapterorigtitle = tag['content']
## For higher performance checking, don't need to
## make_soup if not different
header = data[0:data.find("</head>")]
'''
<meta name="chapterorigtitle" content="8. Chapter 7" />
<meta name="chaptertoctitle" content="8. Chapter 7" />
<meta name="chaptertitle" content="8. (new) Chapter 7" />
'''
# logger.debug(header)
def get_meta_content(n,d):
m = re.match(r'.*<meta( name="%s"| content="(?P<found>[^"]+))+".*'%n,d,re.DOTALL)
if m:
# logger.debug("%s -> %s"%(n,m.groupdict().get('found',None)))
return m.groupdict().get('found',None)
# toctitle is separate for add_chapter_numbers:toconly users.
chaptertoctitle = None
tag = soup.find('meta',{'name':'chaptertoctitle'})
if tag:
chaptertoctitle = tag['content']
else:
chaptertoctitle = chapterorigtitle
chapterorigtitle = get_meta_content('chapterorigtitle',header)
chaptertoctitle =get_meta_content('chaptertoctitle',header)
chaptertitle = get_meta_content('chaptertitle',header)
chaptertitle = None
tag = soup.find('meta',{'name':'chaptertitle'})
if tag:
chaptertitle = tag['content']
chaptertitle_tag = tag
if not (chapterorigtitle and chaptertoctitle and chaptertitle \
and chapterorigtitle == chaptertitle):
# should be re-reading an FFF file, single soup should
# be good enough and halve processing time.
soup = make_soup(data,dblsoup=False)
chapterorigtitle = None
tag = soup.find('meta',{'name':'chapterorigtitle'})
if tag:
chapterorigtitle = tag['content']
# toctitle is separate for add_chapter_numbers:toconly users.
chaptertoctitle = None
tag = soup.find('meta',{'name':'chaptertoctitle'})
if tag:
chaptertoctitle = tag['content']
else:
chaptertoctitle = chapterorigtitle
chaptertitle = None
tag = soup.find('meta',{'name':'chaptertitle'})
if tag:
chaptertitle = tag['content']
chaptertitle_tag = tag
#logger.debug("chaptertitle:(%s) chapterorigtitle:(%s)"%(chaptertitle, chapterorigtitle))
if chaptertitle and chapterorigtitle and chapterorigtitle != chaptertitle:

View file

@ -148,3 +148,12 @@ class HTTPErrorFFF(Exception):
class BrowserCacheException(Exception):
pass
class NotGoingToDownload(Exception):
    """Raised to abort a download with a user-presentable reason.

    Carries an icon name and a showerror flag so GUI callers (the
    calibre plugin) can decide how to present the failure.
    """
    def __init__(self, error, icon='dialog_error.png', showerror=True):
        # Pass the message to Exception so e.args, repr() and pickling
        # behave like a normal exception (the original skipped this).
        super(NotGoingToDownload, self).__init__(error)
        self.error = error        # human-readable reason
        self.icon = icon          # icon file name for GUI display
        self.showerror = showerror  # False => informational, not an error dialog

    def __str__(self):
        return self.error

44
fanficfare/fff_profile.py Normal file
View file

@ -0,0 +1,44 @@
# Copyright 2026 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
## Not compatible with py2: pstats.SortKey is py3.7+, so the real
## profiler is only installed on py3.7+ and only when enabled below.
import sys
from functools import wraps

# Master switch; leave False in releases -- profiling output goes to stdout.
DO_PROFILING = False

if DO_PROFILING and sys.version_info >= (3, 7):
    from io import StringIO
    import cProfile
    import pstats
    from pstats import SortKey

    def do_cprofile(func):
        """Decorator: cProfile *func* and print its top 20 cumulative stats."""
        @wraps(func)  # preserve wrapped function's name/docstring
        def profiled_func(*args, **kwargs):
            profile = cProfile.Profile()
            try:
                profile.enable()
                result = func(*args, **kwargs)
                profile.disable()
                return result
            finally:
                # profile.print_stats()
                s = StringIO()
                sortby = SortKey.CUMULATIVE
                ps = pstats.Stats(profile, stream=s).sort_stats(sortby)
                ps.print_stats(20)
                print(s.getvalue())
        return profiled_func
else:
    ## do-nothing passthrough for py2 / profiling disabled
    def do_cprofile(func):
        """No-op decorator used when profiling is unavailable/disabled."""
        @wraps(func)  # preserve wrapped function's name/docstring
        def profiled_func(*args, **kwargs):
            return func(*args, **kwargs)
        return profiled_func

View file

@ -80,8 +80,10 @@ try:
def convert_image(url,data,sizes,grayscale,
removetrans,imgtype="jpg",background='#ffffff',jpg_quality=95):
# logger.debug("calibre convert_image called")
if url.lower().endswith('.svg') or '.svg?' in url.lower():
## I can just see somebody doing logo_svg.jpg
if url.lower().endswith('.svg') or '.svg?' in url.lower() \
or ensure_binary('<svg ') in data[:1000] \
or ensure_binary('xmlns="http://www.w3.org/2000/svg"') in data[:1000]:
raise exceptions.RejectImage("Calibre image processing chokes on SVG images.")
export = False
img, format = image_and_format_from_data(data)
@ -656,7 +658,7 @@ class ImageStore:
if failure:
info['newsrc'] = 'failedtoload'
info['actuallyused'] = False
logger.debug("add_img(%s,%s,%s,%s,%s,used:%s)"%(url,ext,mime,uuid,info['newsrc'],info['actuallyused']))
# logger.debug("add_img(%s,%s,%s,%s,%s,used:%s)"%(url,ext,mime,uuid,info['newsrc'],info['actuallyused']))
return info
def cache_failed_url(self,url):
@ -914,7 +916,7 @@ class Story(Requestable):
if key == "language":
try:
# getMetadata not just self.metadata[] to do replace_metadata.
self.setMetadata('langcode',langs[self.getMetadata(key)])
self.setMetadata('langcode',langs[self.getMetadataRaw(key)])
except:
self.setMetadata('langcode','en')
@ -1138,6 +1140,9 @@ class Story(Requestable):
removeallentities=False,
doreplacements=True,
seen_list={}):
if self.isImmutableMetaEntry(key):
doreplacements = False
# check for a cached value to speed processing
if self.metadata_cache.is_cached_scalar(key,removeallentities,doreplacements):
return self.metadata_cache.get_cached_scalar(key,removeallentities,doreplacements)
@ -1306,6 +1311,9 @@ class Story(Requestable):
#print("getList(%s,%s)"%(listname,includelist))
retlist = []
if self.isImmutableMetaEntry(listname):
doreplacements = False
# check for a cached value to speed processing
if not skip_cache and self.metadata_cache.is_cached_list(listname,removeallentities,doreplacements):
return self.metadata_cache.get_cached_list(listname,removeallentities,doreplacements)
@ -1631,7 +1639,7 @@ class Story(Requestable):
## likely changed to jpg.
(src,data)=oldimgs[url]
ext = src.split('.')[-1]
logger.debug("load_oldimgs:(%s,%s,%s)"%(url,ext,imagetypes[ext]))
# logger.debug("load_oldimgs:(%s,%s,%s)"%(url,ext,imagetypes[ext]))
self.img_store.add_img(url,
ext,
imagetypes[ext],
@ -1738,7 +1746,7 @@ class Story(Requestable):
(data,ext,mime) = no_convert_image(imgurl,
imgdata)
else:
logger.debug("Doing image processing on (%s)"%imgurl)
# logger.debug("Doing image processing on (%s)"%imgurl)
try:
sizes = [ int(x) for x in self.getConfigList('image_max_size',['580', '725']) ]
except Exception as e:

View file

@ -16,7 +16,7 @@ name = "FanFicFare" # Required
#
# For a discussion on single-sourcing the version, see
# https://packaging.python.org/guides/single-sourcing-package-version/
version = "4.54.0"
version = "4.57.7"
# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field: