Compare commits

...

1494 commits
v4.4.0 ... main

Author SHA1 Message Date
Jim Miller
a172a7bd2b Bump Test Version 4.57.7 2026-05-07 13:54:08 -05:00
Jim Miller
ab103dce6e browsercache_sqldb: Better share_open and read-only. #1341 2026-05-07 13:54:02 -05:00
Jim Miller
892e9207f0 Bump Test Version 4.57.6 2026-05-06 19:53:58 -05:00
Jim Miller
b4e392fae1 browsercache_sqldb: Use share_open for windows file locking. #1341 2026-05-06 19:53:44 -05:00
Jim Miller
d9525d9726 Bump Test Version 4.57.5 2026-05-06 13:22:28 -05:00
Jim Miller
cb77b12754 Adding browsercache_sqldb for Yet Another caching scheme in Chrome. #1341 2026-05-06 13:22:22 -05:00
Jim Miller
b41a633821 Bump Test Version 4.57.4 2026-05-05 08:11:07 -05:00
Jim Miller
50c8db2992 browsercache_simple: Tweak index file size check. #1341 2026-05-05 08:10:59 -05:00
Jim Miller
ef6dd99bfe Bump Test Version 4.57.3 2026-05-04 15:05:25 -05:00
Jim Miller
59796ff537 Add debug out to Browser Cache cache dir checking #1341 2026-05-04 15:05:13 -05:00
Jim Miller
8ee0a6e898 Bump Test Version 4.57.2 2026-05-03 09:06:51 -05:00
Jim Miller
c53fc362bd Include genre/category in defaults.ini when include_in_X for extragenres/extracategories 2026-05-03 09:06:44 -05:00
Jim Miller
c87cfc1057 adapter_fanficauthorsnet: Domains changed from .nsns to -nsns 2026-05-01 10:10:37 -05:00
Jim Miller
6ee151c90a Bump Release Version 4.57.0 2026-05-01 09:38:27 -05:00
Jim Miller
db01c828a0 Update translations. 2026-05-01 09:37:13 -05:00
Jim Miller
4d03874f06 Fix a bad comment-out 2026-04-29 15:42:59 -05:00
Jim Miller
36f56483e6 Bump Test Version 4.56.10 2026-04-29 13:01:28 -05:00
Jim Miller
18e45a403b PI Anthology: Reuse epub cover if there is one. 2026-04-29 13:01:22 -05:00
Jim Miller
2e25172ba3 adapter_scribblehubcom: Update ajax call for chapters data. Didn't fix #1339 but change noted 3+ years ago 2026-04-29 10:15:26 -05:00
Jim Miller
65e3fd562b Update translations. 2026-04-27 16:53:06 -05:00
Jim Miller
7089bf6689 Bump Test Version 4.56.9 2026-04-21 15:02:05 -05:00
Jim Miller
061dc1333f PI: Correct Series field url link when setanthologyseries 2026-04-21 15:01:58 -05:00
Jim Miller
0a7fb5c090 Bump Test Version 4.56.8 2026-04-19 14:08:29 -05:00
Jim Miller
cf02f729ae adapter_literotica: Fix for numeric tag value from json. #1336 2026-04-19 14:08:21 -05:00
Jim Miller
730c4f77f9 Bump Test Version 4.56.7 2026-04-19 09:33:07 -05:00
Jim Miller
c02da29cbd Added strings for translation 2026-04-19 09:33:00 -05:00
Jim Miller
b87d796221 PI: Add Fix Series Case setting for #1338 2026-04-19 09:30:15 -05:00
Jim Miller
436370fe5b Done profiling for now 2026-04-19 09:03:10 -05:00
Jim Miller
ac77f31bc2 Move NotGoingToDownload to exceptions.py #1337 2026-04-19 09:02:32 -05:00
Jim Miller
16f2c74e4b Bump Test Version 4.56.6 2026-04-18 13:47:51 -05:00
praschke
af5c2aa0bc adapter_kakuyomujp: site update 2026-04-18 13:47:14 -05:00
Jim Miller
31dec5b62d Bump Test Version 4.56.5 2026-04-18 12:58:56 -05:00
Jim Miller
97d37fcfc1 fix_relative_text_links: Allow hrefs to name anchors as well as id. 2026-04-18 12:58:46 -05:00
Jim Miller
c730aa2f68 Bump Test Version 4.56.4 2026-04-17 10:22:20 -05:00
Jim Miller
4e2e359dee PI Anthologies: Only put status in tags if in include_subject_tags. Closes #1332 2026-04-17 10:22:13 -05:00
Jim Miller
bb96049934 Remove some debug 2026-04-16 14:27:48 -05:00
Jim Miller
84965ef25f Bump Test Version 4.56.3 2026-04-12 21:20:09 -05:00
Jim Miller
348d129a1e adapter_ficwadcom: Detect missing username as well as failed login #1330 2026-04-12 21:05:42 -05:00
Jim Miller
4794e9bc51 Bump Test Version 4.56.2 2026-04-10 21:56:43 -05:00
Jim Miller
d46dc76ae1 Somewhat better consolidated perf profiling 2026-04-10 21:56:43 -05:00
Jim Miller
08bae8d9be Imperfect, but working perf profiling 2026-04-10 16:49:17 -05:00
Jim Miller
405c37aeb5 Remove some dead code. 2026-04-10 16:43:49 -05:00
Jim Miller
270e01c3c7 Cache config values for performance improvement. 2026-04-10 16:24:37 -05:00
Jim Miller
12d57f5950 Bump Test Version 4.56.1 2026-04-06 12:07:14 -05:00
Jim Miller
562b3a4ecd Unnew Perf Improvement w/profiling 2026-04-06 12:07:05 -05:00
Jim Miller
e69045fd98 Bump Release Version 4.56.0 2026-04-02 10:03:42 -05:00
Jim Miller
747bde3394 Update (commented out) profiling code. 2026-04-02 10:02:58 -05:00
Jim Miller
aa00c7ae03 Bump Test Version 4.55.4 2026-03-27 11:54:50 -05:00
Jim Miller
0539f818f3 Add top menu items for Add/Edit Reject URLs. 2026-03-27 11:54:44 -05:00
Jim Miller
41a6f56f44 Remove fanficfare_macmenuhack. 2026-03-27 11:43:53 -05:00
Jim Miller
e3832245e6 Add Reject URLs: Accept story URLs drag/drop & paste like Add Stories by URL 2026-03-27 10:52:30 -05:00
Jim Miller
909b64c83c Remove some image processing debug output 2026-03-27 10:51:29 -05:00
Jim Miller
732f5e2571 Bump Test Version 4.55.3 2026-03-19 13:03:11 -05:00
Jim Miller
d9dd04396e Epub Update: Don't cache cover image with others, trips dedup. 2026-03-19 13:03:03 -05:00
Jim Miller
36e2183d45 Bump Test Version 4.55.2 2026-03-12 15:13:01 -05:00
Jim Miller
040b7205b8 adapter_literotica: Fix for site change (#1318) 2026-03-12 15:11:26 -05:00
Jim Miller
d8ed180eb1 Bump Test Version 4.55.1 2026-03-09 13:04:56 -05:00
Jim Miller
2a6c1e74db Make seriesUrl mutable again. 2026-03-09 13:04:50 -05:00
Jim Miller
b7c8c96153 Put download list at start of BG job too 2026-03-09 13:04:24 -05:00
Jim Miller
a16096592c Bump Release Version 4.55.0 2026-03-01 09:25:11 -06:00
Jim Miller
bb34eecc7c Remove a line of unused code. 2026-02-23 13:08:57 -06:00
Jim Miller
ceed7ef1a8 Bump Test Version 4.54.5 2026-02-10 08:45:34 -06:00
Jim Miller
1d2a887c2d Epub Update: Skip missing chapter, image and css files instead of failing. 2026-02-10 08:45:20 -06:00
Jim Miller
a3f3302312 Plugin only: In Skip mode, don't do initial metadata fetch if already matched in library. #1309 2026-02-10 08:30:02 -06:00
Jim Miller
ecf005b145 Bump Test Version 4.54.4 2026-02-05 16:09:00 -06:00
Jim Miller
3bd074fa2c Additional checks for svg images to reject--Calibre only. Related to #1298 2026-02-05 16:08:54 -06:00
Jim Miller
0fd95daa8e Bump Test Version 4.54.3 2026-02-05 13:46:42 -06:00
Jim Miller
1b57e49d98 Ignore CSS url() when ttf/otf/woff/woff2 font files 2026-02-05 13:46:24 -06:00
Jim Miller
db0d39c9cd Bump Test Version 4.54.2 2026-02-02 13:12:56 -06:00
Jim Miller
cbde66cf41 adapter_fimfictionnet/adapter_royalroadcom: Better handling of cover image size fall back #1306 2026-02-02 13:12:42 -06:00
Jim Miller
17331e9eb3 Bump Test Version 4.54.1 2026-02-01 13:51:23 -06:00
Jim Miller
9b96c151a5 adapter_adultfanfictionorg: Fixes for site changes #1305 2026-02-01 13:51:22 -06:00
Jim Miller
1b65a30798 Making some metadata entries immutable 2026-02-01 13:51:22 -06:00
Jim Miller
c9a47877f7 Allow for language getting changed by replace_metadata not breaking langcode 2026-02-01 09:15:31 -06:00
Jim Miller
bdc77ad0f6 Remove Site: swi.org.ru No DNS for site. 2026-02-01 09:15:31 -06:00
Jim Miller
719971c76c Don't set numChapters--it's done automatically. 2026-02-01 09:15:31 -06:00
Jim Miller
c74dba472a Fixes for mutable metadata entries used in code 2026-02-01 09:15:31 -06:00
Jim Miller
c1fb7f0fc5 Refactor metadata entry and settings name code a bit 2026-02-01 09:15:31 -06:00
Jim Miller
94c932cd2f Bump Release Version 4.54.0 2026-02-01 09:04:34 -06:00
Jim Miller
27fb765c0d Update translations. 2026-02-01 09:04:08 -06:00
Jim Miller
06ce46f64a Bump Test Version 4.53.15 2026-01-30 08:52:46 -06:00
Jim Miller
c04d85fa97 Plugin BG settings: Remove 'old' vs 'new' BG handling verbiage 2026-01-29 13:16:56 -06:00
Jim Miller
b6cdc30db5 Bump Test Version 4.53.14 2026-01-29 11:23:03 -06:00
Jim Miller
9bbb5e8b01 adapter_ficbooknet: Change how replace_text_formatting converts to text. 2026-01-29 11:22:40 -06:00
Jim Miller
18ce6e6fba BrowserCache: Add comment about py2 and gzip.decompress 2026-01-29 11:20:42 -06:00
Jim Miller
507910f5da Don't give format section warnings for fix_excess_space 2026-01-29 09:28:45 -06:00
Jim Miller
ccf7801a89 Bump Test Version 4.53.13 2026-01-27 11:24:25 -06:00
Jim Miller
9a52a10626 adapter_ficbooknet: Add replace_text_formatting option to replace CSS paragraphing with tags, for txt output. 2026-01-27 11:24:15 -06:00
Jim Miller
6963153aac adapter_storiesonlinenet: Site changed, get series number from series page now. 2026-01-27 10:10:52 -06:00
Jim Miller
ee357cd5b4 Bump Test Version 4.53.12 2026-01-24 09:32:26 -06:00
Jim Miller
b84e3d2858 adapter_royalroadcom: Fix login failure reporting #1302 2026-01-24 09:32:09 -06:00
Jim Miller
9377fc6671 Bump Test Version 4.53.11 2026-01-22 13:33:43 -06:00
Jim Miller
aaa0fa613a Image Handling: Fix tidy cover caching when no cover. 2026-01-22 13:33:36 -06:00
Jim Miller
eac5acfbfa Bump Test Version 4.53.10 2026-01-22 12:13:37 -06:00
Jim Miller
8dca1ef343 Image Handling: Remove unused images properly with dedup_img_files 2026-01-22 12:11:45 -06:00
Jim Miller
28e8f61cf8 Image Handling: Tidy cover caching 2026-01-22 11:29:20 -06:00
Jim Miller
78abf476ea Image Handling: Rename dedup'ed images on first pass, too. 2026-01-22 11:20:12 -06:00
Jim Miller
2b1f9446dd Bump Test Version 4.53.9 2026-01-20 10:09:19 -06:00
Jim Miller
9815736b4e Fix dedup_img_files - changes <img longdesc= to deduped URL. 2026-01-20 10:09:19 -06:00
Jim Miller
3f54cce9a1 Don't record longdesc on img fails. 2026-01-20 10:09:19 -06:00
Jim Miller
223138b8e5 Image Handling: Cache fails w/in download (but not between), keep full src URL with failedtodownload marker 2026-01-20 10:09:12 -06:00
Jim Miller
4aa47c8bab Bump Test Version 4.53.8 2026-01-15 18:06:47 -06:00
Jim Miller
a97a85f357 epub update: Read all images for oldimgs after reading chapters to keep longdesc=origurl 2026-01-15 18:03:54 -06:00
Jim Miller
ffc3696d84 Bump Test Version 4.53.7 2026-01-15 15:14:38 -06:00
Jim Miller
86c4e1974b Skip CSS url() handling on empty tags by content instead of stripHTML 2026-01-15 15:14:23 -06:00
Jim Miller
b6fd7c2ca4 Fix additional_images 2026-01-15 13:23:01 -06:00
Jim Miller
326300b40e Correct comment. 2026-01-15 13:22:40 -06:00
Jim Miller
282bafe514 Bump Test Version 4.53.6 2026-01-15 12:20:53 -06:00
Jim Miller
061a8feccf CSS url() processing only when include_images:true 2026-01-15 12:20:46 -06:00
Jim Miller
26c9b6d2ce Bump Test Version 4.53.5 2026-01-15 09:10:13 -06:00
Jim Miller
ed02d61953 epubutils: Load all images, not just referenced. uuid5 will still allow use. 2026-01-15 09:10:07 -06:00
Jim Miller
b58d54b8ea Bump Test Version 4.53.4 2026-01-14 16:53:53 -06:00
Jim Miller
1bc3ffc269 base_xenforo2forum_adapter: Add ytimg.com to default cover_exclusion_regexp 2026-01-14 16:53:46 -06:00
Jim Miller
cbd295f911 Bump Test Version 4.53.3 2026-01-14 13:55:33 -06:00
Jim Miller
35653f533f base_xenforo2forum_adapter: Add link_embedded_media option 2026-01-14 13:55:23 -06:00
Jim Miller
ea7afea8c2 Fix XF sites lists in configurable.py 2026-01-14 13:35:51 -06:00
Jim Miller
384a2fe8b7 CSS url() style attr--don't do when tag is empty. 2026-01-14 13:18:51 -06:00
Jim Miller
b278cac620 Bump Test Version 4.53.2 2026-01-13 16:45:35 -06:00
Jim Miller
e23de49fb5 uuid5 converts to bytes but gets unhappy about getting bytes to start on
Calibre?
2026-01-13 16:45:00 -06:00
Jim Miller
f64f041546 Adding CSS url() image inclusion, name all images by uuid5 2026-01-13 14:20:11 -06:00
Jim Miller
1d53c506c9 writer_epub: Pretty print epub meta files 2026-01-13 13:47:56 -06:00
Jim Miller
c8d6ce8004 Add webp as a known image type. 2026-01-13 13:43:57 -06:00
Jim Miller
3f08417c04 writer_epub: Don't dup image ids in content.opf on update with old cover. 2026-01-10 15:16:00 -06:00
Jim Miller
79ebf6a02b Bump Test Version 4.53.1 2026-01-08 10:04:59 -06:00
Jim Miller
41dfb8eab8 base_xenforo2forum_adapter: Fix include_nonauthor_poster: Had left testing conditional 2026-01-08 09:10:40 -06:00
Jim Miller
590b663170 Bump Release Version 4.53.0 2026-01-01 09:18:34 -06:00
Jim Miller
9bb408c8b3 Bump Test Version 4.52.9 2025-12-31 10:01:20 -06:00
Jim Miller
5d6a63a8ca Fix for rare 'false' as INI list corner case 2025-12-31 09:59:53 -06:00
Jim Miller
4078ccfdb1 Bump Test Version 4.52.8 2025-12-29 12:49:57 -06:00
Jim Miller
79c29121c3 writer_epub: Add <spine page-progression-direction=rtl> option as page_progression_direction_rtl 2025-12-29 12:49:40 -06:00
Jim Miller
dea48d9e07 adapter_storiesonlinenet: Improve inject_chapter_title for #1294 2025-12-29 12:25:27 -06:00
Jim Miller
c165196a35 base_xenforo2forum_adapter: Add include_nonauthor_poster option 2025-12-29 12:10:26 -06:00
Jim Miller
c385013db9 adapter_literotica: Remove unused chapter_categories_use_all option, fix other site options for better defaults.ini #1292 2025-12-29 10:48:36 -06:00
Jim Miller
8780aa3105 Bump Test Version 4.52.7 2025-12-26 11:53:04 -06:00
Jim Miller
12c7bfe29c adapter_literotica: Remove unused chapter_categories_use_all option, fix other site options for better defaults.ini #1292 2025-12-26 11:52:51 -06:00
Jim Miller
08d0b8a4e0 Changes for #1292 for normalizing different series URL forms. 2025-12-26 11:45:26 -06:00
Jim Miller
1d401f8dba Bump Test Version 4.52.6 2025-12-20 19:41:22 -06:00
Jim Miller
193bb3ed61 AO3: Site changed 'don't have permission' string 2025-12-20 19:40:54 -06:00
Jim Miller
63fd8cd660 Calc words_added even if not in logpage_entries. 2025-12-14 19:49:45 -06:00
Jim Miller
26a1152390 Bump Test Version 4.52.5 2025-12-11 11:20:26 -06:00
WWeapn
e0907147f7
adapter_literotica: Get series ID from data object (#1290) 2025-12-11 11:20:02 -06:00
Jim Miller
99bba3ff12 Bump Test Version 4.52.4 2025-12-10 09:57:34 -06:00
Jim Miller
3fdb6630fb Remove dup of remove_class_chapter from get_valid_set_options() 2025-12-10 09:57:28 -06:00
dbhmw
0d6b789c9f
adapter_literotica: Add chapter descriptions to summary (#1287) 2025-12-10 09:56:15 -06:00
Jim Miller
edaa03ef42 Bump Test Version 4.52.3 2025-12-07 11:06:43 -06:00
Jim Miller
4e17a10792 adapter_literotica: Don't require tags_from_chapters for old eroticatags collection. From #1280 2025-12-07 11:06:37 -06:00
Jim Miller
9fd48e0168 Bump Test Version 4.52.2 2025-12-04 14:04:35 -06:00
Jim Miller
818e990184 adapter_fictionlive: create self.chapter_id_to_api earlier for normalize_chapterurl 2025-12-04 14:04:24 -06:00
Jim Miller
9bb7b54023 Bump Test Version 4.52.1 2025-12-04 09:26:48 -06:00
Jim Miller
af6695e27f adapter_literotica: Fix for one-shot aver_rating #1285 2025-12-04 09:26:32 -06:00
Jim Miller
46293f2d02 Bump Release Version 4.52.0 2025-12-01 08:25:22 -06:00
Jim Miller
7f968ba102 Bump Test Version 4.51.7 2025-11-30 11:02:57 -06:00
Jim Miller
1e5cb9b184 Update translations. 2025-11-30 11:02:30 -06:00
Jim Miller
9627e6e62c Remove site: www.wuxiaworld.xyz - DN parked somewhere questionable for +2 years 2025-11-30 10:58:18 -06:00
Jim Miller
5e644098f9 Remove Site: sinful-dreams.com/whispered/muse - broken for 6+ years even though other two sites on same DN work 2025-11-30 10:37:42 -06:00
Jim Miller
fa3a56d096 adapter_fanfictionsfr: Site SSL requires www now 2025-11-30 10:33:40 -06:00
Jim Miller
ba18216ef8 Bump Test Version 4.51.6 2025-11-28 12:48:32 -06:00
Jim Miller
f207e31b3b Add standard metadata entry marked_new_chapters for epub updated '(new)' chapters count 2025-11-28 12:48:25 -06:00
Jim Miller
0e1ace18e4 Bump Test Version 4.51.5 2025-11-28 09:05:21 -06:00
Jim Miller
b17a632640 adapter_literotica: fix tags_from_chapters for #1283 2025-11-25 10:48:46 -06:00
Jim Miller
485d4631f9 adapter_literotica: Partial fix for #1283, chapters from JSON fetch 2025-11-24 13:20:38 -06:00
Jim Miller
30929bc38e Better handling for no chapters found (#1283) 2025-11-24 12:24:44 -06:00
Jim Miller
ae4311f4dd Bump Test Version 4.51.4 2025-11-19 09:56:07 -06:00
MacaroonRemarkable
3a3c35ea1f Made it possible to use human-readable URLs in addition to api urls for ignore_chapter_url_list 2025-11-19 09:54:57 -06:00
MacaroonRemarkable
19dd89fb4d Fixed missing setting in plugin defaults 2025-11-19 09:54:57 -06:00
MacaroonRemarkable
b247a7465b Added include_appendices config option for fiction.live 2025-11-19 09:54:57 -06:00
albyofdoom
d5c20db681 Implement Alternate Tagging and Date calculation for Literotica 2025-11-19 09:54:40 -06:00
MacaroonRemarkable
a599ff6ad2 Added missing line to plugin-defaults 2025-11-19 09:54:13 -06:00
MacaroonRemarkable
e21c6604a1 Update QQ reader_posts_per_page default 2025-11-19 09:54:13 -06:00
Jim Miller
273c1931f4 Bump Test Version 4.51.3 2025-11-13 08:27:08 -06:00
Jim Miller
fdf29eeade adapter_royalroadcom: New status Inactive 2025-11-13 08:26:54 -06:00
Jim Miller
06e55728d0 Bump Test Version 4.51.2 2025-11-11 20:09:20 -06:00
Jim Miller
0a3ab4bc9d Fix for add_chapter_numbers:toconly and unnew. Closes #1274 2025-11-11 20:08:57 -06:00
Jim Miller
a4a91b373f Bump Test Version 4.51.1 2025-11-10 08:50:28 -06:00
Jim Miller
a68e771026 Don't issue flaresolverr image warning unless include_images:true 2025-11-10 08:50:11 -06:00
Jim Miller
d7c79fcb3b Bump Release Version 4.51.0 2025-11-07 09:53:24 -06:00
Jim Miller
5cc05ed96d Update translations. 2025-11-07 09:33:20 -06:00
Jim Miller
e5b5768f11 Perf improvement for unnew 2025-11-04 12:20:39 -06:00
Jim Miller
6cf2519ef9 Bump Test Version 4.50.5 2025-11-02 20:09:20 -06:00
Jim Miller
f4f98e0877 Don't include default_cover_image with use_old_cover with a different name. 2025-11-02 20:08:16 -06:00
Jim Miller
bb8fb9efa5 writer_epub: More epub3 - prefix & prop cover-image 2025-11-02 18:38:29 -06:00
Jim Miller
be38778d72 Bump Test Version 4.50.4 2025-11-02 09:50:15 -06:00
Jim Miller
55d8efbdcd writer_epub: Only do svg check for epub3 2025-11-02 09:49:51 -06:00
Jim Miller
9df7822e32 Bump Test Version 4.50.3 2025-11-01 14:12:45 -05:00
Jim Miller
69e6a3d2cf writer_epub: Rearrange to detect and flag files containing svg tags for epub3. 2025-11-01 14:12:40 -05:00
Jim Miller
8ea03be5f3 epub3 - Flag the cover *page*--epub3 only flags cover *img* 2025-11-01 13:03:08 -05:00
Jim Miller
75a213beb9 Find and use epub3 cover on update--relies on Calibre's calibre:title-page property. 2025-11-01 12:48:03 -05:00
Jim Miller
ead830c60a adapter_storiesonlinenet: Set authorUrl to site homepage when (Hidden) author for #1272 2025-11-01 09:09:31 -05:00
Brian
20681315e7
Update adapter_storiesonlinenet.py
Removed extraneous parens on conditional 'if' statements
2025-10-31 22:50:56 -07:00
Brian
e2961eaadf
adapter_storiesonlinenet.py - tolerate contest stories
Contest stories have author="(Hidden)" which breaks the code to get story info from author's page.
Added checks for this and also checks to verify soup actually found results before trying to blindly use the results.
2025-10-31 15:01:45 -07:00
Jim Miller
7f0d7f70be Bump Test Version 4.50.2 2025-10-29 13:48:06 -05:00
dbhmw
c5264c2147 adapter_ficbooknet: Collect numWords 2025-10-29 13:47:46 -05:00
MacaroonRemarkable
ff402c16ca
Preserve original titles for Reader Post blocks from fiction.live (#1269)
* Preserve original titles for Reader Post blocks from fiction.live

* Update adapter_fictionlive.py

Changed for py2 backward compatibility

* Update adapter_fictionlive.py

Switched to concatenation rather than .format

* Update adapter_fictionlive.py

Missing space -_-
2025-10-29 13:47:26 -05:00
Jim Miller
4a9da1c02e Bump Test Version 4.50.1 2025-10-19 22:14:16 -05:00
Jim Miller
c14f1014b8 OTW/AO3: Don't apply series page handling to non-series pages 2025-10-19 22:14:08 -05:00
Jim Miller
74bc398994 Bump Release Version 4.50.0 2025-10-19 19:00:10 -05:00
Jim Miller
6e8e74fc55 Bump Test Version 4.49.6 2025-10-18 09:29:20 -05:00
Jim Miller
68ad4c87aa OTW: Fix for site change breaking logged in detection. Closes #1263 2025-10-18 09:29:14 -05:00
Jim Miller
fe82aed91d Bump Test Version 4.49.5 2025-10-12 09:26:37 -05:00
Jim Miller
7d14bf6e90 base_otw_adapter: Fix for markedforlater site change 2025-10-12 09:26:20 -05:00
Brian
39500a9386 Update adapter_storiesonlinenet.py
Add check for SOL accounts in renewal warning period to verbosely explain to users why their downloads don't work
2025-10-12 09:15:38 -05:00
dbhmw
d5f8891e4f adapter_literotica: Site change, regex outdated. 2025-10-12 09:08:12 -05:00
Jim Miller
edce6949ae Bump Test Version 4.49.4 2025-10-10 11:12:09 -05:00
Jim Miller
bec6fac2ea base_otw_adapter: Use download link for chapter->work conversion #1258 2025-10-10 11:11:58 -05:00
Jim Miller
a9bd19a079 Bump Test Version 4.49.3 2025-10-07 10:35:46 -05:00
Jim Miller
7135ba5892 OTW(AO3): Accept /chapter/999 URLs without /works/999 for #1258 2025-10-07 10:35:38 -05:00
Jim Miller
9ba4c100ca Bump Test Version 4.49.2 2025-10-02 13:38:44 -05:00
Jim Miller
fe565149ba Fix tuple vs grouping vs list, closes #1254 2025-10-02 13:38:26 -05:00
Jim Miller
624f60a5c1 Bump Test Version 4.49.1 2025-10-01 11:55:08 -05:00
Jim Miller
5c79ac0b5c New site: althistory.com (NOT alternatehistory.com) for #1252 2025-10-01 11:55:08 -05:00
Jim Miller
615711f904 Comment some debugs 2025-10-01 11:55:08 -05:00
kilandra
2f77bd9e97 Spiritfanfiction login, closes #1247
Add login functionality to Spiritfanfiction.com
2025-10-01 09:05:09 -05:00
Jim Miller
abdc881812 Bump Release Version 4.49.0 2025-10-01 08:50:15 -05:00
Jim Miller
1ba73bf316 Update translations. 2025-09-30 09:22:34 -05:00
Jim Miller
a359c6b326 adapter_storiesonlinenet: Change page not found error reporting 2025-09-23 10:04:29 -05:00
Jim Miller
ff64356e85 Bump Test Version 4.48.7 2025-09-11 09:09:46 -05:00
Jim Miller
0271b14f6c adapter_literotica: Yet another site change, addresses #1245 2025-09-11 09:09:28 -05:00
Jim Miller
bf845e200f Bump Test Version 4.48.6 2025-09-10 13:47:45 -05:00
Jim Miller
e94ff6e1e8 base_otw: Add collectionsUrl and collectionsHTML metadata--keep in order 2025-09-10 13:47:39 -05:00
Jim Miller
07313d2744 Bump Test Version 4.48.5 2025-09-10 13:40:29 -05:00
Jim Miller
bd2026df7e base_otw: Add collectionsUrl and collectionsHTML metadata 2025-09-10 13:40:23 -05:00
Jim Miller
0fa177ff79 Bump Test Version 4.48.4 2025-09-10 08:40:01 -05:00
Jim Miller
d84c72a215 adapter_literotica: Site change 2025-09-10 08:39:55 -05:00
Jim Miller
c319857da0 Bump Test Version 4.48.3 2025-09-08 21:41:18 -05:00
Jim Miller
df586e9bb7 browsercache_simple: Code for 0 length stream in cache file, only seen in Mac 2025-09-08 21:41:11 -05:00
Jim Miller
354a5708ce Bump Test Version 4.48.2 2025-08-27 11:13:15 -05:00
Jim Miller
096face5d2 Add continue_on_chapter_error_try_limit setting 2025-08-27 11:13:07 -05:00
Jim Miller
02e3bddd5c Bump Test Version 4.48.1 2025-08-22 11:19:06 -05:00
Jim Miller
9dadef1905 adapter_fireflyfansnet: Allow for missing authorId. 2025-08-22 11:19:01 -05:00
Jim Miller
2e8a899d8c Bump Release Version 4.48.0 2025-08-07 11:42:37 -05:00
Jim Miller
623915f623 Update translations. 2025-08-07 11:42:36 -05:00
Jim Miller
57865ca53d scribblehub: slow_down_sleep_time:5 per user recommendation 2025-08-07 11:32:03 -05:00
Jim Miller
e9c4b9ef30 Bump Test Version 4.47.4 2025-08-05 08:41:54 -05:00
Jim Miller
0ad088b663 adapter_ficwadcom: Fix for site change. 2025-08-05 08:41:48 -05:00
Jim Miller
e37a7f72be Tweak a few defaults.ini settings. 2025-08-05 08:41:27 -05:00
Jim Miller
9befe122dd Bump Test Version 4.47.3 2025-07-20 12:17:29 -05:00
Jim Miller
e6d6227ff1 Improve error reporting for open_pages_in_browser_tries_limit #1231 2025-07-20 12:17:24 -05:00
Jim Miller
d854a6efe7 Bump Test Version 4.47.2 2025-07-09 10:50:08 -05:00
Jim Miller
a97af94f8a OTW/AO3 - change to 'need to login' text, accept both old and new and another string. #1229 2025-07-09 10:49:45 -05:00
Jim Miller
e2ea97e99a Bump Test Version 4.47.1 2025-07-05 08:41:20 -05:00
Jim Miller
215f6dd8ff OTW/AO3 - change to 'need to login' text 2025-07-05 08:41:09 -05:00
Jim Miller
687aa9c3ba Bump Release Version 4.47.0 2025-07-03 08:21:33 -05:00
Jim Miller
523cf78640 Update strings for translation. 2025-07-03 08:19:59 -05:00
Jim Miller
90e50964b6 Bump Test Version 4.46.11 2025-06-25 08:42:33 -05:00
Jim Miller
a83823ea13 adapter_ashwindersycophanthexcom: http to https 2025-06-25 08:41:47 -05:00
Jim Miller
727aa6f1bc Bump Test Version 4.46.10 2025-06-22 20:15:59 -05:00
Jim Miller
072d929298 adapter_fimfictionnet: New img attr and class. #1226 2025-06-22 20:15:19 -05:00
Jim Miller
992c5a1378 Bump Test Version 4.46.9 2025-06-22 11:59:56 -05:00
Jim Miller
f8937c1af3 Report BG job failed entirely as individual books failed instead of just exception. For #1225 2025-06-22 10:45:05 -05:00
Jim Miller
af5c78e2e9 Remove some unused imports 2025-06-22 09:38:40 -05:00
Jim Miller
4a26dfdfff Plugin BG Jobs: Remove old multi-process code 2025-06-16 19:24:46 -05:00
Jim Miller
a82ef5dbae Bump Test Version 4.46.8 2025-06-16 19:16:18 -05:00
snoonan
6adc995fa5 Update defaults.ini per PR 2025-06-16 19:11:43 -05:00
snoonan
f534efd3df Support for logging into royal road to keep chapter progress (and count as page views) 2025-06-16 19:11:43 -05:00
Jim Miller
f41e64141a Add SB favicons to cover_exclusion_regexp. 2025-06-15 17:30:47 -05:00
Jim Miller
94036e3fbb Send refresh_screen=True when updating Reading Lists in case of series column updates. 2025-06-13 21:07:42 -05:00
Jim Miller
9142609c61 Bump Test Version 4.46.7 2025-06-12 22:05:11 -05:00
Jim Miller
f9d7b893ee Fix images from existing epub being discarded during update. 2025-06-12 22:02:35 -05:00
Jim Miller
4e2ae7441d Bump Test Version 4.46.6 2025-06-11 15:29:12 -05:00
Jim Miller
87dbef980f Mildly kludgey fix for status bar notifications. 2025-06-11 10:47:09 -05:00
Jim Miller
921f8c287b Shutdown IMAP connection when done with it. 2025-06-10 17:42:07 -05:00
Jim Miller
637c6e3cc3 Change default base_xenforoforum minimum_threadmarks:1. See #1218 2025-06-10 16:36:21 -05:00
Jim Miller
ba90ff9f3a Bump Test Version 4.46.5 2025-06-10 12:56:26 -05:00
Jim Miller
34e84b2942 PI BG Jobs: Fix split without reconsolidate. 2025-06-10 12:56:16 -05:00
Jim Miller
31eb7f421a Bump Test Version 4.46.4 2025-06-08 09:45:01 -05:00
Jim Miller
85d4656005 alternatehistory needs at least cloudscraper now, it seems. 2025-06-08 09:45:01 -05:00
Jim Miller
006b8873a5 Fix xenforo2 prefixtags, some still using tags in title 2025-06-08 09:44:48 -05:00
Jim Miller
3246036f88 Bump Test Version 4.46.3 2025-06-08 08:39:04 -05:00
Jim Miller
6d114532e2 Py2 fix for split BG jobs, closes #1214 2025-06-08 08:38:24 -05:00
Jim Miller
2edb1d58d5 Bump Test Version 4.46.2 2025-06-07 13:42:29 -05:00
Jim Miller
8dc3c5d3d8 Skip OTW(AO3) login when open_pages_in_browser AND use_browser_cache AND use_browser_cache_only 2025-06-07 13:22:30 -05:00
Jim Miller
2ec8c97e28 Bump Test Version 4.46.1 2025-06-07 12:51:24 -05:00
Rae Knowler
c51161c3d1 Include Accept:image/* header when requesting an image url 2025-06-07 12:50:12 -05:00
Jim Miller
bd645a97c7 Add use_flaresolverr_session and flaresolverr_session settings for #1211 2025-06-07 12:49:08 -05:00
Jim Miller
f7cbfa56bb Bump Release Version 4.46.0 2025-06-06 20:02:47 -05:00
Jim Miller
07fd16813f Bump Test Version 4.45.15 2025-06-05 16:56:16 -05:00
Jim Miller
2fe971c79f OTW(AO3): Don't attempt login with use_archive_transformativeworks_org or open_pages_in_browser #1210 2025-06-05 16:56:10 -05:00
Jim Miller
e4082c6235 Bump Test Version 4.45.14 2025-06-05 08:59:03 -05:00
Jim Miller
960d5ba11a Ignore use_browser_cache_only when URL scheme is file 2025-06-05 08:57:39 -05:00
Jim Miller
066539793d Update translations. 2025-06-04 22:14:33 -05:00
Jim Miller
5b312494fb Bump Test Version 4.45.13 2025-05-27 19:16:33 -05:00
Jim Miller
e628b10247 adapter_literotica: Fix date parsing. See #1208 2025-05-27 19:16:23 -05:00
dbhmw
61c063ed72 adapter_ficbooknet: Site changes 2025-05-27 19:11:54 -05:00
Jim Miller
11d3f601c9 Add Ctrl-Enter to AddDialog, consolidating code with INIEdit 2025-05-24 13:05:05 -05:00
Jim Miller
3b8d0f63d4 Bump Test Version 4.45.12 2025-05-23 11:46:28 -05:00
Jim Miller
b8b30c6a78 adapter_literotica: Update for site change #1208 2025-05-23 11:46:17 -05:00
Jim Miller
b007f68a88 Bump Test Version 4.45.11 2025-05-23 10:19:17 -05:00
Jim Miller
6d8a67ef2e adapter_literotica: Update for site change #1208 2025-05-23 10:19:05 -05:00
Jim Miller
ab66e9e285 Bump Test Version 4.45.10 2025-05-23 10:02:15 -05:00
Jim Miller
b3f7add5a1 Split BG: Fixes for error column & showing meta collection errors 2025-05-23 10:02:09 -05:00
Jim Miller
800be43d24 Bump Test Version 4.45.9 2025-05-22 12:31:02 -05:00
Jim Miller
70f77e17e2 adapter_literotica: Update for site change 2025-05-22 12:07:16 -05:00
Jim Miller
caf46ba421 Bump Test Version 4.45.8 2025-05-19 15:38:40 -05:00
Jim Miller
686ed80230 Update BG Job changes settings verbiage and defaults 2025-05-19 15:38:27 -05:00
Jim Miller
56689a10c4 Bump Test Version 4.45.7 2025-05-18 10:13:45 -05:00
Jim Miller
065d077752 Improve job 'reconsolidate' for failed jobs and setting changing. 2025-05-18 10:10:02 -05:00
Jim Miller
c8f817e830 Bump Test Version 4.45.6 2025-05-17 13:53:49 -05:00
Jim Miller
1432241319 Single proc bg processing, optionally split by site & accumulate results -- experimental 2025-05-17 13:53:27 -05:00
Jim Miller
0e9f60f8a6 Bump Test Version 4.45.5 2025-05-12 17:02:59 -05:00
Jim Miller
74de62385f Fix remove_empty_p regexp to work with nested <br> tags and whitespace. 2025-05-12 17:02:51 -05:00
Jim Miller
d2f69eb5d5 Bump Test Version 4.45.4 2025-05-10 09:29:20 -05:00
Jim Miller
c3655d59ca AO3 make use_(domain) options not replace media.archiveofourown.org 2025-05-10 09:29:14 -05:00
Emmanuel Ferdman
aca07bbf59 Migrate to new bs4 API
Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
2025-05-06 17:38:14 -05:00
Jim Miller
3edd3c3e7b Bump Test Version 4.45.3 2025-05-06 16:17:58 -05:00
Jim Miller
61ba096c6e Fix 'Add New Book' dialog when multiple existing found on update. 2025-05-06 16:17:51 -05:00
Jim Miller
47fd71c4b9 XF2: Allow extra / before threads in story URL. 2025-05-05 12:59:38 -05:00
Jim Miller
e1d0bed52d Bump Test Version 4.45.2 2025-05-05 09:46:15 -05:00
Jim Miller
acb88cbefc Include 'Add New Book' dialog when multiple existing found on update. 2025-05-05 09:45:33 -05:00
Jim Miller
f1e7cabf6a Bump Test Version 4.45.1 2025-05-04 10:15:28 -05:00
kilandra
21ec27ffd4 Fix for adapter_spiritfanfictioncom.py
Commenters are being identified as authors since webpage change.
2025-05-04 10:14:27 -05:00
Matěj Cepl
5567e6417d fix(pyproject): replace license by file to using SPDX keyword
As per https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license
2025-05-02 16:17:35 -05:00
Jim Miller
af352a480c Bump Release Version 4.45.0 2025-05-01 10:49:01 -05:00
Jim Miller
92069dc638 Add comment 2025-05-01 10:48:49 -05:00
Jim Miller
76e9421858 Bump Test Version 4.44.11 2025-04-30 09:17:23 -05:00
Jim Miller
70558bf444 Transition finestories.com to storyroom.com 2025-04-30 09:16:07 -05:00
Brian
b60dfdcc28
Update configurable.py
Add support for WPC/WLPC/SOL sub-site "storyroom.com" to replace "finestories.com"
2025-04-29 19:51:02 -07:00
Brian
b976439669
Create adapter_storyroomcom.py
Add support for WPC/WLPC/SOL sub-site "storyroom.com" which replaces "finestories.com"
2025-04-29 19:49:15 -07:00
Brian
6de50509ed
Update __init__.py
Add support for new WPC/WLPC/SOL subsite "storyroom.com" to duplicate/replace "finestories.com"
2025-04-29 19:38:48 -07:00
Jim Miller
4d9c38d3c2 Bump Test Version 4.44.10 2025-04-28 20:27:42 -05:00
Jim Miller
90ecb63be4 Fix for alternatehistory.com changing threadmark date attr. 2025-04-28 20:26:07 -05:00
Jim Miller
bd49f8e8fa XF2: Add threadmarks_per_page setting 2025-04-28 19:40:53 -05:00
Jim Miller
21c0315e60 Bump Test Version 4.44.9 2025-04-28 09:22:39 -05:00
dbhmw
fc97fa6d5c adapter_literotica: get_urls_from_page - series have urls 2025-04-28 09:22:22 -05:00
Jim Miller
2c3bf3c642 Update translations. 2025-04-28 09:22:01 -05:00
Jim Miller
a9c725d32a Bump Test Version 4.44.8 2025-04-26 22:14:30 -05:00
Jim Miller
f936c5b0fb Remove base_xenforoforum_adapter, consolidate into base_xenforo2forum_adapter 2025-04-26 22:14:24 -05:00
Jim Miller
53344afa49 Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2025-04-25 12:25:18 -05:00
Jim Miller
d5addfa2fd Complete impl of use_archiveofourown_gay 2025-04-25 12:25:01 -05:00
Jim Miller
6d8375a9f3 Bump Test Version 4.44.7 2025-04-23 09:47:10 -05:00
Jim Miller
7bc03ac798 adapter_archiveofourownorg: Add use_archiveofourown_gay, allow archiveofourown.gay input for story URLs. 2025-04-23 09:44:18 -05:00
Jim Miller
05d62a5343
Update CLI test version link 2025-04-21 12:06:49 -05:00
Jim Miller
31115f9245 Bump Test Version 4.44.6 2025-04-19 16:40:51 -05:00
dbhmw
26ee692208 adapter_fanfictionnet: Make get_urls_from_page work. 2025-04-19 16:35:01 -05:00
Jim Miller
dd43d25f76 Bump Test Version 4.44.5 2025-04-12 12:32:54 -05:00
dbhmw
fffd15d7ea adapter_ficbooknet: Add series collection & fix downloads 2025-04-12 12:32:30 -05:00
Jim Miller
7c2700c8ea Bump Test Version 4.44.4 2025-04-11 16:00:13 -05:00
Jim Miller
94518c4f25 adapter_fictionmaniatv: Update for ancient stories 2025-04-11 16:00:07 -05:00
Jim Miller
531b965b22 Bump Test Version 4.44.3 2025-04-09 09:46:28 -05:00
Jim Miller
658b637716 adapter_fictionmaniatv: Updates for site change 2025-04-09 09:46:22 -05:00
Jim Miller
44f5feacfb Remove some debugs. 2025-04-09 09:45:55 -05:00
Jim Miller
52451a3eba Bump Test Version 4.44.2 2025-04-03 10:00:46 -05:00
dbhmw
7123f7dd6f Reject HTML sites in no_convert_image 2025-04-03 10:00:32 -05:00
Jim Miller
08a0f9b5fc Bump Test Version 4.44.1 2025-04-01 22:32:36 -05:00
Jim Miller
74ac96a67e base_xenforoforum: Add timeperiodtags and better handle unexpected typed tags 2025-04-01 22:32:26 -05:00
Jim Miller
9eed0340e9 Bump Release Version 4.44.0 2025-04-01 09:54:58 -05:00
Jim Miller
73b90c0291 Additional translation strings 2025-04-01 09:52:17 -05:00
Jim Miller
c33a6e6b05 Bump Test Version 4.43.12 2025-03-29 17:17:51 -05:00
Jim Miller
d77cc15586 adapter_storiesonlinenet(et al): Add always_login option. Closes #1185 2025-03-29 17:17:44 -05:00
Jim Miller
21483f7227 Bump Test Version 4.43.11 2025-03-24 13:26:48 -05:00
Jim Miller
6c0df42fe7 Implementing Timed One Time Password(TOTP) 2FA Exception and collection 2025-03-24 13:22:26 -05:00
Jim Miller
c3a90a8914 Improve logpage updating 2025-03-24 10:47:05 -05:00
Jim Miller
e7f66d293a Bump Test Version 4.43.10 2025-03-21 12:24:46 -05:00
Jim Miller
e49b3a6be0 adapter_asianfanficscom: Add inject_chapter_image option. Closes #1143 2025-03-21 12:20:42 -05:00
Jim Miller
ae72efdc00 Note on open_pages_in_browser for MacOS users linking to #1142 2025-03-21 11:10:44 -05:00
Jim Miller
bc935e213a Bump Test Version 4.43.9 2025-03-21 10:51:29 -05:00
dbhmw
a8e0eabbd8 adapter_literotica: Fixed incorrect parsing for get url from webpage option. 2025-03-21 10:51:06 -05:00
Jim Miller
81b84a8133 Bump Test Version 4.43.8 2025-03-18 20:51:16 -05:00
Jim Miller
a973b8c926 ffnet only: try_shortened_title_urls option #1166 2025-03-18 20:50:12 -05:00
Jim Miller
08ccc659ca open_pages_in_browser_tries_limit is an int 2025-03-17 13:03:04 -05:00
Jim Miller
fb610de27a Revert "adapter_fanfictionnet: Attempt chapter from m. (vs www) when chapter not found"
This reverts commit 370be379f0.
2025-03-17 12:17:56 -05:00
Jim Miller
29d2e3734b base_otw sites list for ini settings. 2025-03-16 21:37:31 -05:00
Jim Miller
48cf17c7b7 Bump Test Version 4.43.7 2025-03-16 13:52:52 -05:00
Jim Miller
ac61c2bb68 AO3 use_archive_transformativeworks_org option 2025-03-16 13:52:52 -05:00
Nicolas SAPA
a12d2a688b Document the new 'directimages' for BrowserCache feature
Explain that this feature is useful for images delivered by WebSite with
a no-cache attribute when `use_browser_cache_only` is true (currently AO3).

Signed-off-by: Nicolas SAPA <nico@ByMe.at>
2025-03-16 13:52:19 -05:00
Nicolas SAPA
52027eac46 Silence a spammy debug
Silence a debug in addImgUrl that was spammy.

Signed-off-by: Nicolas SAPA <nico@ByMe.at>
2025-03-16 13:52:19 -05:00
Nicolas SAPA
a1d4fba728 Add support for 'directimages' with use_browser_cache
Hook the configurable into the direct_fetcher logic already existing for flaresolverr

Signed-off-by: Nicolas SAPA <nico@ByMe.at>
2025-03-16 13:52:19 -05:00
Nicolas SAPA
69872b922c Convert 'use_browser_cache' to bool+
Permit the 'use_browser_cache' configurable to take 'directimage'
so we can later use the default fetcher for image (only).

Signed-off-by: Nicolas SAPA <nico@ByMe.at>
2025-03-16 13:52:19 -05:00
dbhmw
7bd1a1acfc adapter_ficbooknet: Fix additional metadata collection 2025-03-14 18:58:36 -05:00
Jim Miller
80e5a22f0d Bump Test Version 4.43.5 2025-03-10 20:17:19 -05:00
Jim Miller
3cd4188bd8 Add remove_empty_p option, usually for AO3/OTW. #1177 2025-03-10 20:17:04 -05:00
Jim Miller
21d16dbe90 Bump Test Version 4.43.4 2025-03-09 09:56:47 -05:00
Brian
5ce7875851
Update adapter_storiesonlinenet.py
Moved soup.find for article below chapter search code, as it breaks when the description/details contains extraneous /div tag.
2025-03-08 15:20:01 -08:00
Jim Miller
35be14a168 Bump Test Version 4.43.3 2025-03-06 16:04:46 -06:00
dbhmw
930940c7fd adapter_fimfictionnet: Correct the config 2025-03-06 15:57:04 -06:00
dbhmw
f001f19a47 adapter_fimfictionnet: Fetch only the stories in the bookshelf. 2025-03-06 15:57:04 -06:00
Jim Miller
fd7382fb56 Bump Test Version 4.43.2 2025-03-06 13:08:46 -06:00
praschke
c69e940d2a
adapter_syosetucom: remove warningtags from ini 2025-03-06 18:58:39 +00:00
praschke
31dcd8e6ff
adapter_syosetucom: site update 2025-03-06 18:58:26 +00:00
Jim Miller
0bd85c10a8 Bump Test Version 4.43.1 2025-03-05 11:09:54 -06:00
Jim Miller
b075c22261 BrowserCache Chrome Block: Treat entry missing headers same as not found. #1167 #1169 2025-03-05 11:03:32 -06:00
Jim Miller
87b3e04fa1 Bump Release Version 4.43.0 2025-03-01 15:27:40 -06:00
Jim Miller
630f09e644 Bump Test Version 4.42.14 2025-02-28 20:03:09 -06:00
Jim Miller
a0463fc85b base_xenforoforum: Add details_spoiler option for #1165 2025-02-28 20:00:54 -06:00
Jim Miller
de7d8079d9 Add [base_otw] with use_basic_cache:true to defaults.ini 2025-02-26 13:42:04 -06:00
Jim Miller
4aad0ec913 Bump Test Version 4.42.13 2025-02-24 21:24:55 -06:00
Jim Miller
c379b45cb9 BrowserCache: Better handle cache file changing/failing while reading. 2025-02-24 21:24:43 -06:00
Jim Miller
82825d1b16 Bump Test Version 4.42.12 2025-02-24 20:26:13 -06:00
Jim Miller
11b2d5643e Fix BrowserCache for image--cache partitioned by parent(story) page. 2025-02-24 20:26:05 -06:00
Jim Miller
06dc2add8f Bump Test Version 4.42.11 2025-02-24 11:46:50 -06:00
Jim Miller
ab7198bb8f base_otw_adapter: Detect & report 'This site is in beta' page 2025-02-24 11:05:38 -06:00
Jim Miller
d854733ffa AO3: Double default slow_down_sleep_time 2025-02-24 11:05:07 -06:00
Jim Miller
a2cc6bcdd3 Bump Test Version 4.42.10 2025-02-23 20:46:13 -06:00
Jim Miller
c9accda3f8 adapter_mcstoriescom: Suppress site URLs that look like stories but aren't. #1160 2025-02-23 20:46:03 -06:00
Jim Miller
8e55d1e6f4 More direct way for /../ in Get Story URLs from web page, previous broke other sites. #1160 2025-02-23 20:45:47 -06:00
Jim Miller
9b8eb547fc Use urljoin() to remove /../ and /./ from Get Story URLs from web page 2025-02-23 15:22:27 -06:00
Jim Miller
62b3c9264e Bump Test Version 4.42.9 2025-02-22 10:00:41 -06:00
Jim Miller
370be379f0 adapter_fanfictionnet: Attempt chapter from m. (vs www) when chapter not found 2025-02-22 10:00:22 -06:00
Jim Miller
1addfe14fc Strip leading m./www. from domain for browser cache partition. 2025-02-22 10:00:18 -06:00
Jim Miller
e510fb027e Bump Test Version 4.42.8 2025-02-20 19:27:48 -06:00
dbhmw
86b807805f adapter_literotica: Implements get_urls_from_page 2025-02-20 19:27:25 -06:00
Jim Miller
0ace02ee75 six fix for py2/Calibre2 2025-02-19 20:28:40 -06:00
Jim Miller
38ad74af68 Bump Test Version 4.42.7 2025-02-19 10:10:49 -06:00
Jim Miller
6c70a60cdb Add include_tocpage:always option. 2025-02-19 10:10:42 -06:00
Jim Miller
80ee0ca9b9 adapter_fimfictionnet: Further cover fix 2025-02-19 09:58:28 -06:00
Jim Miller
8b143a0c1b Bump Test Version 4.42.6 2025-02-17 20:23:03 -06:00
Jim Miller
9fb86da341 adapter_fimfictionnet: Fix cover images and use data-source attr for img src. 2025-02-17 20:22:55 -06:00
Jim Miller
5c703122ec Bump Test Version 4.42.5 2025-02-14 20:43:59 -06:00
Jim Miller
75f89beab1 adapter_storiesonlinenet: Remove some code that broke parsing when 'author' was in the title. 2025-02-14 20:43:51 -06:00
Jim Miller
fc9d184f20 Bump Test Version 4.42.4 2025-02-13 12:46:19 -06:00
Jim Miller
6c411e054a adapter_literotica: Site changes for non-www domains. 2025-02-13 12:46:12 -06:00
Jim Miller
dbef4719d9 adapter_literotica: http->https 2025-02-13 10:22:06 -06:00
Jim Miller
da6b4c25f2 Bump Test Version 4.42.3 2025-02-12 09:00:13 -06:00
Jim Miller
23004e3953 Make plugin use own copy of six only--including in Smarten Punc 2025-02-12 09:00:06 -06:00
Jim Miller
4a15c2a7d5 Bump Test Version 4.42.2 2025-02-09 08:39:01 -06:00
Hazel Shanks
84dad2ec43 fix bounds check in vote accumulation. resolves JimmXinu#1154 2025-02-09 08:37:52 -06:00
Jim Miller
5ac38fc327 Bump Test Version 4.42.1 2025-02-05 17:15:32 -06:00
Jim Miller
35e0ada643 Make plugin use own copy of six only. 2025-02-05 17:15:08 -06:00
Alexandre Detiste
a9533364ec make plugin work without system "six" 2025-02-05 21:48:17 +01:00
Jim Miller
4a03186ce6 Bump Release Version 4.42.0 2025-02-01 16:52:54 -06:00
Jim Miller
a0271e2957 Update translations. 2025-02-01 16:52:35 -06:00
Jim Miller
11491c6383 Bump Test Version 4.41.5 2025-01-23 13:50:36 -06:00
Jim Miller
24dccc73f0 Re-alphabetize defaults.ini 2025-01-23 13:50:06 -06:00
Jim Miller
8e3a88776a adapter_wwwaneroticstorycom: Update for site changes. 2025-01-23 13:47:43 -06:00
Jim Miller
28141ce9d1 Remove site: ponyfictionarchive.net - Moved to AO3 2025-01-23 13:22:11 -06:00
Jim Miller
ffaa3bf82a Remove site: www.novelupdates.cc - Domain parked somewhere sketchy 2025-01-23 13:19:35 -06:00
Jim Miller
d0d05d6c3b Remove site: fastnovels.net - Blog only now, no stories. 2025-01-23 13:14:48 -06:00
Jim Miller
6d74a58181 Remove site: starskyhutcharchive.net, moved to starskyandhutcharchive.net, not eFiction. Nobody's missed it. 2025-01-23 12:54:08 -06:00
Jim Miller
de85fd42f7 adapter_fanfictalkcom: Update domain name & match pattern 2025-01-23 12:48:02 -06:00
Jim Miller
c4aebd40df Bump Test Version 4.41.4 2025-01-22 17:21:47 -06:00
Jim Miller
81cb631491 Browser Simple Cache adding orig resp time field & removing browser_cache_simple_header_old option. 2025-01-22 17:21:41 -06:00
Jim Miller
35aa5d2143 Bump Test Version 4.41.3 2025-01-18 08:18:27 -06:00
Jim Miller
a8b1489233 Strip out unused parts of requests_toolbelt to avoid dependency issues. #1145 2025-01-18 08:18:13 -06:00
Jim Miller
ffb179c9a1 Bump Test Version 4.41.2 2025-01-13 09:38:38 -06:00
Jim Miller
6d8d7ab66f Cleanup some INI comments 2025-01-13 09:38:38 -06:00
Jim Miller
a128083ce8 Add no_image_processing_regexp option for #1144 2025-01-13 09:37:40 -06:00
Jim Miller
9f78ec0177 Bump Test Version 4.41.1 2025-01-03 09:46:22 -06:00
Jim Miller
d941810825 adapter_fictionmaniatv: Change to https 2025-01-03 09:46:16 -06:00
Jim Miller
ba1975342c Bump Release Version 4.41.0 2025-01-01 10:37:56 -06:00
Jim Miller
27cfac45e4 Bump Test Version 4.40.5 2024-12-30 20:11:47 -06:00
Jim Miller
64a4eb2bb2 Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2024-12-30 20:11:27 -06:00
dbhmw
371f995fda adapter_inkbunnynet: Implemented always_login 2024-12-30 20:11:21 -06:00
dbhmw
816bbdfd66
Small fixes for Wattpad. (#1137)
* adapter_wattpadcom: Various fixes and changes

* adapter_wattpadcom: Config update & category 0 not always present

---------

Co-authored-by: dbhmw <github.spherical376@passmail.net>
2024-12-30 20:10:56 -06:00
Jim Miller
cdd6df8a57 Bump Test Version 4.40.4 2024-12-30 19:06:07 -06:00
Jim Miller
5d4489bb28 Update translations. 2024-12-30 19:05:43 -06:00
kat
a9944cd255
add superlove & CFAA (otwarchive sites) support (#1136)
* add superlove otwarchive site support

* add cfaa otwarchive site support

* fixes slash changes for PR

* another fix sorry
2024-12-30 18:34:12 -06:00
Jim Miller
c284b2a6c6 XenForo lazyload: use data-src first if data-url also present. QQ proxy in data-src caches/bypasses some issues 2024-12-15 21:26:25 -06:00
Jim Miller
15dde72f14 Bump Test Version 4.40.3 2024-12-15 12:40:47 -06:00
Jim Miller
ff0f22565c adapter_fimfictionnet: Implement always_login. Remove unused fail_on_password & do_update_hook settings. #1135 2024-12-15 12:33:00 -06:00
Jim Miller
33813b4047 Bump Test Version 4.40.2 2024-11-17 09:23:18 -06:00
Jim Miller
ae3accca27 Call Calibre's safe_open_url for open_pages_in_browser 2024-11-17 09:20:59 -06:00
Jim Miller
d998467f7a Bump Test Version 4.40.1 2024-11-08 10:36:53 -06:00
Jim Miller
29fddbce8e Fix for double replace_metadata when non-list metadata called by getList(). 2024-11-08 10:36:44 -06:00
Jim Miller
a4e1db32e0 Add subject_tags to -m/j CLI output 2024-11-08 10:35:39 -06:00
Jim Miller
81aea65555 Update certifi to 2024.08.30 certs 2024-11-01 09:32:12 -05:00
Jim Miller
9005f9db4c Bump Release Version 4.40.0 2024-11-01 09:03:43 -05:00
Jim Miller
7de040d8db Update translations. 2024-10-30 20:18:54 -05:00
Jim Miller
9c53cf236e Bump Test Version 4.39.10 2024-10-29 16:15:13 -05:00
Jim Miller
2e6ac07020 Fix for D/L from URL Mode Menu actions not honoring changed update mode in dialog. 2024-10-29 16:15:06 -05:00
Jim Miller
3febac62a8 Bump Test Version 4.39.9 2024-10-22 13:37:07 -05:00
Jim Miller
c4ea6ca5fd Add translation strings. 2024-10-22 13:37:07 -05:00
Jim Miller
75f9fb2d38 Add error_dialog for email fetch failure about 2FA/outlook etc. 2024-10-22 13:37:03 -05:00
Jim Miller
e4f83c52ca Fix for translation string bug 2024-10-22 13:26:47 -05:00
Jim Miller
eb54731ae9 Bump Test Version 4.39.8 2024-10-16 16:20:47 -05:00
dbhmw
eb24bcb2ac
adapter_ficbook: Another site update (#1125)
Co-authored-by: dbhmw <github.spherical376@passmail.net>
2024-10-16 16:20:05 -05:00
Jim Miller
ffa533e5fd Bump Test Version 4.39.7 2024-10-15 17:37:28 -05:00
dbhmw
bd76066905 adapter_ficbooknet: Fixes for site changes 2024-10-15 21:42:24 +00:00
Jim Miller
eb17af9252 Bump Test Version 4.39.6 2024-10-14 22:25:26 -05:00
Jim Miller
4471b1f980 adapter_fimfictionnet: Skip group JSON collection on failure. #1122 2024-10-14 22:25:21 -05:00
Jim Miller
9cfd88c098 Backout flaresolverr_json_fix since it doesn't work for everyone. #1122 2024-10-14 22:25:10 -05:00
Jim Miller
c1cf8995ea Bump Test Version 4.39.5 2024-10-14 16:16:17 -05:00
Jim Miller
55995be7de Add flaresolverr_json_fix. See #1122 2024-10-14 16:16:10 -05:00
Jim Miller
869686f363 Bump Test Version 4.39.4 2024-10-05 20:02:46 -05:00
Jim Miller
f45a05ddb6 adapter_syosetucom 'is' isn't '=='--generated SyntaxWarning 2024-10-05 20:02:46 -05:00
dbhmw
434ff0de74
adapter_inkbunnynet: adds before_get_urls_from_page (#1119)
Co-authored-by: dbhmw <github.spherical376@passmail.net>
2024-10-05 19:47:25 -05:00
Jim Miller
d0ece28197 Bump Test Version 4.39.3 2024-10-04 20:16:48 -05:00
Jim Miller
cd1db0a462 adapter_deviantartcom: Site changes, new chapter text tag. See #1118 2024-10-04 20:16:38 -05:00
Jim Miller
075c5cb7c2 Bump Test Version 4.39.2 2024-10-04 19:08:19 -05:00
praschke
b8740ca1c7
syosetu: fix chapter extraction (#1117) 2024-10-04 19:08:02 -05:00
Jim Miller
3db3e28595 Bump Test Version 4.39.1 2024-10-04 14:01:58 -05:00
Jim Miller
b610d49f6b Change decode_emails default to true. 2024-10-04 14:01:58 -05:00
praschke
35afca430a
syosetu: adjust div names on multi-chapter stories (#1116) 2024-10-04 14:01:14 -05:00
Jim Miller
1499037e19 Bump Release Version 4.39.0 2024-10-01 20:52:39 -05:00
Jim Miller
1aaa4102a5 Bump Test Version 4.38.3 2024-10-01 09:25:07 -05:00
Jim Miller
049c9af0e4 adapter_asianfanficscom: use_cloudscraper:true in defaults.ini 2024-10-01 09:25:07 -05:00
Jim Miller
482b6b67eb adapter_asianfanficscom: Add Is adult toggle call. 2024-10-01 09:22:41 -05:00
Jim Miller
cdb752df6a Better error when utf8FromSoup called with None. 2024-10-01 09:20:21 -05:00
Jim Miller
0412355001 Bump Test Version 4.38.2 2024-09-29 20:28:37 -05:00
Jim Miller
0dc049aedb Reduce debug output 2024-09-29 20:28:37 -05:00
Jim Miller
832387dea0 Add decode_emails option, defaults to false. 2024-09-29 20:28:28 -05:00
Jim Miller
94bd4bf236 Bump Test Version 4.38.1 2024-09-28 19:11:06 -05:00
Jim Miller
493e76df30 Fix(es) for get_url_search not found when seriesUrl doesn't match an adapter site. 2024-09-28 19:10:37 -05:00
Jim Miller
44b6e752f6 Reduce debug output 2024-09-28 18:16:12 -05:00
Jim Miller
5d6f2c91c1 Apply replace_chapter_text to chapter title to CLI metadata dump 2024-09-28 13:18:47 -05:00
Jim Miller
04ae49f944 adapter_adastrafanficcom: Fix class name. Doesn't *actually* matter. 2024-09-25 23:06:31 -05:00
Jim Miller
020606fea1 Fix for regression when browser_cache_simple_header_old added. Closes #1104 2024-09-08 14:49:42 -05:00
Jim Miller
711698620e Bump Release Version 4.38.0 2024-09-01 11:36:55 -05:00
Jim Miller
968687bb82 Update translations. 2024-08-31 19:43:14 -05:00
Jim Miller
07ab6d137b Update translations. 2024-08-30 11:04:43 -05:00
Jim Miller
d51ac5d6f5 Bump Test Version 4.37.4 2024-08-28 14:16:40 -05:00
Jim Miller
478d2e8f17 BrowserCache: pass getConfig, add browser_cache_simple_header_old 2024-08-28 14:16:30 -05:00
Jim Miller
67a1dcee90 Bump Test Version 4.37.3 2024-08-27 19:47:12 -05:00
Jim Miller
af834b1e40 Experimental: Chrome Simple Cache extra field 2024-08-27 19:47:07 -05:00
Jim Miller
ae535e2518 Add get_url_search() to base_xenforoforum_adapter. 2024-08-26 13:54:56 -05:00
Jim Miller
96d36ae71a Bump Test Version 4.37.2 2024-08-26 13:53:40 -05:00
Jim Miller
480b7239e5 Add adapter classmethod get_url_search to move site specific calibre search code to adapters 2024-08-26 11:21:14 -05:00
Jim Miller
2666164c5b Bump Test Version 4.37.1 2024-08-14 13:13:08 -05:00
Jim Miller
6ef8d1b215 Make CLI username prompt more visible 2024-08-14 13:13:00 -05:00
Jim Miller
654619e7e2 adapter_scribblehubcom: Allow for changing title in story URL. 2024-08-14 13:00:33 -05:00
dado330
4ea869a764
Update adapter_syosetucom.py (#1095)
Fix update retrieval for series not completed
2024-08-10 14:54:09 -05:00
Jim Miller
837df18cb0 Bump Release Version 4.37.0 2024-08-01 08:31:43 -05:00
Jim Miller
248f1c022b Update translations. 2024-07-31 19:40:47 -05:00
Jim Miller
4fabf9e65c Bump Test Version 4.36.5 2024-07-29 19:34:49 -05:00
Jim Miller
b7c318f520 Fix for paginated AO3 series, closes #1091 2024-07-29 19:34:42 -05:00
Jim Miller
89a15e1b16 Bump Test Version 4.36.4 2024-07-16 13:13:03 -05:00
Jim Miller
5b41097abc Use titlepage_entry for titlepage_wide_entry unless explicitly set. 2024-07-16 13:12:57 -05:00
Jim Miller
a672b6dbdf Bump Test Version 4.36.3 2024-07-16 12:03:39 -05:00
Jim Miller
e4d5d43efa Allow scribblehub.com story URLs w/o title and search calibre w/o title 2024-07-16 12:03:33 -05:00
Jim Miller
cc572857e0 Bump Test Version 4.36.2 2024-07-08 09:17:09 -05:00
Jim Miller
2f52ae31c0 adapter_storiesonlinenet: Fix for chapter select getting cover image link. 2024-07-08 09:17:02 -05:00
Jim Miller
3ddf801925 Bump Test Version 4.36.1 2024-07-07 09:56:12 -05:00
Jim Miller
182695b0af adapter_storiesonlinenet: Remove ''s Page' to '.s Page' 2024-07-07 09:21:51 -05:00
Jim Miller
656e67cc57 Full OTW settings for www.adastrafanfic.com in defaults.ini 2024-07-07 09:15:34 -05:00
Jim Miller
34215ce0ee Bump Release Version 4.36.0 2024-07-01 15:17:26 -05:00
Jim Miller
c706aed271 Bump Test Version 4.35.7 2024-07-01 15:14:43 -05:00
Jim Miller
e5f8e5bba4 Built-in Event For Action Chains plugin 2024-07-01 15:14:37 -05:00
Jim Miller
11d8fae876 Update defaults.ini comments about OTW 2024-06-30 19:12:12 -05:00
Jim Miller
4a14e5fc86 Bump Test Version 4.35.6 2024-06-23 09:54:36 -05:00
Jim Miller
7548ce6ae0 Catch bad href searches during internal link anchor search. 2024-06-23 09:53:36 -05:00
Jim Miller
e113bbfb1c base_xenforoforum: Remove [] from prefixtags ala [NSFW] on QQ 2024-06-18 19:41:12 -05:00
Jim Miller
d1ccdfd21f adapter_spiritfanfictioncom: Minor regex fix. 2024-06-09 14:51:07 -05:00
Jim Miller
68e8f49e9f Bump Test Version 4.35.5 2024-06-09 14:47:05 -05:00
Jim Miller
49a0328268 adapter_spiritfanfictioncom: Cheesy fix for py2 not knowing %z in dates. 2024-06-09 14:46:54 -05:00
Jim Miller
25ea3fcaad adapter_spiritfanfictioncom: use_basic_cache:true 2024-06-09 14:23:09 -05:00
Jim Miller
a5378ca419 Bump Test Version 4.35.4 2024-06-09 12:58:06 -05:00
Jim Miller
e0b733b60d Alphabet order INI sections 2024-06-09 12:57:35 -05:00
kilandra
33b2b10bf3
New Site: SpiritFanfiction.com (#1078)
Add support for spiritfanfiction.com
2024-06-09 12:54:58 -05:00
Jim Miller
c468c26208 Bump Test Version 4.35.3 2024-06-09 08:46:34 -05:00
Jim Miller
9d29f888b3 XF2: SB/SV changed the header for thread_status 2024-06-09 08:46:07 -05:00
Jim Miller
d1e8a77489 Bump Test Version 4.35.2 2024-06-04 10:31:21 -05:00
Jim Miller
ef66e73fa4 adapter_ficbooknet: Better fixes for py2 from dbhmw 2024-06-04 10:31:08 -05:00
Jim Miller
7f128587c0 Bump Test Version 4.35.1 2024-06-03 15:52:05 -05:00
Jim Miller
53a7a60dbc adapter_ficbooknet: Fixes for py2 in older Calibres 2024-06-03 15:51:55 -05:00
dbhmw
71a61ff166
adapter_ficbooknet: Fix breakage for proxies & add covers (#1077)
* Make ficbook work under nsapa

* Support adding covers

* More patches

* .

* Fix

* Fix num pages

* Add updated urls to getSiteExampleURLs
Update configs
Add logging

* New getSiteURLPattern

* Fixed scraping the collections

* Fixed follow count

* Fixed num awards count

* Adds ability to login

* A minor refactor

---------

Co-authored-by: dbhmw <github.spherical376@passmail.net>
2024-06-03 15:50:57 -05:00
Jim Miller
9c051e6c3b Bump Release Version 4.35.0 2024-06-01 09:03:25 -05:00
Jim Miller
f0d89498dc Update translations. 2024-06-01 09:01:39 -05:00
Jim Miller
abb370a852 Bump Test Version 4.34.9 2024-05-28 21:54:57 -05:00
Jim Miller
4b9054d1b4 Add download_finished_signal for Action Chains #1073 2024-05-28 21:54:42 -05:00
Jim Miller
2d0db171a8 Remove checks for pre-2.85.1 features--assumed present. 2024-05-28 14:09:10 -05:00
dbhmw
7f67465767
Support for touchfluffytail.org (#1071)
* An attempt is made

* Quick fix

* Update adapter_touchfluffytail.py

* Config fix

* Add num of comments & reviews

* Minor conf change;Add num of views

* Add section to plugin-defaults.ini

* Fixed config
Improved a bit views count
Changed getSiteURLPattern to stop grabbing navigation pages

* Repair plugin-defaults.ini

---------

Co-authored-by: dbhmw <github.spherical376@passmail.net>
2024-05-26 10:23:28 -05:00
Jim Miller
6801d5e01d Bump Test Version 4.34.8 2024-05-26 10:20:55 -05:00
Jim Miller
b01914c24e adapter_wattpadcom: Improve error reporting when story not found / connection refused. 2024-05-26 10:20:36 -05:00
Jim Miller
dd41f99288 Bump Test Version 4.34.7 2024-05-22 19:44:45 -05:00
Jim Miller
37db56e6b3 base_xenforo2 better detect whether logged in. 2024-05-22 19:39:31 -05:00
Jim Miller
f0a08f7647 Bump Test Version 4.34.6 2024-05-21 20:56:42 -05:00
Jim Miller
2593f742c9 adapter_deviantartcom: Streamline login vs watchers vs mature See #1070 2024-05-21 20:56:22 -05:00
Jim Miller
6ac299c198 adapter_deviantartcom: Watchers only stories need login #1070 2024-05-20 19:26:41 -05:00
dbhmw
3eda289349
adapter_inkbunny: Fix author & category (#1069)
* Fix author & category

* Remove redundant calls

---------

Co-authored-by: dbhmw <github.spherical376@passmail.net>
2024-05-17 16:57:07 -05:00
Jim Miller
95a7bdd3a9 Bump Test Version 4.34.5 2024-05-16 21:55:43 -05:00
Jim Miller
84257e7388 base_xenforo2forum: Prefix tag collecting too much. 2024-05-16 21:55:35 -05:00
Jim Miller
465bffd896 datetime.utcnow() deprecated in more recent py3 versions 2024-05-11 16:54:52 -05:00
dbhmw
eabfd1bef3
Skip invalid images, detect img types (#1068)
* Skip invalid images

* Exception handling

---------

Co-authored-by: dbhmw <github.spherical376@passmail.net>
2024-05-11 16:06:33 -05:00
Jim Miller
8d6676617c Bump Test Version 4.34.4 2024-05-09 11:25:05 -05:00
Jim Miller
c47b620f67 Fix for WebToEpub firefox cache key changing 2024-05-09 11:24:57 -05:00
Jim Miller
df94cc439e Bump Test Version 4.34.3 2024-05-08 11:51:43 -05:00
Jim Miller
08032778bd QQ: Doesn't need reader_posts_per_page:30 anymore 2024-05-08 11:51:26 -05:00
Jim Miller
52deec3fd8 Bump Test Version 4.34.2 2024-05-07 13:38:12 -05:00
Jim Miller
5b443d4363 adapter_forumquestionablequestingcom:Switch to BaseXenForo2ForumAdapter 2024-05-07 13:38:06 -05:00
Jim Miller
4170cfd9a6 Bump Test Version 4.34.1 2024-05-02 11:13:08 -05:00
Jim Miller
ae4735df04 adapter_ficbooknet: Remove py3 string handling that breaks on py2 2024-05-02 11:13:00 -05:00
Jim Miller
6041036787 Update bundled certifi cacert.pem and version, not core.py 2024-05-02 08:48:16 -05:00
Jim Miller
d451265621 Bump Release Version 4.34.0 2024-05-01 09:01:51 -05:00
Jim Miller
677f213337 adapter_literotica: Match prior formatting of averrating. 2024-04-30 16:43:10 -05:00
Jim Miller
8537702028 Bump Test Version 4.33.15 2024-04-29 09:44:35 -05:00
Jim Miller
6d3d4d1ae6 adapter_literotica: Fix category collection. #1058 2024-04-29 09:44:28 -05:00
Jim Miller
1f42c188fa Bump Test Version 4.33.14 2024-04-28 21:08:46 -05:00
Jim Miller
9346985718 adapter_literotica: Restore chapter descs description when nothing else. #1058 2024-04-28 21:08:40 -05:00
Jim Miller
4585afde50 Bump Test Version 4.33.13 2024-04-28 10:57:54 -05:00
Jim Miller
bee6cb9ba6 adapter_literotica: Don't setDescription() if tag is empty #1058 2024-04-28 10:57:33 -05:00
Jim Miller
581b627a3e Bump Test Version 4.33.12 2024-04-27 14:41:58 -05:00
Jim Miller
4436001494 adapter_literotica: Improved description collection. #1058 2024-04-27 14:41:34 -05:00
Jim Miller
6116a19986 adapter_literotica: Collect averrating from hidden JSON. #1058 2024-04-27 14:41:19 -05:00
dbhmw
99fd4ea0e5
adapter_ficbooknet Fix update date not working (#1066)
* Fixes pub and updates v2

* Add 'part_text' for proper formatting

* Fix collections grabbing

* Add collection of numawards

* Add collection of categories

* Collect awards
2024-04-27 12:06:14 -05:00
Jim Miller
a613b842f2 Bump Test Version 4.33.11 2024-04-26 15:42:13 -05:00
Jim Miller
6462c5c366 adapter_literotica: Allow /series/se/alphanumeric instead of just numeric. 2024-04-26 15:31:06 -05:00
Jim Miller
8c4a8cd2da Bump Test Version 4.33.10 2024-04-26 12:26:17 -05:00
Jim Miller
7a0ea3ce96 adapter_literotica: Remove use_meta_keywords option. #1058 2024-04-26 12:25:24 -05:00
Jim Miller
f14fe9d3aa adapter_literotica: Rewrite(mostly) for site changes. #1058 2024-04-26 12:24:10 -05:00
Jim Miller
36add28269 adapter_literotica: Fix for chapter_categories_use_all:true causing Tag vs string error. 2024-04-26 10:34:19 -05:00
Jim Miller
87b4171dd4 Bump Test Version 4.33.9 2024-04-24 16:57:37 -05:00
dbhmw
951acf61b4
ficbook.net Add chapter dates for TOC (#1065)
* Fix the broken stuff

* Last correction

* Add chapter dates
2024-04-24 16:57:23 -05:00
Jim Miller
8674b54753 Bump Test Version 4.33.8 2024-04-24 12:09:04 -05:00
Jim Miller
b7e5bf0468 adapter_literotica: Not all chapters have Rating 2024-04-24 12:08:57 -05:00
Jim Miller
0f12c127b6 Bump Test Version 4.33.7 2024-04-24 10:37:06 -05:00
Jim Miller
50c51dc993 adapter_literotica: Beta site changes #1058 2024-04-24 10:36:36 -05:00
Jim Miller
65bf03a613
Merge pull request #1064 from dbhmw/main
Ficbook.net More metadata collection
2024-04-24 10:35:15 -05:00
dbhmw
0bb8421f98 Fixes 2024-04-24 13:53:43 +00:00
dbhmw
108e603e63 Collect collections v1 2024-04-24 12:54:58 +00:00
dbhmw
1868ed842e En 2024-04-23 22:38:39 +00:00
dbhmw
6c505a6170 The SPACE 2024-04-23 22:33:05 +00:00
dbhmw
72d508b0bf Add more metadata collection 2024-04-23 22:21:07 +00:00
Jim Miller
d6f2faf170 Bump Test Version 4.33.6 2024-04-23 08:39:18 -05:00
Jim Miller
92cbff7db9 Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2024-04-23 08:32:42 -05:00
Jim Miller
4bb2d50921
Merge pull request #1063 from dbhmw/dbhmw-patch-1
Ficbook.net Fix categories and add chapter notes
2024-04-23 08:32:33 -05:00
dbhmw
c3d8bc4fd0
Fix categories and add chapter notes 2024-04-23 10:14:52 +00:00
Jim Miller
37ae6cbdbb Bump Test Version 4.33.5 2024-04-20 18:22:28 -05:00
Jim Miller
b953daa3c2 adapter_storiesonlinenet: Fix for /library/ -> /s/ 2024-04-20 18:20:23 -05:00
Jim Miller
463910cd54 Bump Test Version 4.33.4 2024-04-14 08:57:48 -05:00
Jim Miller
95bfdf907f Alphabetize INIs 2024-04-14 08:57:40 -05:00
Yves
85550aeaf6
Add a fanfictions.fr connector (#1061)
* Add a fanfictions.fr connector

* PR fixes

* Move the cover image test outside of the generic tests

* Handle suspended fanfictions

* Allow downloading of fanfictions only available in zip files

* Add the date published element

* Add the basic cache

* Aggressive line breaks

* Fix description formatting

* Add more metadata

* Fix the description test
2024-04-14 08:56:13 -05:00
Jim Miller
5b20926f2c Bump Test Version 4.33.3 2024-04-09 10:58:01 -05:00
Jim Miller
c915aceb85 basexf: Fix for prefix tags, put in prefixtags included in genre 2024-04-09 10:57:34 -05:00
Jim Miller
36d56b867c Bump Test Version 4.33.2 2024-04-09 10:19:21 -05:00
Jim Miller
e1cec84075 basexf: Add XF categorized tags into: category, genre, characters, contenttags and formattags 2024-04-09 10:19:15 -05:00
Jim Miller
ba3676d73f Remove some debugs 2024-04-09 10:12:02 -05:00
Jim Miller
80f50b298f Add a warning output about minimum_threadmarks for XF. 2024-04-05 10:54:29 -05:00
Jim Miller
9120504249 Remove some debugs. 2024-04-04 16:47:31 -05:00
Jim Miller
55c7ca9c10 Bump Test Version 4.33.1 2024-04-02 10:31:36 -05:00
Jim Miller
704ea89d72 OTW(AO3) Support Paginated Series 2024-04-02 10:31:30 -05:00
Jim Miller
8eecd0aa7d Bump Release Version 4.33.0 2024-04-01 08:59:48 -05:00
Jim Miller
c53f99d01c Update translations. 2024-03-30 15:37:02 -05:00
Jim Miller
438a1265f2 Bump Test Version 4.32.13 2024-03-26 20:57:30 -05:00
hmonsta
86766223cb updated inkbunny adapter
fixed "keep_summary_html" config being ignored and always stripping formatting
cover images can now be extracted from more submissions
2024-03-26 20:56:38 -05:00
Jim Miller
1fa94de1d9
Update README.md 2024-03-26 17:41:33 -05:00
Jim Miller
56d1cf19ef Bump Test Version 4.32.12 2024-03-25 09:22:33 -05:00
Jim Miller
701c096ed4 adapter_deviantartcom: Add a 6th different message to indicate 'mature content'. #1052 2024-03-25 09:22:26 -05:00
Jim Miller
aab3e1c601 Bump Test Version 4.32.11 2024-03-24 15:53:15 -05:00
Jim Miller
8d040a4926 adapter_deviantartcom: Bad username fails separately than bad pass. #1052 2024-03-24 15:53:15 -05:00
Jim Miller
4453cbb143 Perform replace_chapter_text on chapter titles, too. 2024-03-24 13:26:10 -05:00
Jim Miller
0c173f8110 Bump Test Version 4.32.10 2024-03-23 17:52:06 -05:00
Jim Miller
a14b39eb4c Paste into ini edit as plain text only. 2024-03-23 17:51:59 -05:00
Jim Miller
c9cb51f8c4 Bump Test Version 4.32.9 2024-03-22 13:01:07 -05:00
Jim Miller
dbe6c6105c Ignore bs4 XMLParsedAsHTMLWarning as per #894 from mcepl 2024-03-22 13:01:00 -05:00
Jim Miller
04231eecfe Add note to tweak_fg_sleep settings. 2024-03-21 10:08:09 -05:00
Jim Miller
a55a4c93a5 Bump Test Version 4.32.8 2024-03-18 19:33:44 -05:00
praschke
dcd4f0f6a5 syosetu: make numeric metadata robust against wording changes 2024-03-18 19:33:31 -05:00
Jim Miller
792ab02195 Bump Test Version 4.32.7 2024-03-14 08:58:53 -05:00
praschke
7a87310403 syosetu: typos 2024-03-14 08:56:35 -05:00
praschke
7e070528a1 Add support for kakuyomu.jp 2024-03-14 08:56:35 -05:00
Jim Miller
4f3af1395f Bump Test Version 4.32.6 2024-03-10 20:55:43 -05:00
Jim Miller
1fc4f3d70b Don't try to set imap tags before checking for 'good' update. 2024-03-10 20:55:36 -05:00
Jim Miller
12ee3dae5e Bump Test Version 4.32.5 2024-03-10 16:14:20 -05:00
Jim Miller
cf28bc26f0 adapter_deviantartcom: Add another way to remove comments section. 2024-03-10 16:14:14 -05:00
Jim Miller
bd41796231 Bump Test Version 4.32.4 2024-03-05 08:04:29 -06:00
Jim Miller
f21f039b3a Move new exception catching for metadata errors 2024-03-05 08:04:22 -06:00
Jim Miller
7263f4120c Bump Test Version 4.32.3 2024-03-03 17:21:03 -06:00
Jim Miller
22e0e8da66 Report errors during library update loop better. 2024-03-03 17:20:57 -06:00
Jim Miller
7173bf0803 Fix setting book['tags'] for bgmeta for update AND overwrite. 2024-03-03 17:20:13 -06:00
Jim Miller
7246cdf853 Bump Test Version 4.32.2 2024-03-02 10:45:51 -06:00
Jim Miller
c60b296bc9 SV site change, '...' in paginated threadmarks list 2024-03-02 10:45:43 -06:00
Jim Miller
a8a86533ad Bump Test Version 4.32.1 2024-03-01 15:14:45 -06:00
Jim Miller
d1c5847a58 SV site change, paginated threadmarks list 2024-03-01 15:14:39 -06:00
Jim Miller
68e0d70fcb Bump Release Version 4.32.0 2024-03-01 08:45:36 -06:00
Jim Miller
74b28f7ead Update translations. 2024-03-01 08:44:07 -06:00
Jim Miller
acda805c3c Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2024-02-28 12:21:10 -06:00
Jim Miller
a37fbbbd51 Bump Test Version 4.31.8 2024-02-28 12:20:41 -06:00
Jim Miller
2cdb6036ea Add Edit personal.ini as a direct menu item--can keyboard shortcut 2024-02-28 12:20:31 -06:00
Jim Miller
77afdc0208
Update README.md 2024-02-27 12:13:31 -06:00
Jim Miller
7e0e68f66f Bump Test Version 4.31.7 2024-02-24 10:35:24 -06:00
Jim Miller
bbec6fcd5f adapter_deviantartcom: Fix for site change and detect no username. Closes #1042 2024-02-24 10:35:11 -06:00
Jim Miller
631fe6c9c9 Bump Test Version 4.31.6 2024-02-18 17:44:19 -06:00
Jim Miller
a86755ad98 Merge branch 'syosetu' of https://github.com/praschke/FanFicFare 2024-02-18 17:24:15 -06:00
praschke
42d2b00007
syosetu: python 2 and beautiful soup compatibility fixes 2024-02-18 21:40:11 +00:00
praschke
ad10cad0b0
syosetu: add all extra metadata to commented titlepage addition 2024-02-18 21:40:06 +00:00
praschke
71d3589ebc
syosetu: remove timezone 2024-02-18 21:39:56 +00:00
praschke
84ed1827be
syosetu: remove suggested japanese labels from ini files 2024-02-18 21:39:40 +00:00
Jim Miller
ce29a6923e Bump Test Version 4.31.5 2024-02-18 14:45:17 -06:00
Jim Miller
d96d194b2b Set book[tags] after writeStory for literotica. 2024-02-18 14:34:11 -06:00
praschke
5cb3bccf45
Add support for syosetu.com 2024-02-18 00:02:00 +00:00
Jim Miller
e6639323b7 Bump Test Version 4.31.4 2024-02-15 10:34:40 -06:00
Jim Miller
f94e0eaf32 Don't need \n after </span> looking for log entries. 2024-02-15 10:34:40 -06:00
Jim Miller
37bcb1284b Don't do random lang anymore in test1--changes series sort in Calibre. 2024-02-13 10:44:21 -06:00
Jim Miller
295bd2e1ab Bump Test Version 4.31.3 2024-02-06 10:21:22 -06:00
Jim Miller
45b4a8d8bf Add include_images:coveronly option for #1037 2024-02-06 10:21:14 -06:00
Jim Miller
cdb60423fe Bump Test Version 4.31.2 2024-02-05 10:47:07 -06:00
Jim Miller
50f913843b adapter_wwwutopiastoriescom: Remove author page get, add extracategories instead. Site static now. 2024-02-05 10:46:57 -06:00
Jim Miller
581d6f6657 adapter_literotica: Allow for empty div.aa_ht tags, remove extra None
from text
2024-02-05 10:36:58 -06:00
Jim Miller
e03f65332a Bump Test Version 4.31.1 2024-02-04 16:48:21 -06:00
Jim Miller
3e9abec817 adapter_wwwutopiastoriescom: Updates for site changes. 2024-02-04 16:48:13 -06:00
Jim Miller
0d8f84ba23 Bump Release Version 4.31.0 2024-02-01 11:26:10 -06:00
Jim Miller
c646419336 Update translations. 2024-02-01 11:25:51 -06:00
Jim Miller
622a4eb44b Change default flaresolverr_proxy_timeout:59000 so it happens before default connect_timeout:60.0 2024-01-30 19:46:49 -06:00
Jim Miller
d4fbc73b41 Bump Test Version 4.30.10 2024-01-30 10:36:48 -06:00
Jim Miller
391f469a99 adapter_deviantartcom: Changed to 2 post login (#1035) and finding story id 2024-01-30 10:35:41 -06:00
Jim Miller
a0ca55d7f6 Bump Test Version 4.30.9 2024-01-29 19:12:43 -06:00
Jim Miller
a4bbe27771 fetcher_flaresolverr: Report errors from Flaresolverr better and fail faster--no retries. 2024-01-29 19:12:32 -06:00
Jim Miller
a5e2d1eb45 Bump Test Version 4.30.8 2024-01-28 14:05:34 -06:00
Moxie
7a89d03339 Add a configuration option to normalize URLs returned from CLI --imap 2024-01-28 14:05:00 -06:00
Jim Miller
ae638fd0a1 Bump Test Version 4.30.7 2024-01-26 15:58:17 -06:00
grenskul
26a59b373a
Update adapter_royalroadcom.py 2024-01-26 21:34:31 +00:00
grenskul
479c0b7d95
Update adapter_royalroadcom.py
owner recommended commits
2024-01-26 20:59:33 +00:00
grenskul
52a0bb6e0e
Update adapter_royalroadcom.py
fix for including "speak: never"; in the style
2024-01-26 20:46:17 +00:00
Jim Miller
f2f333c807 Bump Test Version 4.30.6 2024-01-26 12:32:11 -06:00
grenskul
3f2f2a33d3 Update adapter_royalroadcom.py
forgot the spoilers the first time
2024-01-26 12:31:50 -06:00
grenskul
ba9272822b Update adapter_royalroadcom.py
Added a bypass for Royal Road introducing lines saying "A case of theft: this story is not rightfully on Amazon; if you spot it, report the violation" etc . This is done by finding elements with the "display: none;" style and extracting them
2024-01-26 12:31:50 -06:00
Jim Miller
9575044262 Bump Test Version 4.30.5 2024-01-17 17:33:43 -06:00
Jim Miller
7306e81a30 Fix for site change, adapter_deviantartcom. #1030 2024-01-17 17:33:24 -06:00
Jim Miller
19f9132109 Bump Test Version 4.30.4 2024-01-15 12:20:01 -06:00
Jim Miller
f340ba50da Allow image/comic and poems on literotica 2024-01-15 12:19:51 -06:00
Jim Miller
6e90c7ed7b Bump Test Version 4.30.3 2024-01-14 16:22:07 -06:00
Moxie
0a81bc7c6b Fix selector for xenforo2 stats, needed to pull word count estimate 2024-01-14 16:21:40 -06:00
Jim Miller
f5dd6b90fc Bump Test Version 4.30.2 2024-01-10 13:19:10 -06:00
Jim Miller
e1a9438595 Swap out SuperFastHash implementation #1026 2024-01-10 13:19:02 -06:00
Jim Miller
97a72380e6 Bump Test Version 4.30.1 2023-12-16 09:44:53 -06:00
Jim Miller
a6a3a4e240 Another OTW(AO3) block/hidden story string 2023-12-16 09:41:08 -06:00
Jim Miller
b6b1e6ecdc Bump Release Version 4.30.0 2023-12-01 12:25:25 -06:00
Jim Miller
85cf21a32c Update translations. 2023-12-01 12:24:59 -06:00
Jim Miller
918ed4a23e Bump Test Version 4.29.7 2023-11-26 11:32:48 -06:00
Jim Miller
84d6106a30 Better handling of &<> entities with stripHTML() and chapter titles. #1019 2023-11-26 11:32:41 -06:00
Jim Miller
6761cae9c1 Bump Test Version 4.29.6 2023-11-21 18:52:55 -06:00
Jim Miller
e330ccbe94 SB(but not SV) removed RSS link from thread list title. Closes #1017 2023-11-21 18:52:46 -06:00
Jim Miller
da7059e978 Bump Test Version 4.29.5 2023-11-20 11:41:56 -06:00
Jim Miller
893345dc33 adapter_storiesonlinenet: Allow /n/ as well as /s/ paths 2023-11-20 11:41:50 -06:00
Jim Miller
9fcc6fe68a Bump Test Version 4.29.4 2023-11-20 09:34:49 -06:00
Brian
0c02f17d67 Update adapter_storiesonlinenet.py
Fix issue introduced in initial fix for issue #1013 for paid subscribers that include download link and URL link in same list
2023-11-20 09:34:37 -06:00
Jim Miller
11c8805f4c Bump Test Version 4.29.3 2023-11-19 12:46:22 -06:00
Jim Miller
cf065fa706 adapter_fanfictionnet: Only use data-original cover images. 2023-11-19 12:46:16 -06:00
Jim Miller
3c94c9d308 Bump Test Version 4.29.2 2023-11-19 09:54:43 -06:00
Jim Miller
831bea725f adapter_storiesonlinenet: Update for chapter URL change. See #1013 2023-11-19 09:49:46 -06:00
Jim Miller
b748283484 Comment about why we're keeping a typo. See #1011 2023-11-15 10:48:39 -06:00
Jim Miller
28af7e1722 Bump Test Version 4.29.1 2023-11-15 08:50:27 -06:00
Jim Miller
1673da5a4b BrowserCache should ignore usecache flag, that's for BasicCache. 2023-11-15 08:50:20 -06:00
Jim Miller
c97c0e822d Suppress a debug output except when it matters. 2023-11-15 08:49:40 -06:00
Jim Miller
ce24ac70d9 Bump Release Version 4.29.0 2023-11-01 11:41:05 -05:00
Jim Miller
9ab4739710 Update translations. 2023-10-24 20:59:23 -05:00
Jim Miller
685084e711 Add use_flaresolverr_proxy:directimages comment to defaults.ini #1007 2023-10-24 20:57:47 -05:00
Jim Miller
dd049ac297 Bump Test Version 4.28.8 2023-10-21 09:50:16 -05:00
Jim Miller
516f7464b7 Update messages for translations 2023-10-21 09:47:17 -05:00
Jim Miller
46be37e034 Retry Calibre metadata update when it fails due to conflicting program(Windows File Explorer) 2023-10-21 09:15:54 -05:00
Jim Miller
693f0aa774 Bump Test Version 4.28.7 2023-10-19 13:01:54 -05:00
Jim Miller
646693ca3e Change bs4.find(text=) to string= for deprecation change. 2023-10-19 13:01:47 -05:00
Jim Miller
22534986d3 adapter_asianfanficscom: Fix for 'friend only' stories initially giving 404 2023-10-19 12:08:12 -05:00
Jim Miller
18b183585a Tweaks to use_flaresolverr_proxy:directimages 2023-10-19 12:06:32 -05:00
Jim Miller
5862ba627e Bump Test Version 4.28.6 2023-10-18 12:13:27 -05:00
Jim Miller
c38f4ab400 Add use_flaresolverr_proxy:directimages experimental for #1007 2023-10-18 12:13:27 -05:00
Jim Miller
f5c9fcf029 Comment out a debug 2023-10-18 12:13:22 -05:00
Jim Miller
9e206d2215 Comment out some debugs. 2023-10-16 11:05:14 -05:00
Jim Miller
b1b2451fa6 Fixes for poor '\' escapes that give SyntaxWarning 2023-10-13 17:51:52 -05:00
Jim Miller
91f2f84c10 Remove tests for removed site wuxiaworld.site 2023-10-13 17:43:59 -05:00
Jim Miller
16ba74c98e Bump Test Version 4.28.5 2023-10-13 16:02:03 -05:00
Jim Miller
0cc3b81580 ConfigParser.read_file added in py3.2, readfp removed in py3.12, only used in plugin #1006 2023-10-13 16:01:48 -05:00
Jim Miller
c769900332 Bump Test Version 4.28.4 2023-10-12 20:43:42 -05:00
Jim Miller
a84e6ab385 Update missing image library message. 2023-10-12 20:41:58 -05:00
Jim Miller
af163c27e0 Update six.py to 1.16 for Python 3.12 compatibility. #1006 2023-10-12 20:41:48 -05:00
Jim Miller
016452ec89 Fix a spacing in ini 2023-10-12 11:30:05 -05:00
Jim Miller
b584779a13 Bump Test Version 4.28.3 2023-10-10 12:41:05 -05:00
Jim Miller
01d97ed770 Add base_adapter.img_url_trans() for adapter_fictionlive image URLs #1004 2023-10-10 12:40:49 -05:00
Jim Miller
607ef27fe1 Bump Test Version 4.28.2 2023-10-07 09:29:28 -05:00
Jim Miller
448a9cfaef Pillow minimum version for CLI 2023-10-07 09:29:20 -05:00
Jim Miller
88fb6069fc Bump Test Version 4.28.1 2023-10-07 09:23:23 -05:00
Jim Miller
cd5fd2cab4 Pillow change for CLI, closes #1002 2023-10-07 09:23:04 -05:00
Jim Miller
a21fcf7e77 Bump Release Version 4.28.0 2023-10-02 13:34:29 -05:00
Jim Miller
627a8dbff5 Update translations. 2023-10-02 13:33:49 -05:00
Jim Miller
dd1207f11e Bump Test Version 4.27.4 2023-09-21 20:01:15 -05:00
Jim Miller
49aec452ca adapter_fanficsme: More fixes for unusual cases, now using regexp #999 2023-09-21 20:01:08 -05:00
Jim Miller
e033f71ece Bump Test Version 4.27.3 2023-09-21 11:45:34 -05:00
Jim Miller
62b097f3d5 adapter_fanficsme: Fixes for some unusual cases Closes #999 2023-09-21 11:44:22 -05:00
Jim Miller
3098c1983f Bump Test Version 4.27.2 2023-09-20 11:29:07 -05:00
Jim Miller
37626680f9 Refactor adastrafanfic.com to use base_otw_adapter 2023-09-20 11:29:07 -05:00
Jim Miller
d99fe607da Refactor to make base_otw_adapter 2023-09-20 11:29:07 -05:00
Jim Miller
c80f22cdd3 Remove site: noveltrove.com - Site broken +1 years, owner unresponsive 3years Closes #998 2023-09-20 11:29:07 -05:00
Jim Miller
0b6402ca8a Remove site: hlfiction.net - Site broken ~3 years (sql errors) 2023-09-20 11:29:07 -05:00
Jim Miller
26a7633337 Remove site: worldofx.de - Changed ~2years ago incompatibly, not efiction 2023-09-20 11:29:07 -05:00
Jim Miller
3ee7614441 Remove site: archive.skyehawke.com - Domain parked, broken ~3years 2023-09-20 11:29:07 -05:00
Jim Miller
718ae6ac83 Remove site: www.destinysgateway.com - Domain parked +1year 2023-09-20 11:29:07 -05:00
Jim Miller
e0686eada2 Remove site: merengo.hu - Doesn't serve text in full print +1year 2023-09-20 11:29:07 -05:00
Jim Miller
9f1fd42889 Remove site: www.scarvesandcoffee.net - Changed +2years ago incompatibly 2023-09-20 11:29:07 -05:00
Jim Miller
a088a34c89 Remove site: www.silmarillionwritersguild.org - Changed +2years ago incompatibly 2023-09-20 11:29:07 -05:00
Jim Miller
14cdc10ee3 Remove site: www.lushstories.com - Changed +2years ago incompatibly
Closes #988
2023-09-20 11:28:45 -05:00
Jim Miller
8667643e7c Remove site: www.lotrgfic.com - DNS removed +1year ago 2023-09-20 10:49:57 -05:00
Jim Miller
e6d123a17d Switch from setup.py to pyproject.toml for CLI packaging. 2023-09-05 12:48:50 -05:00
Jim Miller
ae28b714b3 Bump Test Version 4.27.1 2023-09-05 10:30:22 -05:00
Jim Miller
33cd1642f8 Explicitly call set_image_allocation_limit() for larger image buffer and error on 0x0 image from image_and_format_from_data() 2023-09-05 10:29:34 -05:00
Jim Miller
63ec69f9f2 Log calibre, etc version data in FFF BG job. 2023-09-05 10:12:37 -05:00
Jim Miller
20ea9a00ed Bump Release Version 4.27.0 2023-09-01 08:30:19 -05:00
Jim Miller
779222b66d Bump Test Version 4.26.5 2023-08-24 08:10:53 -05:00
Jim Miller
afb2b9fe29 AO3: Real fix for adult string change, revert earlier attempted fixes. 2023-08-24 08:10:45 -05:00
Jim Miller
20052e1922 Bump Test Version 4.26.4 2023-08-23 14:59:03 -05:00
Jim Miller
e03f3f40da AO3: Don't assume div preface always present in chapters. 2023-08-23 14:58:55 -05:00
Jim Miller
00f6656d7d Bump Test Version 4.26.3 2023-08-23 13:24:32 -05:00
Jim Miller
dd2c1a48b5 AO3: Don't assume chapter userstuff module always present. 2023-08-23 13:24:27 -05:00
Jim Miller
a37588a8f7 Bump Test Version 4.26.2 2023-08-23 08:47:36 -05:00
Jim Miller
fc99805a85 AO3: Don't assume byline always present. 2023-08-23 08:47:29 -05:00
Jim Miller
d73b1732d3 Bump Test Version 4.26.1 2023-08-17 23:12:14 -05:00
Jim Miller
043fb289bf Fix for extratags not being picked up by include_in_subjects 2023-08-17 23:12:06 -05:00
Jim Miller
a0332f27be Bump Release Version 4.26.0 2023-08-17 12:06:13 -05:00
Jim Miller
99285763d3 adapter_royalroadcom: user found a story with no chapters 2023-08-16 12:06:00 -05:00
Jim Miller
26467d8f35 Bump Test Version 4.25.14 2023-08-05 16:38:04 -05:00
Jim Miller
930ba5bb19 base_efiction: .string -> stripHTML for nested tags Closes #984 2023-08-05 16:18:47 -05:00
Jim Miller
fb552c823a Bump Test Version 4.25.13 2023-07-27 08:29:51 -05:00
burny2
bfc0c4f3ef Fix fanfiktionde status parsing 2023-07-27 08:27:48 -05:00
Jim Miller
216cb27f03 Bump Test Version 4.25.12 2023-07-26 11:47:36 -05:00
Jim Miller
21a5ded593 AO3: Make subscribed, markedforlater True/False to match bookmarked/bookmarkprivate/bookmarkrec 2023-07-26 11:47:00 -05:00
Jim Miller
ff07987a02 Bump Test Version 4.25.11 2023-07-25 17:54:45 -05:00
Jim Miller
bd6afdafb8 AO3: Add subscribed, markedforlater 2023-07-25 17:54:37 -05:00
Jim Miller
fd7c5ac867 Add title replace_metadata(commented) for literotica Ch/Pt titles 2023-07-25 16:47:32 -05:00
Jim Miller
87eb84b5fa Bump Test Version 4.25.10 2023-07-23 16:29:36 -05:00
Jim Miller
784cb711d8 Update comments for include_subject_tags x_LIST 2023-07-23 16:29:30 -05:00
Jim Miller
54a00a934b Add _LIST option to include_subject_tags(/extra_subject_tags), refactor 2023-07-23 16:09:54 -05:00
Jim Miller
c638ac8457 Bump Test Version 4.25.9 2023-07-22 09:40:28 -05:00
Jim Miller
b710a4cdc7 Filter cookies for flaresolverr 2023-07-22 09:40:22 -05:00
Jim Miller
16c8c6b445 Bump Test Version 4.25.8 2023-07-22 08:48:08 -05:00
Jim Miller
5cee35149f AO3 fix for protected email addr in chapter name breaking chapter datetime 2023-07-22 08:43:36 -05:00
Jim Miller
de201c7263 Remove some test1.com default settings 2023-07-21 20:39:10 -05:00
Jim Miller
222a4f4828 Bump Test Version 4.25.7 2023-07-21 17:47:04 -05:00
Jim Miller
7d6af47f60 Fix for #979, AO3 Get URLs from Page 2023-07-21 17:46:56 -05:00
Jim Miller
1c05d58d1a Bump Test Version 4.25.6 2023-07-19 11:14:02 -05:00
Jim Miller
8152b51353 adapter_storiesofardacom: Fix for detecting adult question on indiv chapters. 2023-07-19 11:13:18 -05:00
Jim Miller
d387eafff2 Bump Test Version 4.25.5 2023-07-18 08:41:29 -05:00
Rose Davidson
fe5605ea50 Add support for www.sunnydaleafterdark.com
This is an EFiction style site, focusing on Buffy the Vampire Slayer fics with the Buffy/Spike ship.

There are a few quirks about how the site shows the infobox metadata.
2023-07-18 08:40:29 -05:00
Jim Miller
7f97decb8a Bump Test Version 4.25.4 2023-07-15 17:06:20 -05:00
Jim Miller
cfd28dd1ff Add anthology_merge_keepsingletocs option, requires new EpubMerge. 2023-07-15 17:06:20 -05:00
Jim Miller
2c43eab432 Use anthology url for site config section 2023-07-15 16:15:30 -05:00
Jim Miller
fda597ddae Bump Test Version 4.25.3 2023-07-12 12:48:24 -05:00
Jim Miller
7502c0f2fb Apply mark_new_chapters to new story chapters in Anthologies. #977 2023-07-12 12:48:13 -05:00
Jim Miller
eaeeda6911 Allow mark_new_chapters when 1 chapter in case it changes. 2023-07-12 10:26:15 -05:00
Jim Miller
8850c1a62b Increase sleep times between cache checks using open_pages_in_browser. 2023-07-08 13:32:02 -05:00
Jim Miller
0205ec4ccb Bump Test Version 4.25.2 2023-07-08 13:16:36 -05:00
Jim Miller
2600bf7be5 adapter_literotica: 'Fix' clean_chapter_titles for titles ending with Pt or Ch 2023-07-08 13:13:42 -05:00
Jim Miller
012ff40f0f Bump Test Version 4.25.1 2023-07-04 18:48:34 -05:00
Jim Miller
0df9e39931 Fix for ficbook.net date change. Closes #973 2023-07-04 18:48:24 -05:00
Jim Miller
97fcc3af33 Bump Release Version 4.25.0 2023-07-03 15:38:14 -05:00
Jim Miller
be40433377 Bump Test Version 4.24.9 2023-06-29 17:23:13 -05:00
Jim Miller
a1f29cb034 Fix for specific cover error. 2023-06-29 17:23:06 -05:00
Jim Miller
b2b584d832 Bump Test Version 4.24.8 2023-06-26 19:34:01 -05:00
Jim Miller
415cd6597e Fix for make_firstimage_cover causing embedded image to also use cover.jpg. 2023-06-26 19:33:47 -05:00
Jim Miller
d1d5d61b87 Bump Test Version 4.24.7 2023-06-24 14:45:14 -05:00
Jim Miller
2c11ecc5c8 adapter_wuxiaworldxyz: Paginated TOC 2023-06-24 14:45:00 -05:00
Jim Miller
0ac66425f8 Bump Test Version 4.24.6 2023-06-18 17:27:09 -05:00
Jim Miller
367d3e4435 Put output_css after workskin so it can override. See #967 2023-06-18 17:26:30 -05:00
Jim Miller
05b7147e64 Fix whitespace 2023-06-18 17:25:51 -05:00
Jim Miller
200c877418 Fix whitespace 2023-06-18 17:04:48 -05:00
Jim Miller
84323c1608 Use site lists for shared config entries in personal.ini 2023-06-18 16:53:42 -05:00
niacdoial
3ba2edef2d
improved config mechanism to include workskin 2023-06-18 18:43:31 +02:00
niacdoial
e5cc1cccf2
add ability to download AO3 workskin if scraping styles is enabled 2023-06-18 17:15:05 +02:00
Jim Miller
c50ffc40dc Bump Test Version 4.24.5 2023-06-17 17:00:54 -05:00
Jim Miller
1f8106c1f3 fix_relative_text_links fix for #anchors doesn't handle '.' in id. Fixes #966 refer #952 2023-06-17 17:00:40 -05:00
Jim Miller
d9ca72571e Bump Test Version 4.24.4 2023-06-17 10:42:16 -05:00
Jim Miller
ecb0620929 Make Rejects List Note column orderable. 2023-06-17 10:41:19 -05:00
Jim Miller
c6b381e61a Bump Test Version 4.24.3 2023-06-14 13:24:06 -05:00
mvlcek
faf352bf80 Login now has a hidden token. 2023-06-14 11:54:26 -05:00
Jim Miller
269b7d5bd1 Bump Test Version 4.24.2 2023-06-14 09:51:49 -05:00
Jim Miller
439d617364 AO3 Check for hidden work after login. 2023-06-14 09:51:28 -05:00
Jim Miller
d0c85feda5 Bump Test Version 4.24.1 2023-06-12 16:48:00 -05:00
Jim Miller
25ebc603e7 Allow for href='' in fix_relative_text_links processing. 2023-06-12 16:47:54 -05:00
Jim Miller
1683d950c3 Cleanup changes from #958 2023-06-08 08:09:40 -05:00
chocolatechipcats
961bb28ecd Update plugin-defaults.ini
Updated the always-login comment to mention hide warnings/additional tags.
2023-06-08 08:05:24 -05:00
chocolatechipcats
bbb3db31a8 Update defaults.ini
Updated the always-login comment to mention hide warnings/additional tags.
2023-06-08 08:05:24 -05:00
chocolatechipcats
c917c5da3d Update plugin-defaults.ini
Added note about login-restricted stories in include_in_genre.
2023-06-08 08:05:24 -05:00
chocolatechipcats
edc2056e75 Update defaults.ini
Added note about login-restricted stories in include_in_genre.
2023-06-08 08:05:24 -05:00
chocolatechipcats
84b7cbcda2 Update defaults.ini
Added note about login-restricted stories in include_in_genre.
2023-06-08 08:05:24 -05:00
Jim Miller
44484670f2 Bump Release Version 4.24.0 2023-06-02 08:40:56 -05:00
Jim Miller
0b442422ab Bump Test Version 4.23.6 2023-05-27 20:21:25 -05:00
Jim Miller
d0448af52e AO3: Apparently minor change to chapter end note HTML. #956 2023-05-27 20:20:41 -05:00
Jim Miller
e82585ecc7 Bump Test Version 4.23.5 2023-05-27 16:36:41 -05:00
Jim Miller
ff36bd30c5 Fix force_cover_image when already in story 2023-05-27 16:36:33 -05:00
Jim Miller
12b2117c77 Fix wuxiaworld.xyz specific setting section 2023-05-25 10:04:49 -05:00
Jim Miller
34ec532eed Bump Test Version 4.23.4 2023-05-21 10:44:41 -05:00
Jim Miller
2fa23ce9fd wuxiaworld.co -> wuxiaworld.xyz changes most data collection #953 2023-05-21 10:44:22 -05:00
Jim Miller
8399061dc9 Bump Test Version 4.23.3 2023-05-08 21:53:45 -05:00
Jim Miller
86ab2806fa fix_relative_text_links: Keep #anchor links if target also in chapter. See #952 2023-05-08 21:51:42 -05:00
Jim Miller
6f77504ca9 Fix Update Always metadata in BG logic 2023-05-08 19:47:46 -05:00
Jim Miller
a259297092 Bump Test Version 4.23.2 2023-05-06 21:34:32 -05:00
Jim Miller
2c662b6f33 Add order_chapters_by_date option for literotica.com, used to be hard coded. 2023-05-06 21:34:17 -05:00
Jim Miller
548d6a5a58 Bump Test Version 4.23.1 2023-05-04 19:24:12 -05:00
Jim Miller
f3d2513d32 Add force_update_epub_always option to update when EPUB has more chapters than source. See #950 #949 #942. 2023-05-04 19:23:18 -05:00
Jim Miller
8b20756095 Bump Release Version 4.23.0 2023-05-02 08:37:33 -05:00
Jim Miller
8f093769ce Bump Test Version 4.22.7 2023-04-27 13:38:53 -05:00
Jim Miller
f6dafecfa1 Change force_img_referer to force_img_self_referer_regexp See #940 #941 2023-04-27 13:37:13 -05:00
Jim Miller
98f95a7da8 adapter_deviantartcom: Another detect login string *properly*. #947 2023-04-27 13:18:59 -05:00
Jim Miller
f3d373c8ca Change force_img_referer to force_img_referer See #940 #941 2023-04-27 09:06:54 -05:00
Jim Miller
536ff35d66 Bump Test Version 4.22.6 2023-04-27 08:37:17 -05:00
Jim Miller
6d31c5fb94 adapter_deviantartcom: Another detect login string. #947 2023-04-27 08:36:58 -05:00
Jim Miller
5730d3583a Bump Test Version 4.22.5 2023-04-18 10:07:20 -05:00
Jim Miller
da64336967 Show INI highlighting under [storyUrl] sections 2023-04-18 10:07:09 -05:00
Jim Miller
480311c442 Update translations. 2023-04-17 10:32:32 -05:00
Jim Miller
8b44e3d4b6 adapter_quotevcom: Some stories have no comments. 2023-04-17 09:58:34 -05:00
Jim Miller
9049625ec2 Bump Test Version 4.22.4 2023-04-08 21:34:29 -05:00
Jim Miller
d8c70ceae2 Don't try to set seriesUrl when no series (Anthologies) 2023-04-08 21:33:59 -05:00
Jim Miller
95bb8a0c7f Bump Test Version 4.22.3 2023-04-06 14:49:33 -05:00
Jim Miller
9b1a64616b Add force_img_referer optional feature. Closes #940 2023-04-06 14:48:52 -05:00
Jim Miller
8a6894fa28 Fix to allow update epub to get missing images. 2023-04-06 12:27:21 -05:00
Jim Miller
7c4e819c93 Add comment about why cover not read from epub on update 2023-04-06 12:03:21 -05:00
Jim Miller
9bedeb55a0 Add to AO3 authorUrl comments. 2023-04-05 10:20:48 -05:00
Jim Miller
6c92d45d97 Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2023-04-05 10:14:16 -05:00
chocolatechipcats
c7c029c706 missed a line... 2023-04-05 10:14:11 -05:00
chocolatechipcats
6fec02f79e missed a line... 2023-04-05 10:14:11 -05:00
chocolatechipcats
fc3e8bb8ff Replacing orphan_account regex 2023-04-05 10:14:11 -05:00
chocolatechipcats
3f52734da2 Replacing orphan_account regex 2023-04-05 10:14:11 -05:00
chocolatechipcats
cde8a739fb orphan_account authorUrl
This replaces the orphan_account authorUrl (which 404's) with a link to the AO3 homepage
2023-04-05 10:14:11 -05:00
chocolatechipcats
dc5837badb orphan_account authorUrl
This replaces the orphan_account authorUrl (which 404's) with a link to the AO3 homepage
2023-04-05 10:14:11 -05:00
Jim Miller
43a2d5cd67 Series name can also have [ in it. 2023-04-04 17:14:18 -05:00
Jim Miller
2c0a1d1046 Don't use Raw series with calibre_series_meta. 2023-04-04 16:56:14 -05:00
Jim Miller
64aaaf6daa Bump Test Version 4.22.2 2023-04-03 16:49:03 -05:00
Jim Miller
dd2a076b6f Add static include_in_* when double quoted. 2023-04-02 19:06:54 -05:00
Jim Miller
cf7f84c886 Set Calibre Series URL link. 2023-04-01 20:46:31 -05:00
Jim Miller
98a5a120c1 Bump Test Version 4.22.1 2023-04-01 20:46:31 -05:00
Jim Miller
77d35d88c7 Anthologies don't need per-story config(custom_columns_settings) 2023-04-01 20:46:31 -05:00
Jim Miller
f25ed9efbb Bump Release Version 4.22.0 2023-04-01 10:14:18 -05:00
Jim Miller
de7e4803a3 Bump Test Version 4.21.8 2023-03-29 22:38:59 -05:00
Jim Miller
1516b100d2 Remove site: merlinfic.dtwins.co.uk 'This site has been removed due to PHP compatibility issues.' 2023-03-29 22:34:57 -05:00
Jim Miller
7ff2976dfe Remove site: tasteofpoison.inkubation.net broken ~3 years 2023-03-29 22:32:02 -05:00
Jim Miller
f4426d0532 Remove site: www.andromeda-web.com broken ~3 years 2023-03-29 22:31:13 -05:00
Jim Miller
f4fbbf0d34 Remove site: sword.borderline-angel.com broken ~3 years 2023-03-29 22:29:14 -05:00
Jim Miller
57cf738df5 Remove site: www.qaf-fic.com broken ~3 years 2023-03-29 22:27:50 -05:00
Jim Miller
edb09d1a7e Remove site: buffygiles.velocitygrass.com broken ~3 years 2023-03-29 22:26:41 -05:00
Jim Miller
84c5e245e6 Remove site: trekiverse.org broken ~3 years 2023-03-29 22:24:49 -05:00
Jim Miller
95cece7e9c Remove site: archive.shriftweb.org broken ~4 years 2023-03-29 19:54:17 -05:00
Jim Miller
ea345b059d Remove site: csi-forensics.com - SSL expired +1yr, broken ~3 years 2023-03-29 19:50:49 -05:00
Jim Miller
6ca6d47066 Remove site: www.wraithbait.com - SSL expired +1yr, broken ~3 years 2023-03-29 19:49:29 -05:00
Jim Miller
fea04ed16c Remove site: www.ik-eternal.net - No DNS, broken ~18 months 2023-03-29 19:46:26 -05:00
Jim Miller
84b3b6d61e Remove site: themaplebookshelf.com - No DNS, broken ~18 months 2023-03-29 19:45:09 -05:00
Jim Miller
4f0be16f0b Remove site: www.looselugs.com - No DNS, broken ~3 years 2023-03-29 19:43:26 -05:00
Jim Miller
f8fc1a2881 Remove site: fanfic.potterheadsanonymous.com - No DNS, broken ~3 years 2023-03-29 19:42:02 -05:00
Jim Miller
f9471377bb Remove site: sugarquill.net - Site retired 2023-03-29 17:56:12 -05:00
Jim Miller
152088de87 adapter_thehookupzonenet: Fix changed date format 2023-03-29 17:49:01 -05:00
Jim Miller
82702ea958 Update translations. 2023-03-25 09:54:36 -05:00
Jim Miller
3432a786d5 Bump Test Version 4.21.7 2023-03-21 21:17:14 -05:00
Jim Miller
4fd8972f6a adapter_ficbooknet: Fix for site change. 2023-03-21 21:17:09 -05:00
Jim Miller
e4847653c6 Bump Test Version 4.21.6 2023-03-21 13:30:16 -05:00
Jim Miller
6e73c7400a adapter_wattpadcom: Fix accidentally hardcoding a story in. 2023-03-21 13:16:20 -05:00
Jim Miller
5c40f4073a Bump Test Version 4.21.5 2023-03-20 11:18:25 -05:00
Jim Miller
da3777a0ca Changes to wattpad API mapping chapter URLs to story URLs. 2023-03-20 11:18:20 -05:00
Jim Miller
dd636bb55f Bump Test Version 4.21.4 2023-03-15 10:24:59 -05:00
Jim Miller
6fcfdaabf3 Remove inline ads, only seen with flaresolverr so far 2023-03-15 10:24:51 -05:00
Jim Miller
e26eb9d9cc Bump Test Version 4.21.3 2023-03-14 16:47:25 -05:00
Jim Miller
732d40f5c8 Fix for custom columns [storyUrl] sections. 2023-03-14 16:46:58 -05:00
Jim Miller
814cf2931c Bump Test Version 4.21.2 2023-03-09 13:42:49 -06:00
Jim Miller
5e4f041509 Remove doubled doreplacements/removeallentities from author(etc)HTML processing. 2023-03-09 13:40:02 -06:00
Jim Miller
8862ec985f Bump Test Version 4.21.1 2023-03-07 10:44:17 -06:00
Jim Miller
c887697d61 AO3: Better fix for always_reload_first_chapter vs use_view_full_work
Fixes #932
This reverts commit a2e9d29cf6.
2023-03-07 10:39:30 -06:00
Jim Miller
30115980af adapter_fictionmaniatv: Site change for status, fixes #931 2023-03-07 09:55:13 -06:00
Jim Miller
be057e296f Bump Release Version 4.21.0 2023-03-01 11:22:40 -06:00
Jim Miller
a5d42e07c9 Bump Test Version 4.20.7 2023-02-26 16:49:02 -06:00
Hazel Shanks
6484f588e4 fix #922 -- call utf8fromSoup exactly once 2023-02-26 16:38:08 -06:00
chocolatechipcats
83a5c28d71 Update defaults.ini 2023-02-24 17:59:37 -06:00
chocolatechipcats
96a129a70f Update plugin-defaults.ini 2023-02-24 17:59:37 -06:00
Jim Miller
51e6892a5e Bump Test Version 4.20.6 2023-02-24 16:21:50 -06:00
Jim Miller
47ad5c1e1f adapter_royalroadcom: Fixes for site changes. #923 2023-02-24 16:19:52 -06:00
Jim Miller
bdb90941d3 Bump Test Version 4.20.5 2023-02-19 20:43:47 -06:00
Jim Miller
a2e9d29cf6 AO3: Re-soup full_work on every chapter to avoid problems with soup changes. Found with always_reload_first_chapter:true 2023-02-19 20:43:28 -06:00
Jim Miller
b43bec4126 Import brotlidecpy directly for A-shell on iOS users. 2023-02-17 14:23:46 -06:00
Jim Miller
5992f835fb defaults.ini: Add comment about setting int/float custom columns to None 2023-02-12 17:07:41 -06:00
Jim Miller
263c840f30 Minor fix in exception processing 2023-02-11 16:40:02 -06:00
Jim Miller
7786b1b5a9 Bump Test Version 4.20.4 2023-02-11 13:39:16 -06:00
Jim Miller
b1ce5f8956 adapter_thesietchcom: Fix for site more closely following XenForo2 2023-02-11 13:38:37 -06:00
Jim Miller
5e6ab494b9 Bump Test Version 4.20.3 2023-02-10 10:00:22 -06:00
Jim Miller
b99560acca FlareSolverr: novelfull.com sometimes w/o expires of any kind 2023-02-10 09:20:22 -06:00
Jim Miller
b146552e39 Bump Test Version 4.20.2 2023-02-05 09:48:18 -06:00
Jim Miller
8468a502bb Add style attr by default to fiction.live 2023-02-05 09:48:18 -06:00
Jim Miller
1b96617c78 adapter_fictionlive: Soup chapter text to fix up HTML 2023-02-05 09:48:18 -06:00
Jim Miller
7ac179e068 Bump Test Version 4.20.1 2023-02-03 12:20:13 -06:00
Jim Miller
f29f3f973a Comment out some debug 2023-02-03 09:00:06 -06:00
Jim Miller
e775bd451d Bump Release Version 4.20.0 2023-02-02 10:29:04 -06:00
Jim Miller
bef71a49b6 Bump Test Version 4.19.16 2023-01-26 09:46:42 -06:00
Jim Miller
e5ab3e1d0c Fixes for adapter_fictionlive story URLs-normalize & skip unsub URL 2023-01-26 09:46:35 -06:00
Jim Miller
bb06ffdaea Bump Test Version 4.19.15 2023-01-23 10:58:41 -06:00
Jim Miller
5ce7aa5c48 Merge branch 'bugmaschine-main' 2023-01-23 10:58:03 -06:00
Jim Miller
85450360de Fixes for #910 adapter_deviantartcom date changes. 2023-01-23 10:55:18 -06:00
Bugmaschine
ec6873f95f C
This should be better
2023-01-23 10:55:18 -06:00
Bugmaschine
e4d5b61ef6 Modified code to use existing parse_relative_date_string function 2023-01-23 10:55:18 -06:00
Bugmaschine
644bd369e4 Did some testing, Should work 2023-01-23 10:55:18 -06:00
Bugmaschine
dede2376c3 Looks cleaner 2023-01-23 10:55:18 -06:00
Bugmaschine
2bd727bec2 Should work, but it is not clean 2023-01-23 10:55:18 -06:00
Jim Miller
50c85d4835 Revamp retries for browser cache with open_pages_in_browser 2023-01-23 10:39:17 -06:00
Jim Miller
7103630e55 Bump Test Version 4.19.14 2023-01-20 09:58:22 -06:00
Jim Miller
a31d58bca3 Fix for &amp; in chapter title. 2023-01-20 09:58:16 -06:00
Jim Miller
6ae424d3ff Bump Test Version 4.19.13 2023-01-17 21:28:16 -06:00
Jim Miller
3b703da1f3 Add r_anthmax/n_anthmax options for custom_columns_settings 2023-01-17 21:28:10 -06:00
Bugmaschine
6695f23079 Fixed the Deviantart adapter not detecting that a Deviation is Marked as needing a login 2023-01-17 13:33:25 -06:00
Jim Miller
5d4d8e6239 Bump Test Version 4.19.12 2023-01-15 14:47:02 -06:00
Jim Miller
b14590c112 Skip day of week for localization in browsercache_firefox2 2023-01-15 14:46:51 -06:00
Jim Miller
e11e09f935 Bump Test Version 4.19.11 2023-01-15 13:17:55 -06:00
Jim Miller
4e0aa707b9 Move makeDate to dateutils to call from browsercache_firefox2 2023-01-15 13:17:50 -06:00
Jim Miller
0845deb095 Bump Test Version 4.19.10 2023-01-14 10:01:14 -06:00
Jim Miller
2719705a1a adapter_mediaminerorg: Updates for site changes 2023-01-14 10:00:57 -06:00
Jim Miller
346da2cdee Report site unavailable for AO3. Closes #908 2023-01-13 17:16:43 -06:00
Jim Miller
db39aaf4ff Bump Test Version 4.19.9 2023-01-13 10:37:09 -06:00
Jim Miller
22ea1d4a15 adapter_fastnovelsnet: Fixes for site changes -- tested with use_flaresolverr_proxy 2023-01-13 10:36:53 -06:00
Jim Miller
4365e852fe Remove a debug 2023-01-13 10:35:42 -06:00
Jim Miller
6a474eb0a0 Bump Test Version 4.19.8 2023-01-12 12:05:38 -06:00
Jim Miller
020d8d9e5b Update language->langcode mapping for updated AO3 list 2023-01-12 12:05:18 -06:00
Fedor Suchkov
220ca33cc9 Added fanficfare.fetchers to packages in setup.py. 2023-01-12 10:36:40 -06:00
Jim Miller
2cee4cca06
Merge pull request #905 from JimmXinu/browsercache
Browser Cache Refactor & open_pages_in_browser feature
2023-01-11 11:14:58 -06:00
Jim Miller
a31ace8032 Bump Test Version 4.19.7 2023-01-11 11:00:35 -06:00
Jim Miller
6d0495eab8 Add open_pages_in_browser setting to defaults.ini 2023-01-11 11:00:29 -06:00
Jim Miller
6d6457a32f Bump Test Version 4.19.6 2023-01-06 13:49:34 -06:00
Jim Miller
befe0e5254 Tweak (undoc) setting name open_pages_in_browser_tries_limit 2023-01-06 13:41:55 -06:00
Jim Miller
2c41230b74 Bump Test Version 4.19.5 2023-01-06 13:31:50 -06:00
Jim Miller
0e1e92750c Use scheme in cache keys for http vs https 2023-01-06 13:29:53 -06:00
Jim Miller
b27854b8a5 Newer default date due to timezones 2023-01-06 13:05:51 -06:00
Jim Miller
2c504ae67e Change header labels to all lowercase. 2023-01-06 13:04:56 -06:00
Jim Miller
24d02895ef Bump Test Version 4.19.4 2023-01-06 12:53:02 -06:00
Jim Miller
01887e37b4 Tweak browser cache timings 2023-01-06 12:33:59 -06:00
Jim Miller
628f76c20a Fix first time browser cache sleep 2023-01-06 12:23:17 -06:00
Jim Miller
f31e7b1860 Fix for fictionpress.com 2023-01-06 12:22:48 -06:00
Jim Miller
073d52a17c Bump Test Version 4.19.3 2023-01-06 12:03:18 -06:00
Jim Miller
eac3531f31 Tweak open_pages_in_browser timings. 2023-01-06 12:02:48 -06:00
Jim Miller
7873e25779 Make py2.7 compatible. 2023-01-06 11:29:43 -06:00
Jim Miller
f468611b01 Firefox2 cache dates, convert UTC to local 2023-01-06 11:18:08 -06:00
Jim Miller
d3aea54b6c Bump Test Version 4.19.2 2023-01-05 14:19:01 -06:00
Jim Miller
1d5afe8cd6 Wrap browser cache in thread lock just in case. 2023-01-05 14:07:34 -06:00
Jim Miller
91d6aacc74 open_browser_pages_tries_limit basic implementation 2023-01-05 13:12:39 -06:00
Jim Miller
0036ba94d9 Tweak debug output 2023-01-05 13:11:54 -06:00
Jim Miller
3711663a12 Take firefox cached time from response header. 2023-01-05 13:11:17 -06:00
Jim Miller
7e2eb531ba Comment some debugs, tweak browser cache to do normal sleep after open browser 2023-01-02 19:24:40 -06:00
Jim Miller
39cca07432 Bump Test Version 4.19.1 2023-01-01 13:06:22 -06:00
Jim Miller
001cdd34c7 Tweaks to browser cache 2023-01-01 13:06:03 -06:00
Jim Miller
4cb0201970 open_pages_in_browser setting 2023-01-01 13:06:03 -06:00
Jim Miller
56da4a2850 Fix EmailPassDialog 2023-01-01 13:06:03 -06:00
Jim Miller
f613fea791 Location: and location: headers both used... 2023-01-01 13:06:03 -06:00
Jim Miller
ccd25b0c93 Only apply open_page_in_browser when use_browser_cache_only:true 2023-01-01 13:06:03 -06:00
Jim Miller
60c14c2cef Add key list to browser cache to look for WebToEpub cache entries 2023-01-01 13:06:03 -06:00
Jim Miller
895274ad24 Scandir for cache troubleshooting 2023-01-01 13:06:03 -06:00
Jim Miller
bf13b81837 Move open_page_in_browser up into BrowserCacheDecorator 2023-01-01 13:06:03 -06:00
Jim Miller
adeb9f26c3 Move open_page_in_browser up into BrowserCacheDecorator 2023-01-01 13:06:03 -06:00
Jim Miller
c3631f6ac7 Change BrowserCache to on-demand, not scan 2023-01-01 13:06:03 -06:00
Jim Miller
1301fc3dc4 Rename browsercache files before changing contents to preserve history. 2023-01-01 13:06:03 -06:00
Jim Miller
d76fa989d1 Fix encoding auto/chardet 2023-01-01 13:06:03 -06:00
Jim Miller
53dd0073f1 Update defaults.ini to only quotev.com benefiting from use_cloudscraper 2023-01-01 13:06:03 -06:00
Jim Miller
b6b0b0a8c5 Make failed chapter URLs links with continue_on_chapter_error 2023-01-01 13:06:03 -06:00
Jim Miller
c0573d76fd Include (current) story-title in normalized ffnet chapter URLs. 2023-01-01 13:06:03 -06:00
Jim Miller
44b803a529 Tweak debug output 2023-01-01 13:06:03 -06:00
Jim Miller
c6705a82db Refactoring for browser cache v2/fetcher 2023-01-01 13:06:03 -06:00
Jim Miller
66813584f5 Bump Release Version 4.19.0 2023-01-01 13:00:04 -06:00
Jim Miller
e61829052e Bump Test Version 4.18.5 2022-12-30 08:38:16 -06:00
Jim Miller
701d358ea6 Fixes for config base_xenforo options, closes #902 2022-12-30 08:35:56 -06:00
Jim Miller
15d434fce2 Bump Test Version 4.18.4 2022-12-16 13:51:14 -06:00
Eleanor Davies
c801729215
scribblehub flaresolverr fix (#900)
* scribblehub flaresolverr fix
2022-12-16 13:46:34 -06:00
Jim Miller
2e192380f0 Bump Test Version 4.18.3 2022-12-16 12:29:37 -06:00
Jim Miller
4c4355a910 Equalize ok/cancel buttons on user/pass & email pass dialogs 2022-12-16 12:11:37 -06:00
Jim Miller
7c17a2dcd0 Fix for adapter_quotevcom status 2022-12-16 11:57:50 -06:00
Jim Miller
186a97042b Bump Test Version 4.18.2 2022-11-30 09:42:12 -06:00
Jim Miller
d2f6d2d6b8 adapter_ficbooknet: Site change for status + remove debug 2022-11-30 09:42:04 -06:00
Jim Miller
0c1bbd0c96 Bump Test Version 4.18.1 2022-11-27 09:14:20 -06:00
Jim Miller
f5f9a7d303 Tweak for adapter_storiesonlinenet description parsing 2022-11-27 09:14:04 -06:00
Jim Miller
224bd11821 Bump Release Version 4.18.0 2022-11-21 19:04:53 -06:00
Jim Miller
6d6cac850b Remove a somewhat misleading status message. #897 2022-11-21 19:03:51 -06:00
Jim Miller
d81cc0bd4a Bump Test Version 4.17.8 2022-11-15 08:43:10 -06:00
Jim Miller
73459f2b83 Still allow images with use_flaresolverr_proxy if use_browser_cache 2022-11-15 08:42:41 -06:00
Jim Miller
aa8c96de7b defaults.ini file name settings tweaks 2022-11-14 12:06:09 -06:00
Jim Miller
61a7701e78 Bump Test Version 4.17.7 2022-11-10 21:45:13 -06:00
Jim Miller
337086b90b Update metadata caching with dependency invalidating 2022-11-10 21:45:00 -06:00
Jim Miller
20003aa49d Bump Test Version 4.17.6 2022-11-06 14:16:12 -06:00
Jim Miller
e1d5a68a90 Adding replace_chapter_text feature. 2022-11-06 14:15:17 -06:00
Jim Miller
ac5f94a6ac Bump Test Version 4.17.5 2022-11-06 10:25:23 -06:00
mvlcek
d85e3b977e Support classic AND modern (and minimalist) theme for storiesonline, finestories and scifistories 2022-11-06 12:01:19 +01:00
Jim Miller
fead675aae Bump Test Version 4.17.4 2022-11-05 10:12:54 -05:00
Jim Miller
c33267750d remove_class_chapter missing from config lists 2022-11-05 10:10:38 -05:00
Jim Miller
9c5badc2bf Bump Test Version 4.17.3 2022-10-26 10:14:37 -05:00
Jim Miller
b65713f902 adapter_tenhawkpresents: Change site to t.evancurrie.ca 2022-10-25 18:16:54 -05:00
Jim Miller
8ad18383cc Bump Test Version 4.17.2 2022-10-22 12:04:18 -05:00
Jim Miller
6e1892dd4e adapter_adultfanfictionorg: Fixes for site changes, thanks cryosaur. 2022-10-22 11:48:48 -05:00
Jim Miller
f593295d06 Bump Test Version 4.17.1 2022-10-21 09:18:07 -05:00
Jim Miller
7eb142e598 adapter_adultfanfictionorg: Fixes for site changes, thanks cryosaur. 2022-10-21 09:17:51 -05:00
Jim Miller
4d322a8fae Remove Calibre Update Cover option entirely(was deprecated) #878 2022-10-20 09:45:27 -05:00
Jim Miller
ccea7827ce Bump Release Version 4.17.0 2022-10-18 11:47:27 -05:00
Jim Miller
ed2bb78657 Update translations. 2022-10-18 11:47:12 -05:00
Jim Miller
8871352b2c Bump Test Version 4.16.6 2022-10-14 10:48:33 -05:00
Jim Miller
04632728bc Flaresolverr v3 beta doesn't have 'headers'?? 2022-10-14 10:48:18 -05:00
Jim Miller
d92475b980 Bump Test Version 4.16.5 2022-10-14 09:29:20 -05:00
Jim Miller
89c4b68b9f Flaresolverr v3 beta using 'expiry' cookie key, was 'expires'. 2022-10-14 09:29:14 -05:00
Jim Miller
6e97d98118 Fix site name fanfiction.tenhawkpresents.ink 2022-10-08 10:19:14 -05:00
Jim Miller
e326b81b3f Bump Test Version 4.16.4 2022-09-27 10:01:42 -05:00
Jim Miller
a7ced3d78a adapter_adultfanfictionorg: Fixes for site changes. 2022-09-27 10:01:42 -05:00
Jim Miller
c78ff37f56 Bump Test Version 4.16.3 2022-09-25 08:56:47 -05:00
Jim Miller
560abad128 Disable Cancel during metadata update ProgBar. 2022-09-25 08:56:39 -05:00
Jim Miller
1adba9193a Bump Test Version 4.16.2 2022-09-22 12:27:17 -05:00
Jim Miller
a6d492d970 adapter_chosentwofanficcom: Site has several links to each story in a series page. 2022-09-22 12:27:09 -05:00
Jim Miller
56a7f271ff Bump Test Version 4.16.1 2022-09-20 10:49:10 -05:00
Jim Miller
3fffd22996 Fixes for add_category/genre_when_multi_category settings. #884 2022-09-20 10:46:40 -05:00
Jim Miller
d11d4c5263 Bump Release Version 4.16.0 2022-09-19 12:20:42 -05:00
Jim Miller
ed5260f035 Update translations. 2022-09-19 12:20:28 -05:00
Jim Miller
5df1608d74 Bump Test Version 4.15.18 2022-09-13 10:00:05 -05:00
Jim Miller
773b2600c5 Add use_ssl_default_seclevelone option for aneroticstory 2022-09-13 09:59:13 -05:00
Jim Miller
d0fddf2da6 Update embedded certifi to 2022.06.15.1 2022-09-13 09:54:51 -05:00
Jim Miller
8ccc3dc129 Bump Test Version 4.15.17 & Update translations 2022-09-12 17:25:17 -05:00
Jim Miller
2b001f003b adapter_storiesonlinenet: Fix for empty scores. #882 2022-09-12 08:50:37 -05:00
Jim Miller
dd88bef85a Bump Test Version 4.15.16 2022-09-11 20:58:04 -05:00
Jim Miller
2a6e92e586 Add flaresolverr_proxy_timeout (default 60000ms) #703 2022-09-11 20:57:44 -05:00
Jim Miller
102b23434b Bump Test Version 4.15.15 2022-09-11 20:40:31 -05:00
Jim Miller
7ea7c8497c adapter_storiesonlinenet: More tweaks to keep story-title in URL. #882 2022-09-11 20:40:24 -05:00
Jim Miller
2faafdd9f3 Bump Test Version 4.15.14 2022-09-11 16:53:30 -05:00
Jim Miller
a09c84258f adapter_storiesonlinenet: Also change index URL after login. #882 2022-09-11 16:53:23 -05:00
Jim Miller
8a3ce58d4e Bump Test Version 4.15.13 2022-09-11 14:59:25 -05:00
Jim Miller
599a89ee6a adapter_storiesonlinenet: Fix for premium accounts redirecting to directly chapter? #882 2022-09-11 14:59:15 -05:00
Jim Miller
5b0b91eb46 Bump Test Version 4.15.12 2022-09-10 09:30:40 -05:00
Jim Miller
cddfd8b835 SOL/etc: Change story URL scheme to keep story-title & use to detect story ID reuse. #882 2022-09-10 09:30:30 -05:00
Jim Miller
770c9fa167 AO3: Detect 'This work is part of an ongoing challenge and will be revealed soon!' 2022-09-09 10:24:06 -05:00
Jim Miller
ecf4b10238 Bump Test Version 4.15.11 2022-09-09 10:09:07 -05:00
Jim Miller
4c64b406df Fix for remove from update & rejects lists when lower selected first. 2022-09-09 10:08:28 -05:00
Jim Miller
031b9052d1 Update translations. 2022-09-08 12:25:05 -05:00
Jim Miller
f276b836c7 Use a chapter URL as referrer for default_cover_image/force_cover_image. 2022-09-07 13:22:41 -05:00
Jim Miller
e63b05ff16 Bump Test Version 4.15.10 2022-09-07 10:42:28 -05:00
Jim Miller
0113d07a63 adapter_wattpadcom: Add include_chapter_banner_images feature, defaults on 2022-09-07 10:42:22 -05:00
Jim Miller
c0b6e918ad Bump Test Version 4.15.9 2022-09-06 13:59:24 -05:00
Jim Miller
92d3c7c8f0 Update defaults.ini for use_old_cover and force_cover_image 2022-09-06 13:58:25 -05:00
Jim Miller
543c741502 Bump Test Version 4.15.8 2022-09-05 13:14:54 -05:00
Jim Miller
018f87767d Deprecate(rather than remove) updateepubcover feature. #878 2022-09-05 13:14:42 -05:00
Jim Miller
238884ad53 Restore updateepubcover feature. 2022-09-05 10:57:30 -05:00
Jim Miller
cd83136278 Rename always_use_existing_cover to use_old_cover 2022-09-04 16:39:42 -05:00
Jim Miller
6759803ccd Add force_cover_image setting. 2022-09-04 16:36:55 -05:00
Jim Miller
b5f6a447b9 Bump Test Version 4.15.7 2022-09-04 15:40:34 -05:00
Jim Miller
b26b124cfe Improve handling for default_cover_image failing to load. 2022-09-04 14:07:38 -05:00
Jim Miller
e58df9ac97 Fix for always_use_existing_cover when oldcover name collides with dl image. 2022-09-04 14:01:33 -05:00
Jim Miller
11f7c6f115 Link where Default Update EPUB Cover was. 2022-09-04 13:14:30 -05:00
Jim Miller
662b808ba9 Better coverage for do_updateepubcover_warning 2022-09-04 11:35:45 -05:00
Jim Miller
dbeba818f7 Bump Test Version 4.15.6 2022-09-03 19:35:59 -05:00
Jim Miller
666c3b4143 Correct a generate_cover_settings example line in plugin-defaults.ini 2022-09-03 18:11:32 -05:00
Jim Miller
e2dba246b2 Pare down and tweak initial personal.ini for new users. 2022-09-03 17:02:07 -05:00
Jim Miller
4e57d27a57 Tweak Calibre Cover options layouts, texts and defaults to be more rational for new users. 2022-09-03 16:32:34 -05:00
Jim Miller
4a58c43af9 Only apply covernewonly to setting cover from epub--not GC. 2022-09-03 16:24:29 -05:00
Jim Miller
1d2006761d Add always_use_existing_cover setting. 2022-09-03 14:42:05 -05:00
Jim Miller
23bc94451e Add test1.com sid=91 / 92 cover_image cases. 2022-09-03 14:25:00 -05:00
Jim Miller
a1f3349da0 Remove 'Update EPUB Cover?' download up, add Cover New Only instead. 2022-09-03 14:25:00 -05:00
Jim Miller
f99889d5e8 Remove a dead line of code 2022-09-03 14:25:00 -05:00
Jim Miller
137138a8ab adapter_fictionlive shouldn't set cover_image 2022-09-02 11:23:09 -05:00
Jim Miller
640b0eac0e Clear metadata cache on numWords set from plugin for derived values. 2022-09-02 11:22:42 -05:00
Jim Miller
73b78d6335 Bump Test Version 4.15.5 2022-08-25 08:18:14 -05:00
Jim Miller
7558c998df Fix for calibre_series_meta feature when series contains [ 2022-08-25 08:17:58 -05:00
Jim Miller
387aad83b6 Bump Test Version 4.15.4 2022-08-21 18:01:27 -05:00
Nicolas SAPA
43b07b6d6a nsapa_proxy: detect proxy protocol violation
Fix #865 by validating proxy response.

Signed-off-by: Nicolas SAPA <nico@ByMe.at>
2022-08-21 18:00:22 -05:00
Jim Miller
b6abcc41cf Bump Test Version 4.15.3 2022-08-21 12:20:45 -05:00
Jim Miller
a307c128fa Also include threadmarks_title in tagsfromtitle (XF) 2022-08-21 12:20:03 -05:00
Jim Miller
16b78523e5 Remove RSS link from threadmarks_title (XF2) 2022-08-21 12:20:03 -05:00
Jim Miller
8084761154 Bump Test Version 4.15.2 2022-08-18 21:42:12 -05:00
Jim Miller
d3dd5a86a8 Better layout stretching for Make series name/comment area scrollable 2022-08-18 21:41:57 -05:00
Jim Miller
69510094d3 Bump Test Version 4.15.1 2022-08-18 21:17:45 -05:00
Jim Miller
b0ca83f760 Make series name/comment area scrollable for when lengthy. 2022-08-18 21:17:37 -05:00
Khoyo
2c707a74dd README: update archlinux package information
The Archlinux fanficfare package has been dropped from the [community] repository on 2022-04-01, and is now an AUR package. See https://aur.archlinux.org/cgit/aur.git/log/PKGBUILD?h=fanficfare
2022-08-12 19:20:29 -05:00
Jim Miller
dfbbed0709 Bump Release Version 4.15.0 2022-08-11 16:12:23 -05:00
Jim Miller
842b2d2d55 Bump Test Version 4.14.9 2022-08-09 08:54:19 -05:00
Jim Miller
af22795cd5 Update translations. 2022-08-09 08:54:19 -05:00
Jim Miller
cd71351181 adapter_adultfanfictionorg: http->https Closes #870 2022-08-09 08:48:49 -05:00
Jim Miller
86b3f49e6b Fix for bug with cal6 icon theme change - doesn't immediately affect FFF. 2022-08-02 17:54:56 -05:00
Jim Miller
7e53863d15 Bump Test Version 4.14.8 2022-07-28 12:26:02 -05:00
Jim Miller
a5832e8d02 Fix for win10/qt6 progbar not displaying initially. 2022-07-28 12:24:35 -05:00
Jim Miller
fc68c4574a Bump Test Version 4.14.7 2022-07-26 10:40:08 -05:00
Jim Miller
f4a7a8657e adapter_storiesonlinenet: Single chapter stories slightly different. Also scifistories and finestories. Closes #867 2022-07-26 10:39:40 -05:00
Jim Miller
943bf1f36c Bump Test Version 4.14.6 2022-07-26 09:02:03 -05:00
Jim Miller
2482416ea5 Add get_section_url() for adapter_royalroadcom 2022-07-25 16:19:55 -05:00
Jim Miller
431369ed42 Bump Test Version 4.14.5 2022-07-20 13:54:43 -05:00
Jim Miller
314ff73280 Use Cal6 get_icons() so icon themes apply--print_tracebacks_for_missing_resources=False cal 6.2+. 2022-07-20 13:33:26 -05:00
Jim Miller
ce6df518a2 adapter_scifistoriescom: inherit from StoriesOnlineNetAdapter instead of FineStoriesComAdapter 2022-07-20 12:37:44 -05:00
Jim Miller
99049da5c6 Set use_basic_cache:true by default for finestories.com & scifistories.com 2022-07-20 12:36:16 -05:00
Jim Miller
1d73c51712 Bump Test Version 4.14.4 2022-07-19 09:01:04 -05:00
Jim Miller
dead6872d4 Use cal6 icon theme system to allow plugin icon customization. 2022-07-19 08:59:20 -05:00
Jim Miller
7a93a494ec Don't need old icon.png, still have xcf 2022-07-18 13:22:05 -05:00
Jim Miller
93cfc97d1d Bump Point Release Version 4.14.3 2022-07-15 10:26:07 -05:00
Jim Miller
bcd16b7840 Update translations. 2022-07-15 10:24:30 -05:00
Jim Miller
be9f626c85 Bump Test Version 4.14.2 2022-07-14 14:49:52 -05:00
Jim Miller
1133f5cc3a Remove site: webnovel.com See #843 2022-07-14 14:31:19 -05:00
Jim Miller
b37ae23af7 Bump Test Version 4.14.1 2022-07-12 18:09:06 -05:00
Jim Miller
e9574d66df Fix for qt6 vs qt5 in Cal6 and personal.ini search. 2022-07-12 18:08:58 -05:00
Jim Miller
55f6b882df Bump Release Version 4.14.0 2022-07-11 12:28:24 -05:00
Jim Miller
8692665724 Update some comments. 2022-07-11 12:28:06 -05:00
Jim Miller
93f483e42c Update translations. 2022-07-11 12:27:01 -05:00
Jim Miller
05e3415059 Bump Test Version 4.13.11 2022-07-09 11:20:55 -05:00
Jim Miller
e6b66636b9 adapter_fictionhuntcom: Fix for changes to chapter list. 2022-07-09 11:20:01 -05:00
Jim Miller
13c6a1fd77 Bump Test Version 4.13.10 2022-07-07 12:23:02 -05:00
Jim Miller
9b6c6da639 base XF needs chapter title as a string. Entities added back in base_adapter. 2022-07-07 12:23:02 -05:00
Jim Miller
7b596c1110 Bump Test Version 4.13.9 2022-07-07 12:01:15 -05:00
Jim Miller
23e0977218 Update translations. 2022-07-07 12:01:15 -05:00
Jim Miller
7fbcb054ad Restore & > < entities in chapter titles. Closes #863 2022-07-07 11:57:59 -05:00
Jim Miller
40a2af2b3d Bump Test Version 4.13.8 2022-07-06 11:21:14 -05:00
Jim Miller
0b8180a2cf adapter_fictionhuntcom: Update for site changes. 2022-07-06 11:20:32 -05:00
Jim Miller
33f3aa8dd2 Additional strings for translation for #860 2022-07-06 11:15:07 -05:00
Jim Miller
6682a3117b Better handling for fail of an existing anthology book on update. Closes #860 2022-07-06 11:13:57 -05:00
Jim Miller
38ea209a40 Bump Test Version 4.13.7 2022-06-28 16:20:09 -05:00
Jim Miller
295868b923 Fix for problem with remove_tags refactor. 2022-06-28 16:18:59 -05:00
Jim Miller
fc8e96cc9e Bump Test Version 4.13.6 2022-06-25 15:09:16 -05:00
Jim Miller
58387605e6 Collect rating for adapter_libraryofmoriacom, refactor rating from TOC in base_efiction. Closes #859 2022-06-25 15:09:16 -05:00
Jim Miller
1d5e5d3722 Refactor code to remove empty tags to also remove now-empty parents by making another pass. 2022-06-24 14:58:26 -05:00
Jim Miller
4aa9c1bf34 Output utf8FromSoup times to debug. Remove before release. 2022-06-24 14:58:04 -05:00
Jim Miller
d347523942 Bump Test Version 4.13.5 2022-06-17 09:45:43 -05:00
Jim Miller
a181c36ccb adapter_themasquenet: Switch to https, closes #854 2022-06-17 09:44:57 -05:00
Jim Miller
28a2b5e926 Bump Test Version 4.13.4 2022-06-10 09:02:43 -05:00
Jim Miller
65a7538452 PI: Ctrl-Return/Enter on personal.ini editbox equivalent to clicking OK button. 2022-06-10 08:59:49 -05:00
Jim Miller
bb3a86298e Bump Test Version 4.13.3 2022-06-09 13:11:05 -05:00
Jim Miller
d01ae7004a base_xenforoforum_adapter(QQ): Allow for guest/deleted author w/o a link. Closes #852 2022-06-09 13:10:58 -05:00
Jim Miller
31f3384c8e Bump Test Version 4.13.2 2022-06-08 09:10:59 -05:00
Faye
97823bc12b readonlymind: add option to include foreword/author's note
ROM chapters can include an author's note as a foreword on the chapter
page. It's an entirely separate section tag from the story content, so
when it is present, a div tag is used to wrap both.
2022-06-08 09:10:34 -05:00
Jim Miller
7f2514c177 Bump Test Version 4.13.1 2022-06-05 14:06:38 -05:00
Jim Miller
e3b487205d adapter_mcstoriescom Allow multiple authors. Closes #847 2022-06-05 14:06:29 -05:00
Jim Miller
b5dd8d4565 Bump Release Version 4.13.0 2022-06-01 11:20:53 -05:00
Jim Miller
7341598cc3 Update translations. 2022-06-01 11:20:37 -05:00
Jim Miller
04dd608930 Bump Test Version 4.12.16 2022-05-31 09:33:09 -05:00
Jim Miller
8b64b415c4 adapter_chosentwofanficcom: http->https 2022-05-31 09:33:03 -05:00
Jim Miller
0da8d430d9 Bump Test Version 4.12.15 2022-05-26 08:29:32 -05:00
Jim Miller
38570c26c7 Update translations. 2022-05-26 08:29:07 -05:00
Jim Miller
78c6b3e5cd adapter_mediaminerorg: More story URL forms. #845 2022-05-26 08:27:55 -05:00
Jim Miller
7550554c3e Bump Test Version 4.12.14 2022-05-22 14:24:52 -05:00
Jim Miller
68bb6f6fcf Don't set marked in Calibre when book_id is None #833 2022-05-22 14:24:40 -05:00
Jim Miller
bf01b1a7de Bump Test Version 4.12.13 2022-05-22 10:32:12 -05:00
Jim Miller
c53cbfe156 adapter_quotevcom: Update collection of searchtags 2022-05-22 10:25:56 -05:00
Jim Miller
a1f839d732 Bump Test Version 4.12.12 2022-05-18 17:57:35 -05:00
Jim Miller
71de6900ee Add config check parsing for custom_columns_settings on personal.ini save. 2022-05-18 17:57:27 -05:00
Jim Miller
11665834b5 Bump Test Version 4.12.11 2022-05-15 13:40:17 -05:00
Jim Miller
36eed1bc43 Refactor img code in story.py to fix a problem when cover image also in story. 2022-05-15 13:39:56 -05:00
Jim Miller
b39d6a33b7 Bump Test Version 4.12.10 2022-05-10 21:23:04 -05:00
Jim Miller
9c554375aa adapter_webnovelcom: Not all paragraphs starting with '<' are HTML. #841 2022-05-10 21:22:44 -05:00
Jim Miller
7c6c82e0ac Bump Test Version 4.12.9 2022-05-10 16:09:55 -05:00
Martin Vlcek
ceccc5baab
fix storiesonline login (again) - parameter name was changed back to "email" (#840) 2022-05-10 16:08:14 -05:00
Jim Miller
379d6ac634 Bump Test Version 4.12.8 2022-05-10 10:16:23 -05:00
Jim Miller
53c75ce01c Rename adapter_fastnovelsnet - Fixes for site changes 2022-05-10 10:16:12 -05:00
Jim Miller
08044e5c0d Bump Test Version 4.12.7 2022-05-05 22:27:28 -05:00
Jim Miller
63b1d7ac72 Lighten color highlighting for storyUrls sections in dark mode. 2022-05-05 22:27:22 -05:00
Jim Miller
63450c65e1 Bump Test Version 4.12.6 2022-05-05 09:13:07 -05:00
mvlcek
e9d206bf9b fix storiesonline login 2022-05-05 09:12:44 -05:00
Jim Miller
3913028800 Bump Test Version 4.12.5 2022-05-04 16:43:46 -05:00
Jim Miller
b8879d6b75 adapter_ficbooknet: Fix for site change. 2022-05-04 16:39:05 -05:00
Jim Miller
7df74c2bbb adapter_wwwutopiastoriescom: Fixes for site changes. 2022-05-04 16:32:42 -05:00
Jim Miller
1782a32674 Remove site tomparisdorm.com - Moved to AO3 2022-05-04 16:17:21 -05:00
Jim Miller
20574c7e94 Remove site: bloodties-fans.com - Moved to AO3. 2022-05-04 16:04:52 -05:00
Jim Miller
a78eb07c77 adapter_fanficsme: Fix for changed 'words' metadata. 2022-05-04 15:54:09 -05:00
Jim Miller
a8bdcde4bf Remove site wuxiaworld.com Closes #796 2022-05-03 17:22:36 -05:00
Jim Miller
523aa75588 Remove site wuxiaworld.site Closes #758 2022-05-03 17:12:48 -05:00
Jim Miller
2b36871281 Bump Test Version 4.12.4 2022-05-01 12:12:36 -05:00
Jim Miller
0cff71b9d6 adapter_storiesonlinenet py2 fixes See #832 #829 #830 2022-05-01 12:12:28 -05:00
Jim Miller
e3d358e4e0 Bump Test Version 4.12.3 2022-04-30 17:02:35 -05:00
Jim Miller
afacc475b4 Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2022-04-30 14:24:05 -05:00
David Buckley
8051ef7c9f
add chapter date metadata to RoyalRoadAdapter (#831)
* add chapter date metadata to RoyalRoadAdapter

* string format chapter date metadata

* improve formatting
2022-04-30 14:23:56 -05:00
Jim Miller
d0a13b63ff Bump Test Version 4.12.2 2022-04-30 09:13:52 -05:00
Jim Miller
c5734f96b8 Add slow_down_sleep_time:1 for [storiesonline.net] 2022-04-30 09:13:42 -05:00
Martin Vlcek
adefbcfcf8
Address Storiesonline.net "Click to Load text..."
#756
2022-04-30 09:11:07 -05:00
Jim Miller
eb9e3ba9fe Bump Test Version 4.12.1 2022-04-29 10:31:26 -05:00
Jim Miller
6e3055e753 Fix for SB using an attr on noscript tags now. 2022-04-29 10:31:19 -05:00
Jim Miller
6c3a133ccd Bump Release Version 4.12.0 2022-04-27 11:01:38 -05:00
Jim Miller
75af89464d Update translations. 2022-04-27 10:59:44 -05:00
Jim Miller
b40676518c Bump Test Version 4.11.15 2022-04-26 16:54:43 -05:00
Jim Miller
86b86b50f9 Py2 vs py3 fix #828 2022-04-26 16:54:34 -05:00
Jim Miller
5fd455b981 Fix some indenting 2022-04-26 10:45:23 -05:00
Jim Miller
58a8ca411c Bump Test Version 4.11.14 2022-04-25 21:33:11 -05:00
Jim Miller
d2ff6ba5d2 adapter_phoenixsongnet: Remove login code and changes for static author links. 2022-04-25 21:33:00 -05:00
Jim Miller
cb3f7e1644 Remove some dead code from inherited common_utils.py 2022-04-25 12:53:26 -05:00
Jim Miller
e2c6d4be99 Bump Test Version 4.11.13 2022-04-25 11:55:36 -05:00
Jim Miller
20802c8a6b adapter_fictionhuntcom: Fixes for site changes. 2022-04-25 11:54:41 -05:00
Jim Miller
2243edb175 adapter_webnovelcom: Fixes for site changes. #828 2022-04-25 10:55:59 -05:00
Jim Miller
80c4f4cb56 Bump Test Version 4.11.12 2022-04-20 19:21:21 -05:00
Jim Miller
b43d0e4b79 Update translations. 2022-04-20 19:21:10 -05:00
Jim Miller
3c95a6a533 Xenforo2 Ongoing==In-Progress 2022-04-16 23:37:51 -05:00
Jim Miller
d3d0865a00 Bump Test Version 4.11.11 2022-04-16 17:52:31 -05:00
Jim Miller
41e2f5ed75 Apply connect_timeout setting to network requests. 2022-04-16 17:52:24 -05:00
Jim Miller
8653b1520f Bump Test Version 4.11.10 2022-04-13 13:53:40 -05:00
Jim Miller
a67dd3d7b0 adapter_webnovelcom: Unescape & too. See #825 2022-04-13 13:53:25 -05:00
Jim Miller
bdeb2a80f7 Bump Test Version 4.11.9 2022-04-13 10:03:36 -05:00
Jim Miller
0eb543a726 Update translations. 2022-04-13 09:59:13 -05:00
Jim Miller
9c9a2a22f5 Bump Test Version 4.11.8 2022-04-12 09:36:32 -05:00
Jim Miller
8aeb05a22d Detect and error on adjusted chapter list < 1. Closes #826 2022-04-12 09:36:24 -05:00
Jim Miller
11670b30ba Bump Test Version 4.11.7 2022-04-08 19:36:29 -05:00
Jim Miller
ff0a9a7335 adapter_webnovelcom: Read chapter text from json in <script> tag. 2022-04-08 19:36:23 -05:00
Jim Miller
33272aaa22 Update translations. 2022-04-08 10:03:05 -05:00
Jim Miller
75fc53f93a Bump Test Version 4.11.6 2022-04-08 09:46:50 -05:00
Sidney Markowitz
890f416eae
use large cover images on royalroad (#823) 2022-04-08 09:46:34 -05:00
Jim Miller
3a35e4d2d0 Bump Test Version 4.11.5 2022-04-06 10:52:17 -05:00
Jim Miller
81ef198d00 Add --mozilla-cookies CLI option. 2022-04-06 10:52:11 -05:00
Jim Miller
d7f149e990 Bump Test Version 4.11.4 2022-04-05 09:47:05 -05:00
Jim Miller
6e86f51164 PI: Update translation strings. 2022-04-05 09:47:05 -05:00
Jim Miller
a086de264c PI: Check for existing anthology ebook on new anthology from series URL. 2022-04-05 09:45:03 -05:00
Jim Miller
dc28197c7b Bump Test Version 4.11.3 2022-04-01 09:32:12 -05:00
Jim Miller
de8443298e ffnet: Add meta_from_last_chapter option. 2022-04-01 09:32:05 -05:00
Jim Miller
eee92b4ebb Bump Test Version 4.11.2 2022-03-27 09:55:48 -05:00
Jim Miller
10a07fe4bf adapter_literotica: add ...$ to story URL search for when /xyz-pt1 and /xyz are different stories. 2022-03-27 09:55:42 -05:00
Jim Miller
a8c10bb017 Bump Test Version 4.11.1 2022-03-25 12:08:28 -05:00
Jim Miller
ecfa75c235 Adding fandom/category parsing to adapter_fictionhuntcom--more than just HP now. 2022-03-25 12:08:20 -05:00
Jim Miller
21bd4b951d New ffnet fandom containing + 2022-03-25 10:52:36 -05:00
Jim Miller
ff6950b2e2 Alphabet order defaults.ini files. 2022-03-25 10:26:11 -05:00
Jim Miller
f9a39897a2 Merge branch 'Rikkitp-fix-wuxiaworldco' 2022-03-25 10:21:53 -05:00
Snegirev Dmitry
eeac5f2b9a fix wuxiaworldco: www => m 2022-03-25 16:24:25 +03:00
Jim Miller
98ea6ba721 Bump Release Version 4.11.0 2022-03-23 09:35:01 -05:00
Jim Miller
2ca954f048 Update translations. 2022-03-23 09:34:44 -05:00
Jim Miller
fa7cf95ee2 Bump Test Version 4.10.8 2022-03-11 10:27:33 -06:00
Jim Miller
5680027b72 adapter_quotevcom: Additional chapter image parsing. 2022-03-11 10:27:26 -06:00
Jim Miller
8c6c6991c2 Bump Test Version 4.10.7 2022-03-02 10:00:29 -06:00
Sidney Markowitz
addc024e49 Issue 813 recognize various royalroad chapter url formats 2022-03-02 20:31:42 +13:00
Jim Miller
335bfb02c2 Bump Test Version 4.10.6 2022-02-24 21:48:24 -06:00
Jim Miller
fb94a3f3f1 Change base_xenforoforum reveal_invisible_text feature to also add class=invisible_text. Closes #812 2022-02-24 21:48:06 -06:00
Jim Miller
9ea9cf4c68 Bump Test Version 4.10.5 2022-02-22 13:10:08 -06:00
Jim Miller
e977587fae adapter_fastnovelnet: Update to redirected Story URL. 2022-02-22 13:09:58 -06:00
Jim Miller
0c02cd98e0 Bump Test Version 4.10.4 2022-02-22 11:31:16 -06:00
Jim Miller
c67e19e0bf adapter_fastnovelnet: 'Normalize' chapter URLs to current storyId URL--site is changing it frequently 2022-02-22 11:31:10 -06:00
Jim Miller
4e4360ec62 Bump Test Version 4.10.3 2022-02-20 11:47:43 -06:00
Jim Miller
e786090aeb base_efiction - narrow chapter search even more. 2022-02-20 11:28:06 -06:00
Jim Miller
03f2657a6e base_efiction - narrow chapter search regexp a little more. 2022-02-20 11:18:28 -06:00
Jim Miller
16be4cbbe5 Abstract 'Back to index' in base_efiction for other languages. 2022-02-20 11:08:32 -06:00
Jim Miller
53c8b69f1e Stop looking for FFDL settings--it's been 7 years. 2022-02-18 10:16:14 -06:00
Jim Miller
28238b18ff Bump Test Version 4.10.2 2022-02-16 11:26:14 -06:00
Jim Miller
f4c06014dd Look for story URLs in pasted mime as well as dropped. See #809 2022-02-16 11:11:44 -06:00
Jim Miller
fb8ab400b7 Bump Test Version 4.10.1 2022-02-14 14:22:01 -06:00
Jim Miller
f2d74defca adapter_storiesonlinenet: fix for dateUpdated when 'Last Activity' #808 2022-02-14 14:21:54 -06:00
Jim Miller
c1c18a5a87 Bump Release Version 4.10.0 2022-02-14 09:39:42 -06:00
Jim Miller
54e952748f Bump Test Version 4.9.10 2022-02-02 11:14:13 -06:00
Jim Miller
30470c8f6a adapter_fanfiktionde: Update where description comes from. 2022-02-02 11:14:07 -06:00
Jim Miller
4da7db4305 Bump Test Version 4.9.9 2022-02-01 12:36:58 -06:00
Jim Miller
23a00fb15a Correct use_flaresolverr_proxy error checking. 2022-02-01 12:36:52 -06:00
Jim Miller
951cc73e46 Merge branch 'fswithimages' 2022-02-01 09:12:32 -06:00
Nothorse
53452ca410
ReadOnlyMindAdapter: Add series_tags feature to populate series metadata (#803) 2022-02-01 09:11:47 -06:00
Jim Miller
01ba441a63 Bump Test Version 4.9.8 2022-02-01 09:10:35 -06:00
Jim Miller
582c1a6e7f Add use_flaresolverr_proxy:withimages option for FlareSolverr v1 users. 2022-02-01 09:10:35 -06:00
Jim Miller
77d1037a90 adapter_fanfictionnet: don't do skip_author_cover check without include_images:true 2022-02-01 09:10:35 -06:00
Jim Miller
52587ef69b Bump Test Version 4.9.7 2022-01-30 19:32:56 -06:00
Jim Miller
ea66ae350b Use logger.warning() not .warn() consistently. 2022-01-30 19:32:42 -06:00
Jim Miller
ad3a16f423 Force include_images:false when use_flaresolverr_proxy:true -- FlareSolverr v2.2.0 crashes on image request. 2022-01-30 19:31:52 -06:00
Jim Miller
4cf37d449e Stop passing download:true to FlareSolverr, they aren't putting it back. 2022-01-30 19:30:57 -06:00
Jim Miller
2c00752e23 Bump Test Version 4.9.6 2022-01-30 12:06:06 -06:00
Jim Miller
05e15487e4 adapter_royalroadcom: Add status 'Dropped' 2022-01-30 12:05:51 -06:00
Jim Miller
99236e82ad Bump Test Version 4.9.5 2022-01-30 09:59:16 -06:00
Nothorse
b9f5686a3c
readonlymind adapter (#801)
New Site: readonlymind.com, thanks Nothorse
2022-01-30 09:57:48 -06:00
Jim Miller
b99a7fe494 Bump Test Version 4.9.4 2022-01-29 10:19:16 -06:00
Jim Miller
f028bc9b6c adapter_royalroadcom: Add status 'Hiatus' Closes #800 2022-01-29 10:19:03 -06:00
Jim Miller
bd1bfbfaf9 base_efiction: Add 'Igen' as equiv to 'Yes, Completed' in Hungarian 2022-01-29 10:17:40 -06:00
Jim Miller
f61696fb3f Remove defunct site: hpfanficarchive.com 2022-01-24 11:15:27 -06:00
Jim Miller
f47f859de0 Bump Test Version 4.9.3 2022-01-23 10:13:45 -06:00
Jim Miller
6a18f3509b Remove fanfic.hu, moved to merengo.hu, but don't know if old storyIds are valid. 2022-01-23 10:13:11 -06:00
Jim Miller
02734791cd Add merengo.hu as eFiction with added consent click through. 2022-01-23 10:09:46 -06:00
Jim Miller
6194f3d9e7 Bump Test Version 4.9.2 2022-01-20 11:24:00 -06:00
Jim Miller
197c6dde81 Extend base_xenforoforum tagsfromtitle for ')(' '][' 2022-01-18 10:54:03 -06:00
Jim Miller
ea87916f4b Fix for py2 for base_xenforoforum tagsfromtitle. 2022-01-18 10:53:12 -06:00
Jim Miller
b710bdaafd Add flaresolverr_proxy settings to defaults.ini 2022-01-14 11:01:27 -06:00
Jim Miller
7b2d6a91fb Bump Test Version 4.9.1 2022-01-11 16:32:04 -06:00
Jim Miller
c7a542fd17 qt6 QFont.Normal/Bold & QTextEdit.NoWrap 2022-01-11 16:30:46 -06:00
Jim Miller
fa2b3c9511 Remove setTabStopWidth from raw prefs viewer--changed in qt6 and not needed. 2022-01-11 16:20:59 -06:00
Jim Miller
d6258ab74d Remove unneeded QTableWidgetItem.UserType 2022-01-11 16:07:00 -06:00
Jim Miller
f633ef8137 Remove dead convert_qvariant() code. 2022-01-11 16:07:00 -06:00
Jim Miller
a4c6fd9ff7 Replace QTextEdit.setTabStopWidth with setTabStopDistance 2022-01-11 16:07:00 -06:00
Jim Miller
0812d13003 Fix for QTableWidgetItem.UserType 2022-01-11 16:07:00 -06:00
Jim Miller
c97407ae56 Remove all Qt4 imports. 2022-01-11 16:07:00 -06:00
Jim Miller
b2b56e6366 Bump Release Version 4.9.0 2022-01-11 15:58:22 -06:00
Jim Miller
78e3689062 Remove removed fictionalley site from plugin-example.ini 2022-01-10 11:59:13 -06:00
Jim Miller
9f77f3a60d Bump Test Version 4.8.8 2022-01-10 09:09:18 -06:00
Jim Miller
db85c2c4b3 Update translations. 2022-01-10 09:09:06 -06:00
Jim Miller
dc26cef572 Update defaults.inis for Chrome's new Cache_Data dir. 2022-01-10 09:07:39 -06:00
Jim Miller
bc149a2deb Bump Test Version 4.8.7 2022-01-04 13:06:16 -06:00
Jim Miller
1e46c97bbd Adding plugin feature to Mark anthologies when individual story skipped. See #786 2022-01-04 13:05:44 -06:00
Jim Miller
790744c9e1 fictionhunt.com isn't requiring login anymore. Closes #784 2022-01-04 09:58:23 -06:00
Jim Miller
033c38fc91 Bump Test Version 4.8.6 2021-12-21 12:58:28 -06:00
Jim Miller
825a2070c5 Re-sync defaults.ini & plugin-defaults.ini 2021-12-21 12:58:28 -06:00
Jim Miller
5128dc6743 Strip base_xenforoforum tagsfromtitle with commas. Addresses final issue in #782 2021-12-21 12:58:28 -06:00
gesh
8828e1fc28 Fix nested []/()
Problem was regexes treated `[,(` and `],)` equally, considering eg
`[..)` balanced.

Considering `()` is also used within titles without signifying tags, it
might be worth investigating only matching that kind of bracketed tag at
the end of the title.

Closes: #783
2021-12-21 12:58:28 -06:00
hseg
a43949d123
Refactor main() in cli.py (#781)
* main: Replace return by explicit calls to exit()

In anticipation of breaking out these sections in their own functions

* Make doc-getting flags shortcut

This saves on pointless validation/setup work when only getting help
information. Moreover, these were the only actions that were in the
middle of the parse/validate/setup/run core logic of main(), moving them
out clears the way to cleanly breaking it up.

Removes -v alias for --version. If this is undesirable, a trick similar
to that for --sites-list can be used to shortcut it as well.

* Move up flag implication logic, var renaming

These are "virtual flags", should be set up asap after actual flags are
setup. Ideally, these would be set up in arg parsing, but this is
sometimes impossible/impractical.

Future improvement: use callbacks to say eg --updatealways sets
options.update, options.updatealways

* Move up validation

Fail fast if the arguments are invalid

* Internalize list_only into options

Helps keep related state together

* Pack up configs, printers for easier passing

* Break up main() into phases

* Remove unnecessary semicolon

* Unbundle configs, printers

This reverts commit 5dd44bbfc3.
Revertion reasons:
1) Initial commit was broken -- it reordered parameters in invocations
   to `get_config()`. This happened because python complained about
   invocations of the form `f(x,**d,z)` -- positional parameters may not
   appear after kwarg expansion. I mistakenly believed kwarg expansion
   would consume the slots of those parameters, and so this code would
   be equivalent to `f(x,z,**d)`. Instead, this passes `z` to the second
   positional parameter, which luckily enough had a key contained in `d`
   so it only caused a TypeError over the multiple values for that
   parameter.
2) To maintain the vision of the original commit would require multiple
   pessimizations *over* the previous state. Specifically:
   1) Using our running example of invocations of the form `f(x,**d,z)`,
      we'd need to turn `z` into a keyword argument. Since Python has no
      way of writing "`z` is a keyword argument whose value is taken
      from the current scope", that forces writing `f(x,**d,z=z)`.
      (Even if a proposal like <https://lwn.net/Articles/818129/> is
      accepted, we wouldn't be able to use it since we need to support
      Python 2)
   2) `dispatch()` uses `fail` internally. So we have one of two
      options:
      * Bundle `warn, fail` in `dispatch`'s arguments, and add a line
        `fail=printers['fail']` to the top of `dispatch`
      * Don't bundle `warn, fail` in `dispatch`'s arguments, and have
        `dispatch` bundle them instead
      Neither of these is palatable, especially over
      * Don't bundle `warn, fail` anywhere

* Restore -v alias for version

As 0847fc9 suggested might be desired
2021-12-21 12:21:02 -06:00
Jim Miller
61bc732810 Bump Test Version 4.8.5 2021-12-19 12:10:54 -06:00
Jim Miller
555872bdef
Merge pull request #780 from hseg/cleanup-cli-main-wip
cli.py: move out parseArgs
2021-12-19 12:10:36 -06:00
gesh
c0d776f64c cli.py: move out parseArgs
Resolves: #779
2021-12-19 03:48:35 +02:00
Jim Miller
a2dd11326f Bump Test Version 4.8.4 2021-12-14 19:36:34 -06:00
Jim Miller
0904101b7d adapter_archiveofourownorg: AO3 notification emails now sending http: instead of https: 2021-12-14 19:35:17 -06:00
Jim Miller
6fc9aa6dfc Bump Test Version 4.8.3 2021-12-11 19:32:16 -06:00
Jim Miller
3b72126f5f Add remove_class_chapter feature, true by default. 2021-12-11 19:32:06 -06:00
Jim Miller
80fb72928e Bump Test Version 4.8.2 2021-12-10 10:12:27 -06:00
Jim Miller
8ee9fc36ab adapter_scribblehubcom: Corner case removing spoilers. Closes #778 2021-12-10 10:12:18 -06:00
Jim Miller
89e731031c Bump Test Version 4.8.1 2021-12-03 10:56:46 -06:00
Jim Miller
619bc8a6f9 adapter_wwwnovelallcom: fixes for story w/o chapters & html desc. 2021-12-03 10:56:37 -06:00
Jim Miller
a2523f1a1e Bump Release Version 4.8.0 2021-12-02 09:33:07 -06:00
Jim Miller
3499548a2f Update translations. 2021-11-29 09:08:39 -06:00
Jim Miller
4460ee00cf Bump Test Version 4.7.9 2021-11-28 12:32:17 -06:00
Jim Miller
89290bf7a4 Add fix_relative_text_links feature, defaults to true. 2021-11-28 12:32:08 -06:00
Jim Miller
a07b36b61f Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2021-11-28 12:27:20 -06:00
Jim Miller
6f305d6254
Merge pull request #773 from Epicpkmn11/sh-spoilers-footnotes
Make Scribble Hub spoilers & footnotes look nicer
2021-11-28 12:27:14 -06:00
Pk11
7e356b733e Fix crash when news or spoiler notes excluded 2021-11-28 04:58:25 -06:00
Pk11
f2c8ae6a0a Make Scribble Hub spoilers & footnotes look nicer 2021-11-28 04:39:40 -06:00
Jim Miller
b1ab540c11 Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2021-11-20 15:50:25 -06:00
Jim Miller
9ca0bfc5d8
Merge pull request #771 from jcotton42/main
Remove Patreon support, discussion in #770
2021-11-20 15:50:10 -06:00
Josh Cotton
7011250353 Revert "Merge branch 'jcotton42-patreon'"
This removes support for Patreon, as discussed in #770.

This reverts commit df26e74145, reversing
changes made to 23e4f9468d.
2021-11-20 13:30:08 -08:00
Jim Miller
744400b161 Bump Test Version 4.7.8 2021-11-20 15:24:58 -06:00
Jim Miller
d0b81c1c7b Add averrating metadata to adapter_novelfull. 2021-11-20 15:24:36 -06:00
Jim Miller
adfaf141d3 Bump Test Version 4.7.7 2021-11-19 20:21:50 -06:00
Jim Miller
a8047ba0a9 Fix for cover_min_size causing failures with SVG images when no_image_processing:true. 2021-11-19 20:21:44 -06:00
Jim Miller
b142654dfc Bump Test Version 4.7.6 2021-11-19 10:38:35 -06:00
Jim Miller
56d4688f2c
Merge pull request #766 from rapjul/main
Get largest Webnovel cover image
2021-11-19 10:22:16 -06:00
Jim Miller
df26e74145 Merge branch 'jcotton42-patreon' 2021-11-19 10:18:35 -06:00
Jim Miller
8dd9154982 Merge branch 'patreon' of https://github.com/jcotton42/FanFicFare into jcotton42-patreon 2021-11-19 10:16:57 -06:00
Jim Miller
23e4f9468d Reorder defaults.ini sections. 2021-11-19 10:13:57 -06:00
Jim Miller
aa966de4bc Merge branch 'main' of https://github.com/JimmXinu/FanFicFare 2021-11-19 10:12:35 -06:00
Jim Miller
a711083e90 Set default slow_down_sleep_time:2 for [www.asianfanfics.com] 2021-11-19 10:12:30 -06:00
Jim Miller
99bafb052b
Merge pull request #769 from jcotton42/deviantart-dates-and-ids
Deviantart date fix (closes #768), also storyId and extratags changes
2021-11-19 10:11:06 -06:00
rapjul
61b5cd8e43
Update adapter_webnovelcom.py
Removed `map()` to better support Python2.
Used inline `for` loop instead.
2021-11-18 20:54:00 -06:00
Josh Cotton
1466ff2422 Patreon support using the browser cache. 2021-11-18 18:19:04 -08:00
Josh Cotton
af8a979984 FanFiction tag no longer added to deviantArt works by default. 2021-11-18 17:00:53 -08:00
Josh Cotton
1d562d1fe4 Use the unique deviation ID for the story ID. 2021-11-18 16:54:26 -08:00
Josh Cotton
d437654320 Fix date parsing to be Python 2-compatible. 2021-11-18 16:48:57 -08:00
rapjul
1eb5eb2d54
Update adapter_webnovelcom.py
Finds the largest image source – in case Webnovel changes their code.
2021-11-18 07:10:37 -06:00
rapjul
dbc90cfce5
Update adapter_webnovelcom.py
Gets the largest cover image.
2021-11-17 17:16:28 -06:00
Jim Miller
cf5c0fd68c Bump Test Version 4.7.5 2021-11-17 12:54:50 -06:00
Jim Miller
b02f40318c
Merge pull request #765 from jcotton42/deviantart
Support for deviantArt (closes #374)
2021-11-17 12:54:24 -06:00
Josh Cotton
bc6d65de26 Change site abbreviation to 'dac' 2021-11-17 10:37:22 -08:00
Josh Cotton
09f2fc4d4b Have deviantArt use the basic cache by default. 2021-11-16 23:33:09 -08:00
Josh Cotton
5c06b32a30 Fetch tags from dA. 2021-11-16 23:24:27 -08:00
Josh Cotton
125c55e1e3 Add erorr message if both story detections fail. 2021-11-16 22:42:56 -08:00
Josh Cotton
841fe6e396 Remove comments before scraping chapter to avoid false matches. 2021-11-16 22:40:54 -08:00
Josh Cotton
f245310927 Handle deviantArt mature content. 2021-11-16 21:42:27 -08:00
Josh Cotton
5e31182bc8 Deviantart login support. 2021-11-16 21:42:22 -08:00
Josh Cotton
0ca4d20720 Baisc support for deviantArt. 2021-11-14 23:38:25 -08:00
Jim Miller
2ddce1acd5 Bump Test Version 4.7.4 2021-11-14 11:57:29 -06:00
Jim Miller
dc88a00ea4 New Site: psychfic.com (re-added), thanks HappyFaceSpider Closes #764 2021-11-14 11:57:16 -06:00
Jim Miller
df61e88714 Bump Test Version 4.7.3 2021-11-12 19:21:31 -06:00
Brian
36efc7366e
Update adapter_storiesonlinenet.py
Added age/rating field parsing for finestories and scifistories
2021-11-12 17:00:41 -08:00
Jim Miller
a829d01e7c Bump Test Version 4.7.2 2021-11-09 16:47:03 -06:00
Jim Miller
1459ad8611 Add --json-meta-file CLI option. #761 2021-11-09 16:46:56 -06:00
Jim Miller
2e78b153d5 Bump Test Version 4.7.1 2021-11-09 08:54:49 -06:00
Jim Miller
467d79120e adapter_ficbooknet: Fix for site change. 2021-11-09 08:54:42 -06:00
Jim Miller
9080349615 Bump Release Version 4.7.0 2021-11-04 12:36:56 -05:00
Jim Miller
2085dda0a3 Bump Test Version 4.6.11 2021-11-03 15:00:56 -05:00
Jim Miller
52e69abb88 Update translations. 2021-11-03 15:00:08 -05:00
Jim Miller
06fa73666f Code for FlareSolverr v2.0.X, can't handle images. 2021-11-03 14:56:13 -05:00
Jim Miller
d7940213ab Bump Test Version 4.6.10 2021-11-01 20:10:02 -05:00
Jim Miller
da5ec5b357 adapter_royalroadcom: fix for ancient bug reading unixtime attr that's come back after years. 2021-11-01 20:09:09 -05:00
Jim Miller
605fc0dbcf Bump Test Version 4.6.9 2021-10-27 13:35:04 -05:00
Jim Miller
9da07fd160 fictionalley-archive.org: Convert adapter_fictionalleyorg to adapter_fictionalleyarchiveorg. 2021-10-27 13:34:30 -05:00
Jim Miller
913f8dc256 Bump Test Version 4.6.8 2021-10-25 13:07:59 -05:00
Jim Miller
f8cb9e9364 adapter_storiesonlinenet: Fix for site updates, login and dates. 2021-10-25 13:05:23 -05:00
Jim Miller
7ec234a052 Bump Test Version 4.6.7 2021-10-24 09:44:55 -05:00
Jim Miller
bb12670ef3 adapter_wwwutopiastoriescom: Add siterating_votes,siterating,siterank_of,siterank,views #750 2021-10-24 09:41:05 -05:00
Jim Miller
120a82c82b Bump Test Version 4.6.6 2021-10-20 13:20:45 -05:00
Jim Miller
bd9128044a Fix for more arbitrary py3 incompatibility(MutableSet). Closes #748 2021-10-20 13:14:56 -05:00
Jim Miller
9e54b8d82b Bump Test Version 4.6.5 2021-10-16 15:30:53 -05:00
Jim Miller
1f3f09d713 Switching royalroad.com specific 'get from imap' code to use fetcher instead of urllib. For #746 2021-10-16 15:30:40 -05:00
Jim Miller
17c9a26c8a Switch config _filelist feature from using urllig.request.build_opener to fetcher.RequestsFetcher. 2021-10-16 14:50:06 -05:00
Jim Miller
5755d462cc Update bundled certifi to 2021.10.08 2021-10-16 14:41:11 -05:00
Jim Miller
f4de32550c Bump Test Version 4.6.4 2021-10-10 20:35:14 -05:00
Jim Miller
2ae9c679e1 Update for adapter_fictionhuntcom, 'next' link in author pages changed. 2021-10-10 20:35:08 -05:00
Jim Miller
9dc4de0f07 Bump Test Version 4.6.3 2021-10-10 19:41:16 -05:00
Jim Miller
a64a415f59 adapter_wwwutopiastoriescom: Split 'eroticatags' metadata entry rather than single string. Closes #744 2021-10-10 19:40:34 -05:00
Jim Miller
5e02fdc2ae Bump Test Version 4.6.2 2021-10-07 11:18:57 -05:00
Jim Miller
6d75c4b464 Handle errors in royalroad email links better. 2021-10-07 10:51:19 -05:00
Jim Miller
ff05648b04 Fix typo in warning string. 2021-10-03 12:34:18 -05:00
Jim Miller
0a114cd313 Bump Test Version 4.6.1 2021-10-02 17:13:32 -05:00
TheCakeIsNaOH
db3b17ed5f Flaresolverr proxy add option to specify protocol
Add an option to specify the protocol to use for flaresolverr.
This allows usage of flaresolverr over https.
2021-10-02 17:10:58 -05:00
Jim Miller
14231fdd0a Fix for flaresolverr_proxy doing get urls from page 2021-10-02 13:57:02 -05:00
Jim Miller
76565e959a Bump Release Version 4.6.0 2021-09-30 11:53:56 -05:00
Jim Miller
70e67f7960 Bump Test Version 4.5.15 2021-09-28 12:57:31 -05:00
Jim Miller
9f244b9c01 Remove site www.squidge.org/peja now hosted on squidgeworld.org 2021-09-28 12:53:42 -05:00
Jim Miller
6a1dccd270 Remove site faerie-archive.com - redirects to some sketchy looking ad sites. 2021-09-28 12:33:49 -05:00
Jim Miller
b146954afd Remove site fanfiction-junkies.de - redirects to ad site. 2021-09-28 12:31:22 -05:00
Jim Miller
58cc24e9c4 Remove site it-could-happen.net - redirects to some sketchy looking ad sites. 2021-09-28 12:29:42 -05:00
Jim Miller
c97de461a8 Remove site sebklaine.net - redirects to some sketchy looking ad sites. 2021-09-28 12:27:47 -05:00
Jim Miller
e759240175 Remove site nha.magical-worlds.us - redirects to something unrelated. 2021-09-28 12:25:59 -05:00
Jim Miller
0eb3abd44a Bump Test Version 4.5.14 2021-09-27 11:25:46 -05:00
Jim Miller
e699910675 Fix for adapter_wuxiaworldsite site change - chapter list in page HTML now. 2021-09-27 11:25:37 -05:00
Jim Miller
c92acf2b3b Bump Test Version 4.5.13 2021-09-24 11:23:06 -05:00
Jim Miller
b439fa8bf0 Don't retry connection to flaresolverr proxy and report specifically on fail. Closes #737 2021-09-24 11:21:21 -05:00
Jim Miller
7ac8d1f1aa Bump Test Version 4.5.12 2021-09-24 10:49:35 -05:00
Jim Miller
991b928edb Accept wuxiaworldsite.com as alias for wuxiaworld.site. 2021-09-24 10:48:26 -05:00
Jim Miller
fb815c0453 Better error message for AO3 login-required series. Closes #736 2021-09-23 11:01:00 -05:00
Jim Miller
74ddae0fd9 Add section about test versions to home page. 2021-09-23 10:31:08 -05:00
Jim Miller
4dcc9ec510 Bump Test Version 4.5.11 2021-09-23 10:16:01 -05:00
Jim Miller
2245167580 Fix for adapter_asianfanficscom - site changed author URLs. 2021-09-23 10:15:54 -05:00
Jim Miller
97fe1bbcf6 Remove some debugs. 2021-09-22 21:57:02 -05:00
Jim Miller
93fc626332 Tweak XF tagsfromtitle in defaults.ini to not break &amp; 2021-09-22 21:56:31 -05:00
Jim Miller
16f19e6b4a Bump Test Version 4.5.10 2021-09-16 19:09:23 -05:00
Jim Miller
66ed3478cd Fix for FFF plugin not recognizing the same ffnet story URL when the title changes. 2021-09-16 19:09:10 -05:00
Jim Miller
982fd32a06 Bump Test Version 4.5.9 2021-09-16 12:45:30 -05:00
Jim Miller
3be15436a8 Add FS session code, but disabled currently. 2021-09-16 12:24:28 -05:00
Jim Miller
443a543bb5 Bump Test Version 4.5.8 2021-09-16 12:24:28 -05:00
Jim Miller
e28773850f flaresolverr_proxy watch for super future cookies, reduce proxy timeout. 2021-09-16 12:24:28 -05:00
Jim Miller
52e740cf58 Bump Test Version 4.5.7 2021-09-16 12:24:28 -05:00
Jim Miller
bdd8921328 Need a cookie version to read saved cookie file back. flaresolverr_proxy 2021-09-16 12:24:28 -05:00
Jim Miller
3f2596c247 Fixes for flaresolverr_proxy so POST works. 2021-09-16 12:24:28 -05:00
Jim Miller
73305fe0df Bump Test Version 4.5.6 2021-09-16 12:24:28 -05:00
Jim Miller
5ca13c71b3 Adding flaresolverr_proxy. 2021-09-16 12:24:28 -05:00
Jim Miller
06730f3f7b Add order_threadmarks_by_date_categories option, closes #733 2021-09-16 12:24:02 -05:00
Jim Miller
464a7a3ee3 Bump Test Version 4.5.5 2021-09-08 16:40:01 -05:00
oh45454545
bd52738e4c Update adapter_asianfanficscom.py 2021-09-08 16:39:39 -05:00
oh45454545
3b6a4b85a9 Update adapter_asianfanficscom.py 2021-09-08 16:39:39 -05:00
oh45454545
03d030feab Update adapter_asianfanficscom.py 2021-09-08 16:39:39 -05:00
Jim Miller
1082dc5417 Bump Test Version 4.5.4 2021-09-07 18:22:37 -05:00
Jim Miller
afb9f38ab4 Add 'min' for minutes to parse_relative_date_string() #731 2021-09-07 18:22:14 -05:00
Jim Miller
9754747785 Add use_browser_cache comments to fictionpress.com sections. 2021-09-07 10:25:42 -05:00
Jim Miller
8ea2aca735 Bump Test Version 4.5.3 2021-09-04 15:11:58 -05:00
Jim Miller
f7dcce698b Fix for dateutils change breaking royalroad 2021-09-04 15:11:49 -05:00
Jim Miller
b94779f7d4 Bump Test Version 4.5.2 2021-09-03 11:26:55 -05:00
Jim Miller
b24db52b3d Fixes for site changes in adapter_webnovelcom. #731 2021-09-03 11:26:40 -05:00
Jim Miller
19571e3b2b Don't mark wuxiaworld.com and webnovel.com stories FanFiction by default. #730 2021-08-30 11:57:54 -05:00
Jim Miller
3ae3d6c677 Bump Test Version 4.5.1 2021-08-29 15:36:35 -05:00
Jim Miller
6924828c8d Add POST request for wuxiaworld.site site changers. Closes #729. 2021-08-29 15:36:25 -05:00
Jim Miller
5aa8f2b25c Bump Release Version 4.5.0 2021-08-25 09:09:46 -05:00
Jim Miller
f0b14e680e Update translations. 2021-08-24 18:18:24 -05:00
Jim Miller
fc3f1c6588 Update translations. 2021-08-24 17:57:40 -05:00
Jim Miller
e62c771a3f Remove removed site from example.ini 2021-08-21 13:16:22 -05:00
Jim Miller
72ada92aa4 Bump Test Version 4.4.6 2021-08-20 19:42:22 -05:00
Jim Miller
18aa2776b0 Fix for lazyload images in base_xenforoforum_adapter 2021-08-20 19:41:49 -05:00
Jim Miller
3a30d2c5ea Bump Test Version 4.4.5 2021-08-09 13:29:33 -05:00
Jim Miller
e859aa23bf More 'correct' fix for missing URL on anthology update. Reverses fcf8dc2cde 2021-08-09 13:29:23 -05:00
Jim Miller
ca3a453447 Bump Test Version 4.4.4 2021-08-09 12:57:12 -05:00
Jim Miller
7132d16053 Update translations. 2021-08-09 12:55:00 -05:00
Jim Miller
fcf8dc2cde Keep existing series/anthology URL during update for config purposes. 2021-08-09 12:54:12 -05:00
Jim Miller
3aebb20ec2 Bump Test Version 4.4.3 2021-08-07 08:35:05 -05:00
Jim Miller
db1d6d9e0c Allow chapter URLs for adapter_webnovelcom. 2021-08-07 08:34:52 -05:00
Jim Miller
0501e98b13 literotica.com: extratags:Erotica 2021-07-27 16:44:29 -05:00
Jim Miller
0609d8bfae Bump Test Version 4.4.2 2021-07-26 17:06:24 -05:00
Jim Miller
89c6d45786 Shift adapter_fictionmaniatv to http, problems with https server? 2021-07-26 17:06:16 -05:00
Jim Miller
48065e5d83 Add reveal_invisible_text option to base_xenforoforum_adapter. 2021-07-21 11:28:23 -05:00
Jim Miller
5c3a8931ed Add <s>strikethrough</s> example to adapter_test1 2021-07-21 11:27:53 -05:00
Jim Miller
f994c67cc5 Remove site harrypotterfanfiction.com, site closed. Closes #719 2021-07-21 10:02:54 -05:00
Jim Miller
466e706f1c Bump Test Version 4.4.1 2021-07-14 13:09:19 -05:00
Jim Miller
de01752a8b Allow fictionpress.com with use_browser_cache--user still needs to configure in personal.ini Closes #716 2021-07-14 13:09:12 -05:00
Jim Miller
e2a3b48481 Change blockfilecache to save uint32 addrs instead of original cache key. Hashing cache key proved unreliable in some cases. 2021-07-14 12:58:10 -05:00
261 changed files with 42856 additions and 39657 deletions

3
.gitignore vendored
View file

@ -20,6 +20,9 @@
# pycharm project specific settings files # pycharm project specific settings files
.idea .idea
# vscode project specific settings file
.vscode
cleanup.sh cleanup.sh
FanFictionDownLoader.zip FanFictionDownLoader.zip
*.epub *.epub

View file

@ -44,19 +44,23 @@ pip install FanFicFare
``` ```
- _As of late November 2019, the web service version is shutdown. See the [Wiki Home](https://github.com/JimmXinu/FanFicFare/wiki#web-service-version) page for details._ - _As of late November 2019, the web service version is shutdown. See the [Wiki Home](https://github.com/JimmXinu/FanFicFare/wiki#web-service-version) page for details._
### Test Versions
FanFicFare is released roughly every month, but new test versions are posted more frequently as changes are made.
Test versions are available at:
- The [test plugin] is posted at MobileRead.
- The test version of CLI for pip install is uploaded to the testpypi repository and can be installed with:
```
pip install --extra-index-url https://test.pypi.org/simple/ --upgrade FanFicFare
```
### Other Releases ### Other Releases
Other versions may be available depending on your OS. I(JimmXinu) don't directly support these: Other versions may be available depending on your OS. I(JimmXinu) don't directly support these:
- **Arch Linux**: The CLI can also be obtained on Arch Linux from the OS repositories: - **Arch Linux**: The latest CLI release can be obtained from the [fanficfare](https://aur.archlinux.org/packages/fanficfare) AUR package. It will install the calibre plugin, if calibre is installed.
```
pacman -S fanficfare
```
...or from git via the [AUR package](https://aur.archlinux.org/packages/fanficfare-git)
(which will also update the calibre plugin, if calibre is installed).
[this post in the old FFDL thread]: https://www.mobileread.com/forums/showthread.php?p=1982785#post1982785 [this post in the old FFDL thread]: https://www.mobileread.com/forums/showthread.php?p=1982785#post1982785
@ -64,3 +68,4 @@ pacman -S fanficfare
[FanFicFare maillist]: https://groups.google.com/group/fanfic-downloader [FanFicFare maillist]: https://groups.google.com/group/fanfic-downloader
[wiki]: https://github.com/JimmXinu/FanFicFare/wiki [wiki]: https://github.com/JimmXinu/FanFicFare/wiki
[discussion thread]: https://www.mobileread.com/forums/showthread.php?t=259221 [discussion thread]: https://www.mobileread.com/forums/showthread.php?t=259221
[test plugin]: https://www.mobileread.com/forums/showthread.php?p=3084025&postcount=2

View file

@ -1,8 +1,9 @@
[main] [main]
host = https://www.transifex.com host = https://www.transifex.com
[calibre-plugins.fanfictiondownloader] [o:calibre:p:calibre-plugins:r:fanfictiondownloader]
file_filter = translations/<lang>.po file_filter = translations/<lang>.po
source_file = translations/en.po
source_lang = en source_lang = en
type = PO type = PO

View file

@ -33,7 +33,7 @@ except NameError:
from calibre.customize import InterfaceActionBase from calibre.customize import InterfaceActionBase
# pulled out from FanFicFareBase for saving in prefs.py # pulled out from FanFicFareBase for saving in prefs.py
__version__ = (4, 4, 0) __version__ = (4, 57, 7)
## Apparently the name for this class doesn't matter--it was still ## Apparently the name for this class doesn't matter--it was still
## 'demo' for the first few versions. ## 'demo' for the first few versions.

View file

@ -0,0 +1,20 @@
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2024, Jim Miller'
__docformat__ = 'restructuredtext en'
## References:
## https://www.mobileread.com/forums/showthread.php?p=4435205&postcount=65
## https://www.mobileread.com/forums/showthread.php?p=4102834&postcount=389
from calibre_plugins.action_chains.events import ChainEvent
class FanFicFareDownloadFinished(ChainEvent):
# replace with the name of your event
name = 'FanFicFare Download Finished'
def get_event_signal(self):
return self.gui.iactions['FanFicFare'].download_finished_signal

View file

@ -9,10 +9,7 @@ __docformat__ = 'restructuredtext en'
import re import re
try: from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
except ImportError as e:
from PyQt4.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
from fanficfare.six import string_types from fanficfare.six import string_types

View file

@ -2,7 +2,6 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
import six
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>, 2018, Jim Miller' __copyright__ = '2011, Grant Drake <grant.drake@gmail.com>, 2018, Jim Miller'
@ -10,26 +9,21 @@ __docformat__ = 'restructuredtext en'
import os import os
from contextlib import contextmanager from contextlib import contextmanager
try: from PyQt5.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
from PyQt5 import QtWidgets as QtGui QTableWidgetItem, QFont, QLineEdit, QComboBox,
from PyQt5.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
QTableWidgetItem, QFont, QLineEdit, QComboBox, QTextEdit, QListWidget, QAbstractItemView, QCursor)
QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
QTextEdit, QListWidget, QAbstractItemView, QCursor)
except ImportError as e:
from PyQt4 import QtGui
from PyQt4.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
QTableWidgetItem, QFont, QLineEdit, QComboBox,
QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
QTextEdit, QListWidget, QAbstractItemView, QCursor)
from calibre.constants import numeric_version as calibre_version
from calibre.constants import iswindows, DEBUG from calibre.constants import iswindows, DEBUG
from calibre.gui2 import UNDEFINED_QDATETIME, gprefs, info_dialog from calibre.gui2 import UNDEFINED_QDATETIME, gprefs, info_dialog
from calibre.gui2.actions import menu_action_unique_name from calibre.gui2.actions import menu_action_unique_name
from calibre.gui2.keyboard import ShortcutConfig from calibre.gui2.keyboard import ShortcutConfig
from calibre.utils.config import config_dir from calibre.utils.config import config_dir
from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE
from fanficfare.six import text_type as unicode
import fanficfare.six as six
from six import text_type as unicode
# Global definition of our plugin name. Used for common functions that require this. # Global definition of our plugin name. Used for common functions that require this.
plugin_name = None plugin_name = None
@ -48,8 +42,41 @@ def set_plugin_icon_resources(name, resources):
plugin_name = name plugin_name = name
plugin_icon_resources = resources plugin_icon_resources = resources
# print_tracebacks_for_missing_resources first appears in cal 6.2.0
if calibre_version >= (6,2,0):
def get_icons_nolog(icon_name,plugin_name):
return get_icons(icon_name,
plugin_name,
print_tracebacks_for_missing_resources=False)
else:
get_icons_nolog = get_icons
def get_icon(icon_name): def get_icon_6plus(icon_name):
'''
Retrieve a QIcon for the named image from
1. Calibre's image cache
2. resources/images
3. the icon theme
4. the plugin zip
Only plugin zip has images/ in the image name for backward
compatibility.
'''
icon = None
if icon_name:
icon = QIcon.ic(icon_name)
## both .ic and get_icons return an empty QIcon if not found.
if not icon or icon.isNull():
# don't need a tracestack from get_icons just because
# there's no icon in the theme
icon = get_icons_nolog(icon_name.replace('images/',''),
plugin_name)
if not icon or icon.isNull():
icon = get_icons(icon_name,plugin_name)
if not icon:
icon = QIcon()
return icon
def get_icon_old(icon_name):
''' '''
Retrieve a QIcon for the named image from the zip file if it exists, Retrieve a QIcon for the named image from the zip file if it exists,
or if not then from Calibre's image cache. or if not then from Calibre's image cache.
@ -63,6 +90,11 @@ def get_icon(icon_name):
return QIcon(pixmap) return QIcon(pixmap)
return QIcon() return QIcon()
# get_icons changed in Cal6.
if calibre_version >= (6,0,0):
get_icon = get_icon_6plus
else:
get_icon = get_icon_old
def get_pixmap(icon_name): def get_pixmap(icon_name):
''' '''
@ -109,34 +141,6 @@ def get_local_images_dir(subfolder=None):
return images_dir return images_dir
def create_menu_item(ia, parent_menu, menu_text, image=None, tooltip=None,
shortcut=(), triggered=None, is_checked=None):
'''
Create a menu action with the specified criteria and action
Note that if no shortcut is specified, will not appear in Preferences->Keyboard
This method should only be used for actions which either have no shortcuts,
or register their menus only once. Use create_menu_action_unique for all else.
'''
if shortcut is not None:
if len(shortcut) == 0:
shortcut = ()
else:
shortcut = shortcut
ac = ia.create_action(spec=(menu_text, None, tooltip, shortcut),
attr=menu_text)
if image:
ac.setIcon(get_icon(image))
if triggered is not None:
ac.triggered.connect(triggered)
if is_checked is not None:
ac.setCheckable(True)
if is_checked:
ac.setChecked(True)
parent_menu.addAction(ac)
return ac
def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None, def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None,
shortcut=None, triggered=None, is_checked=None, shortcut_name=None, shortcut=None, triggered=None, is_checked=None, shortcut_name=None,
unique_name=None): unique_name=None):
@ -177,13 +181,6 @@ def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=No
return ac return ac
def swap_author_names(author):
if author.find(',') == -1:
return author
name_parts = author.strip().partition(',')
return name_parts[2].strip() + ' ' + name_parts[0]
def get_library_uuid(db): def get_library_uuid(db):
try: try:
library_uuid = db.library_id library_uuid = db.library_id
@ -200,17 +197,6 @@ def busy_cursor():
finally: finally:
QApplication.restoreOverrideCursor() QApplication.restoreOverrideCursor()
class ImageLabel(QLabel):
def __init__(self, parent, icon_name, size=16):
QLabel.__init__(self, parent)
pixmap = get_pixmap(icon_name)
self.setPixmap(pixmap)
self.setMaximumSize(size, size)
self.setScaledContents(True)
class ImageTitleLayout(QHBoxLayout): class ImageTitleLayout(QHBoxLayout):
''' '''
A reusable layout widget displaying an image followed by a title A reusable layout widget displaying an image followed by a title
@ -266,7 +252,7 @@ class EditableTableWidgetItem(QTableWidgetItem):
def __init__(self, text): def __init__(self, text):
if text is None: if text is None:
text = '' text = ''
QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType) QTableWidgetItem.__init__(self, text)
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled|Qt.ItemIsEditable) self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled|Qt.ItemIsEditable)
class ReadOnlyTableWidgetItem(QTableWidgetItem): class ReadOnlyTableWidgetItem(QTableWidgetItem):
@ -274,65 +260,10 @@ class ReadOnlyTableWidgetItem(QTableWidgetItem):
def __init__(self, text): def __init__(self, text):
if text is None: if text is None:
text = '' text = ''
QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType) QTableWidgetItem.__init__(self, text)
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
class RatingTableWidgetItem(QTableWidgetItem):
def __init__(self, rating, is_read_only=False):
QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType)
self.setData(Qt.DisplayRole, rating)
if is_read_only:
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
class DateTableWidgetItem(QTableWidgetItem):
def __init__(self, date_read, is_read_only=False, default_to_today=False):
if date_read == UNDEFINED_DATE and default_to_today:
date_read = now()
if is_read_only:
QTableWidgetItem.__init__(self, format_date(date_read, None), QtGui.QTableWidgetItem.UserType)
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
else:
QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType)
self.setData(Qt.DisplayRole, QDateTime(date_read))
class NoWheelComboBox(QComboBox):
def wheelEvent (self, event):
# Disable the mouse wheel on top of the combo box changing selection as plays havoc in a grid
event.ignore()
class CheckableTableWidgetItem(QTableWidgetItem):
def __init__(self, checked=False, is_tristate=False):
QTableWidgetItem.__init__(self, '')
self.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsUserCheckable | Qt.ItemIsEnabled ))
if is_tristate:
self.setFlags(self.flags() | Qt.ItemIsTristate)
if checked:
self.setCheckState(Qt.Checked)
else:
if is_tristate and checked is None:
self.setCheckState(Qt.PartiallyChecked)
else:
self.setCheckState(Qt.Unchecked)
def get_boolean_value(self):
'''
Return a boolean value indicating whether checkbox is checked
If this is a tristate checkbox, a partially checked value is returned as None
'''
if self.checkState() == Qt.PartiallyChecked:
return None
else:
return self.checkState() == Qt.Checked
class TextIconWidgetItem(QTableWidgetItem): class TextIconWidgetItem(QTableWidgetItem):
def __init__(self, text, icon): def __init__(self, text, icon):
@ -349,64 +280,6 @@ class ReadOnlyTextIconWidgetItem(ReadOnlyTableWidgetItem):
self.setIcon(icon) self.setIcon(icon)
class ReadOnlyLineEdit(QLineEdit):
def __init__(self, text, parent):
if text is None:
text = ''
QLineEdit.__init__(self, text, parent)
self.setEnabled(False)
class KeyValueComboBox(QComboBox):
def __init__(self, parent, values, selected_key):
QComboBox.__init__(self, parent)
self.values = values
self.populate_combo(selected_key)
def populate_combo(self, selected_key):
self.clear()
selected_idx = idx = -1
for key, value in six.iteritems(self.values):
idx = idx + 1
self.addItem(value)
if key == selected_key:
selected_idx = idx
self.setCurrentIndex(selected_idx)
def selected_key(self):
for key, value in six.iteritems(self.values):
if value == unicode(self.currentText()).strip():
return key
class CustomColumnComboBox(QComboBox):
    '''
    Combo box for picking a calibre custom column, optionally preceded
    by a set of initial placeholder items (default: one blank entry).

    custom_columns: dict mapping column lookup key -> metadata dict;
                    only the 'name' entry is used for display.
    selected_column: key (or initial item) to pre-select.
    '''
    def __init__(self, parent, custom_columns, selected_column, initial_items=['']):
        QComboBox.__init__(self, parent)
        self.populate_combo(custom_columns, selected_column, initial_items)
    def populate_combo(self, custom_columns, selected_column, initial_items=['']):
        self.clear()
        # BUGFIX: copy instead of aliasing.  The old code did
        # `self.column_names = initial_items` and then appended column
        # keys to it, mutating the shared mutable default argument so
        # every later instance inherited the previous instance's keys.
        self.column_names = list(initial_items)
        if len(initial_items) > 0:
            self.addItems(initial_items)
        selected_idx = 0
        for idx, value in enumerate(initial_items):
            if value == selected_column:
                selected_idx = idx
        for key in sorted(custom_columns.keys()):
            self.column_names.append(key)
            self.addItem('%s (%s)'%(key, custom_columns[key]['name']))
            if key == selected_column:
                selected_idx = len(self.column_names) - 1
        self.setCurrentIndex(selected_idx)
    def get_selected_column(self):
        '''Return the key (or initial item) for the currently selected row.'''
        return self.column_names[self.currentIndex()]
class KeyboardConfigDialog(SizePersistedDialog): class KeyboardConfigDialog(SizePersistedDialog):
''' '''
This dialog is used to allow editing of keyboard shortcuts. This dialog is used to allow editing of keyboard shortcuts.
@ -440,43 +313,6 @@ class KeyboardConfigDialog(SizePersistedDialog):
self.accept() self.accept()
class DateDelegate(QStyledItemDelegate):
    '''
    Delegate for dates. Because this delegate stores the
    format as an instance variable, a new instance must be created for each
    column. This differs from all the other delegates.
    '''
    def __init__(self, parent):
        QStyledItemDelegate.__init__(self, parent)
        self.format = 'dd MMM yyyy'
    def displayText(self, val, locale):
        # Values at or below the sentinel are "undefined": show nothing.
        dt = val.toDateTime()
        if dt <= UNDEFINED_QDATETIME:
            return ''
        return format_date(qt_to_dt(dt, as_utc=False), self.format)
    def createEditor(self, parent, option, index):
        editor = QStyledItemDelegate.createEditor(self, parent, option, index)
        editor.setDisplayFormat(self.format)
        editor.setMinimumDateTime(UNDEFINED_QDATETIME)
        editor.setSpecialValueText(_('Undefined'))
        editor.setCalendarPopup(True)
        return editor
    def setEditorData(self, editor, index):
        # Start editing from the current time when the stored value is
        # missing or the undefined sentinel.
        val = index.model().data(index, Qt.DisplayRole).toDateTime()
        if val is None or val == UNDEFINED_QDATETIME:
            val = now()
        editor.setDateTime(val)
    def setModelData(self, editor, model, index):
        # Anything at/below the cutoff is persisted as the sentinel.
        val = editor.dateTime()
        if val <= UNDEFINED_QDATETIME:
            model.setData(index, UNDEFINED_QDATETIME, Qt.EditRole)
        else:
            model.setData(index, QDateTime(val), Qt.EditRole)
class PrefsViewerDialog(SizePersistedDialog): class PrefsViewerDialog(SizePersistedDialog):
def __init__(self, gui, namespace): def __init__(self, gui, namespace):
@ -507,7 +343,6 @@ class PrefsViewerDialog(SizePersistedDialog):
self.keys_list.setAlternatingRowColors(True) self.keys_list.setAlternatingRowColors(True)
ml.addWidget(self.keys_list) ml.addWidget(self.keys_list)
self.value_text = QTextEdit(self) self.value_text = QTextEdit(self)
self.value_text.setTabStopWidth(24)
self.value_text.setReadOnly(True) self.value_text.setReadOnly(True)
ml.addWidget(self.value_text, 1) ml.addWidget(self.value_text, 1)

View file

@ -2,7 +2,6 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
import six
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2021, Jim Miller' __copyright__ = '2021, Jim Miller'
@ -15,37 +14,17 @@ import re
import threading import threading
from collections import OrderedDict from collections import OrderedDict
try: from PyQt5 import QtWidgets as QtGui
from PyQt5 import QtWidgets as QtGui from PyQt5.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel,
from PyQt5.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel, QLineEdit, QComboBox, QCheckBox, QPushButton, QTabWidget,
QLineEdit, QComboBox, QCheckBox, QPushButton, QTabWidget, QScrollArea, QGroupBox, QButtonGroup, QRadioButton,
QScrollArea, QGroupBox, QButtonGroup, QRadioButton, Qt)
Qt)
except ImportError as e:
from PyQt4 import QtGui
from PyQt4.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel,
QLineEdit, QComboBox, QCheckBox, QPushButton, QTabWidget,
QScrollArea, QGroupBox, QButtonGroup, QRadioButton,
Qt)
try:
from calibre.gui2 import QVariant
del QVariant
except ImportError:
is_qt4 = False
convert_qvariant = lambda x: x
else:
is_qt4 = True
def convert_qvariant(x):
vt = x.type()
if vt == x.String:
return unicode(x.toString())
if vt == x.List:
return [convert_qvariant(i) for i in x.toList()]
return x.toPyObject()
from calibre.gui2 import dynamic, info_dialog from calibre.gui2 import dynamic, info_dialog
from calibre.gui2.complete2 import EditWithComplete from calibre.gui2.complete2 import EditWithComplete
from fanficfare.six import text_type as unicode from calibre.gui2.dialogs.confirm_delete import confirm
import fanficfare.six as six
from six import text_type as unicode
try: try:
from calibre.ebooks.covers import generate_cover as cal_generate_cover from calibre.ebooks.covers import generate_cover as cal_generate_cover
@ -300,7 +279,6 @@ class ConfigWidget(QWidget):
prefs['collision'] = save_collisions[unicode(self.basic_tab.collision.currentText())] prefs['collision'] = save_collisions[unicode(self.basic_tab.collision.currentText())]
prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked() prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked()
prefs['bgmeta'] = self.basic_tab.bgmeta.isChecked() prefs['bgmeta'] = self.basic_tab.bgmeta.isChecked()
prefs['updateepubcover'] = self.basic_tab.updateepubcover.isChecked()
prefs['keeptags'] = self.basic_tab.keeptags.isChecked() prefs['keeptags'] = self.basic_tab.keeptags.isChecked()
prefs['mark'] = self.basic_tab.mark.isChecked() prefs['mark'] = self.basic_tab.mark.isChecked()
prefs['mark_success'] = self.basic_tab.mark_success.isChecked() prefs['mark_success'] = self.basic_tab.mark_success.isChecked()
@ -317,6 +295,7 @@ class ConfigWidget(QWidget):
prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked() prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked()
prefs['checkforseriesurlid'] = self.basic_tab.checkforseriesurlid.isChecked() prefs['checkforseriesurlid'] = self.basic_tab.checkforseriesurlid.isChecked()
prefs['auto_reject_seriesurlid'] = self.basic_tab.auto_reject_seriesurlid.isChecked() prefs['auto_reject_seriesurlid'] = self.basic_tab.auto_reject_seriesurlid.isChecked()
prefs['mark_series_anthologies'] = self.basic_tab.mark_series_anthologies.isChecked()
prefs['checkforurlchange'] = self.basic_tab.checkforurlchange.isChecked() prefs['checkforurlchange'] = self.basic_tab.checkforurlchange.isChecked()
prefs['injectseries'] = self.basic_tab.injectseries.isChecked() prefs['injectseries'] = self.basic_tab.injectseries.isChecked()
prefs['matchtitleauth'] = self.basic_tab.matchtitleauth.isChecked() prefs['matchtitleauth'] = self.basic_tab.matchtitleauth.isChecked()
@ -354,9 +333,10 @@ class ConfigWidget(QWidget):
prefs['calibre_gen_cover'] = self.calibrecover_tab.calibre_gen_cover.isChecked() prefs['calibre_gen_cover'] = self.calibrecover_tab.calibre_gen_cover.isChecked()
prefs['plugin_gen_cover'] = self.calibrecover_tab.plugin_gen_cover.isChecked() prefs['plugin_gen_cover'] = self.calibrecover_tab.plugin_gen_cover.isChecked()
prefs['gcnewonly'] = self.calibrecover_tab.gcnewonly.isChecked() prefs['gcnewonly'] = self.calibrecover_tab.gcnewonly.isChecked()
prefs['covernewonly'] = self.calibrecover_tab.covernewonly.isChecked()
gc_site_settings = {} gc_site_settings = {}
for (site,combo) in six.iteritems(self.calibrecover_tab.gc_dropdowns): for (site,combo) in six.iteritems(self.calibrecover_tab.gc_dropdowns):
val = unicode(convert_qvariant(combo.itemData(combo.currentIndex()))) val = unicode(combo.itemData(combo.currentIndex()))
if val != 'none': if val != 'none':
gc_site_settings[site] = val gc_site_settings[site] = val
#print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site])) #print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site]))
@ -391,27 +371,29 @@ class ConfigWidget(QWidget):
prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked() prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked()
prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked() prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked()
prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked() prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked()
prefs['seriescase'] = self.std_columns_tab.seriescase.isChecked()
prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked() prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked()
prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked() prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked()
prefs['set_series_url'] =self.std_columns_tab.set_series_url.isChecked()
prefs['includecomments'] =self.std_columns_tab.includecomments.isChecked() prefs['includecomments'] =self.std_columns_tab.includecomments.isChecked()
prefs['anth_comments_newonly'] =self.std_columns_tab.anth_comments_newonly.isChecked() prefs['anth_comments_newonly'] =self.std_columns_tab.anth_comments_newonly.isChecked()
# Custom Columns tab # Custom Columns tab
# error column # error column
prefs['errorcol'] = unicode(convert_qvariant(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()))) prefs['errorcol'] = unicode(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()))
prefs['save_all_errors'] = self.cust_columns_tab.save_all_errors.isChecked() prefs['save_all_errors'] = self.cust_columns_tab.save_all_errors.isChecked()
# metadata column # metadata column
prefs['savemetacol'] = unicode(convert_qvariant(self.cust_columns_tab.savemetacol.itemData(self.cust_columns_tab.savemetacol.currentIndex()))) prefs['savemetacol'] = unicode(self.cust_columns_tab.savemetacol.itemData(self.cust_columns_tab.savemetacol.currentIndex()))
# lastchecked column # lastchecked column
prefs['lastcheckedcol'] = unicode(convert_qvariant(self.cust_columns_tab.lastcheckedcol.itemData(self.cust_columns_tab.lastcheckedcol.currentIndex()))) prefs['lastcheckedcol'] = unicode(self.cust_columns_tab.lastcheckedcol.itemData(self.cust_columns_tab.lastcheckedcol.currentIndex()))
# cust cols tab # cust cols tab
colsmap = {} colsmap = {}
for (col,combo) in six.iteritems(self.cust_columns_tab.custcol_dropdowns): for (col,combo) in six.iteritems(self.cust_columns_tab.custcol_dropdowns):
val = unicode(convert_qvariant(combo.itemData(combo.currentIndex()))) val = unicode(combo.itemData(combo.currentIndex()))
if val != 'none': if val != 'none':
colsmap[col] = val colsmap[col] = val
#print("colsmap[%s]:%s"%(col,colsmap[col])) #print("colsmap[%s]:%s"%(col,colsmap[col]))
@ -435,6 +417,10 @@ class ConfigWidget(QWidget):
prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked() prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked()
prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked() prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked()
prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked() prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked()
prefs['site_split_jobs'] = self.other_tab.site_split_jobs.isChecked()
prefs['reconsolidate_jobs'] = self.other_tab.reconsolidate_jobs.isChecked()
prefs.save_to_db() prefs.save_to_db()
self.plugin_action.set_popup_mode() self.plugin_action.set_popup_mode()
@ -503,11 +489,6 @@ class BasicTab(QWidget):
self.updatemeta.setChecked(prefs['updatemeta']) self.updatemeta.setChecked(prefs['updatemeta'])
horz.addWidget(self.updatemeta) horz.addWidget(self.updatemeta)
self.updateepubcover = QCheckBox(_('Default Update EPUB Cover when Updating EPUB?'),self)
self.updateepubcover.setToolTip(_("On each download, FanFicFare offers an option to update the book cover image <i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This sets whether that will default to on or off."))
self.updateepubcover.setChecked(prefs['updateepubcover'])
horz.addWidget(self.updateepubcover)
self.bgmeta = QCheckBox(_('Default Background Metadata?'),self) self.bgmeta = QCheckBox(_('Default Background Metadata?'),self)
self.bgmeta.setToolTip(_("On each download, FanFicFare offers an option to Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail.<br />Only available for Update/Overwrite of existing books in case URL given isn't canonical or matches to existing book by Title/Author.")) self.bgmeta.setToolTip(_("On each download, FanFicFare offers an option to Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail.<br />Only available for Update/Overwrite of existing books in case URL given isn't canonical or matches to existing book by Title/Author."))
self.bgmeta.setChecked(prefs['bgmeta']) self.bgmeta.setChecked(prefs['bgmeta'])
@ -538,10 +519,24 @@ class BasicTab(QWidget):
self.auto_reject_seriesurlid.setToolTip(_("Automatically reject storys with existing Series Anthology books.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected.")) self.auto_reject_seriesurlid.setToolTip(_("Automatically reject storys with existing Series Anthology books.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected."))
self.auto_reject_seriesurlid.setChecked(prefs['auto_reject_seriesurlid']) self.auto_reject_seriesurlid.setChecked(prefs['auto_reject_seriesurlid'])
self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked()) self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked())
self.checkforseriesurlid.stateChanged.connect(lambda x : self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked()))
self.mark_series_anthologies = QCheckBox(_("Mark Matching Anthologies?"),self)
self.mark_series_anthologies.setToolTip(_("Mark and show existing Series Anthology books when individual updates are skipped.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected."))
self.mark_series_anthologies.setChecked(prefs['mark_series_anthologies'])
self.mark_series_anthologies.setEnabled(self.checkforseriesurlid.isChecked())
def mark_anthologies():
self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked())
self.mark_series_anthologies.setEnabled(self.checkforseriesurlid.isChecked())
self.checkforseriesurlid.stateChanged.connect(mark_anthologies)
mark_anthologies()
horz = QHBoxLayout() horz = QHBoxLayout()
horz.addItem(QtGui.QSpacerItem(20, 1)) horz.addItem(QtGui.QSpacerItem(20, 1))
horz.addWidget(self.auto_reject_seriesurlid) vertright = QVBoxLayout()
horz.addLayout(vertright)
vertright.addWidget(self.auto_reject_seriesurlid)
vertright.addWidget(self.mark_series_anthologies)
self.l.addLayout(horz) self.l.addLayout(horz)
self.checkforurlchange = QCheckBox(_("Check for changed Story URL?"),self) self.checkforurlchange = QCheckBox(_("Check for changed Story URL?"),self)
@ -766,6 +761,7 @@ class BasicTab(QWidget):
tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"), tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
rejectreasons=rejecturllist.get_reject_reasons(), rejectreasons=rejecturllist.get_reject_reasons(),
reasonslabel=_('Add this reason to all URLs added:'), reasonslabel=_('Add this reason to all URLs added:'),
accept_storyurls=True,
save_size_name='fff:Add Reject List') save_size_name='fff:Add Reject List')
d.exec_() d.exec_()
if d.result() == d.Accepted: if d.result() == d.Accepted:
@ -1004,7 +1000,7 @@ class CalibreCoverTab(QWidget):
self.gencov_elements=[] ## used to disable/enable when gen self.gencov_elements=[] ## used to disable/enable when gen
## cover is off/on. This is more ## cover is off/on. This is more
## about being a visual que than real ## about being a visual cue than real
## necessary function. ## necessary function.
topl = self.l = QVBoxLayout() topl = self.l = QVBoxLayout()
@ -1048,9 +1044,17 @@ class CalibreCoverTab(QWidget):
horz.addWidget(self.updatecalcover) horz.addWidget(self.updatecalcover)
self.l.addLayout(horz) self.l.addLayout(horz)
self.covernewonly = QCheckBox(_("Set Calibre Cover Only for New Books"),self)
self.covernewonly.setToolTip(_("Set the Calibre cover from EPUB only for new\nbooks, not updates to existing books."))
self.covernewonly.setChecked(prefs['covernewonly'])
horz = QHBoxLayout()
horz.addItem(QtGui.QSpacerItem(20, 1))
horz.addWidget(self.covernewonly)
self.l.addLayout(horz)
self.l.addSpacing(5)
tooltip = _("Generate a Calibre book cover image when Calibre metadata is updated.<br />" tooltip = _("Generate a Calibre book cover image when Calibre metadata is updated.<br />"
"Defaults to 'Yes, Always' for backward compatibility and because %(gc)s(Plugin)" "Note that %(gc)s(Plugin) will only run if there is a %(gc)s setting configured below for Default or the appropriate site.")%no_trans
" will only run if configured for Default or site.")%no_trans
horz = QHBoxLayout() horz = QHBoxLayout()
label = QLabel(_('Generate Calibre Cover:')) label = QLabel(_('Generate Calibre Cover:'))
label.setToolTip(tooltip) label.setToolTip(tooltip)
@ -1058,13 +1062,7 @@ class CalibreCoverTab(QWidget):
self.gencalcover = QComboBox(self) self.gencalcover = QComboBox(self)
for i in gencalcover_order: for i in gencalcover_order:
self.gencalcover.addItem(i) self.gencalcover.addItem(i)
# back compat. If has own value, use.
# if prefs['gencalcover']:
self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[prefs['gencalcover']])) self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[prefs['gencalcover']]))
# elif prefs['gencover']: # doesn't have own val, set YES if old value set.
# self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[SAVE_YES]))
# else: # doesn't have own value, old value not set, NO.
# self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[SAVE_NO]))
self.gencalcover.setToolTip(tooltip) self.gencalcover.setToolTip(tooltip)
label.setBuddy(self.gencalcover) label.setBuddy(self.gencalcover)
@ -1072,6 +1070,26 @@ class CalibreCoverTab(QWidget):
self.l.addLayout(horz) self.l.addLayout(horz)
self.gencalcover.currentIndexChanged.connect(self.endisable_elements) self.gencalcover.currentIndexChanged.connect(self.endisable_elements)
horz = QHBoxLayout()
horz.addItem(QtGui.QSpacerItem(20, 1))
vert = QVBoxLayout()
horz.addLayout(vert)
self.l.addLayout(horz)
self.gcnewonly = QCheckBox(_("Generate Covers Only for New Books")%no_trans,self)
self.gcnewonly.setToolTip(_("Default is to generate a cover any time the calibre metadata is"
" updated.<br />Used for both Calibre and Plugin generated covers."))
self.gcnewonly.setChecked(prefs['gcnewonly'])
vert.addWidget(self.gcnewonly)
self.gencov_elements.append(self.gcnewonly)
self.gc_polish_cover = QCheckBox(_("Inject/update the generated cover inside EPUB"),self)
self.gc_polish_cover.setToolTip(_("Calibre's Polish feature will be used to inject or update the generated"
" cover into the EPUB ebook file.<br />Used for both Calibre and Plugin generated covers."))
self.gc_polish_cover.setChecked(prefs['gc_polish_cover'])
vert.addWidget(self.gc_polish_cover)
self.gencov_elements.append(self.gc_polish_cover)
# can't be local or it's destroyed when __init__ is done and # can't be local or it's destroyed when __init__ is done and
# connected things don't fire. # connected things don't fire.
self.gencov_rdgrp = QButtonGroup() self.gencov_rdgrp = QButtonGroup()
@ -1080,7 +1098,9 @@ class CalibreCoverTab(QWidget):
self.gencov_gb.setLayout(horz) self.gencov_gb.setLayout(horz)
self.plugin_gen_cover = QRadioButton(_('Plugin %(gc)s')%no_trans,self) self.plugin_gen_cover = QRadioButton(_('Plugin %(gc)s')%no_trans,self)
self.plugin_gen_cover.setToolTip(_("Use plugin to create covers. Additional settings are below.")) self.plugin_gen_cover.setToolTip(_("Use the %(gc)s plugin to create covers.<br>"
"Requires that you have the the %(gc)s plugin installed.<br>"
"Additional settings are below.")%no_trans)
self.gencov_rdgrp.addButton(self.plugin_gen_cover) self.gencov_rdgrp.addButton(self.plugin_gen_cover)
# always, new only, when no cover from site, inject yes/no... # always, new only, when no cover from site, inject yes/no...
self.plugin_gen_cover.setChecked(prefs['plugin_gen_cover']) self.plugin_gen_cover.setChecked(prefs['plugin_gen_cover'])
@ -1102,20 +1122,6 @@ class CalibreCoverTab(QWidget):
#self.l.addLayout(horz) #self.l.addLayout(horz)
self.l.addWidget(self.gencov_gb) self.l.addWidget(self.gencov_gb)
self.gcnewonly = QCheckBox(_("Generate Covers Only for New Books")%no_trans,self)
self.gcnewonly.setToolTip(_("Default is to generate a cover any time the calibre metadata is"
" updated.<br />Used for both Calibre and Plugin generated covers."))
self.gcnewonly.setChecked(prefs['gcnewonly'])
self.l.addWidget(self.gcnewonly)
self.gencov_elements.append(self.gcnewonly)
self.gc_polish_cover = QCheckBox(_("Inject/update the cover inside EPUB"),self)
self.gc_polish_cover.setToolTip(_("Calibre's Polish feature will be used to inject or update the generated"
" cover into the EPUB ebook file.<br />Used for both Calibre and Plugin generated covers."))
self.gc_polish_cover.setChecked(prefs['gc_polish_cover'])
self.l.addWidget(self.gc_polish_cover)
self.gencov_elements.append(self.gc_polish_cover)
self.gcp_gb = QGroupBox(_("%(gc)s(Plugin) Settings")%no_trans) self.gcp_gb = QGroupBox(_("%(gc)s(Plugin) Settings")%no_trans)
topl.addWidget(self.gcp_gb) topl.addWidget(self.gcp_gb)
self.l = QVBoxLayout() self.l = QVBoxLayout()
@ -1274,6 +1280,31 @@ class OtherTab(QWidget):
self.l = QVBoxLayout() self.l = QVBoxLayout()
self.setLayout(self.l) self.setLayout(self.l)
groupbox = QGroupBox()
self.l.addWidget(groupbox)
groupl = QVBoxLayout()
groupbox.setLayout(groupl)
label = QLabel("<h3>"+
_("Background Job Settings")+
"</h3>"
)
label.setWordWrap(True)
groupl.addWidget(label)
self.site_split_jobs = QCheckBox(_('Split downloads into separate background jobs by site'),self)
self.site_split_jobs.setToolTip(_("Launches a separate background Job for each site in the list of stories to download/update. Otherwise, there will be only one background job."))
self.site_split_jobs.setChecked(prefs['site_split_jobs'])
groupl.addWidget(self.site_split_jobs)
self.reconsolidate_jobs = QCheckBox(_('Reconsolidate split downloads before updating library'),self)
self.reconsolidate_jobs.setToolTip(_("Hold all downloads/updates launched together until they all finish. Otherwise, there will be a 'Proceed to update' dialog for each site."))
self.reconsolidate_jobs.setChecked(prefs['reconsolidate_jobs'])
groupl.addWidget(self.reconsolidate_jobs)
self.l.addSpacing(5)
label = QLabel(_("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFicFare confirmation dialogs back again.")) label = QLabel(_("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFicFare confirmation dialogs back again."))
label.setWordWrap(True) label.setWordWrap(True)
self.l.addWidget(label) self.l.addWidget(label)
@ -1579,22 +1610,39 @@ class StandardColumnsTab(QWidget):
self.titlecase.setChecked(prefs['titlecase']) self.titlecase.setChecked(prefs['titlecase'])
row.append(self.titlecase) row.append(self.titlecase)
elif key == 'authors': elif key == 'authors':
self.set_author_url = QCheckBox(_('Set Calibre Author URL'),self)
self.set_author_url.setToolTip(_("Set Calibre Author URL to Author's URL on story site."))
self.set_author_url.setChecked(prefs['set_author_url'])
row.append(self.set_author_url)
self.suppressauthorsort = QCheckBox(_('Force Author into Author Sort?'),self) self.suppressauthorsort = QCheckBox(_('Force Author into Author Sort?'),self)
self.suppressauthorsort.setToolTip(_("If checked, the author(s) as given will be used for the Author Sort, too.\nIf not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc.")) self.suppressauthorsort.setToolTip(_("If checked, the author(s) as given will be used for the Author Sort, too.\nIf not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc."))
self.suppressauthorsort.setChecked(prefs['suppressauthorsort']) self.suppressauthorsort.setChecked(prefs['suppressauthorsort'])
row.append(self.suppressauthorsort) row.append(self.suppressauthorsort)
self.authorcase = QCheckBox(_('Fix Author Case?'),self) self.authorcase = QCheckBox(_('Fix Author Case?'),self)
self.authorcase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of author names will be applied.") self.authorcase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of author names will be applied.")
+"\n"+_("Calibre remembers all authors in the library; changing the author case on one book will effect all books by that author.") +"\n"+_("Calibre remembers all authors in the library; changing the author case on one book will effect all books by that author.")
+"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page.")) +"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
self.authorcase.setChecked(prefs['authorcase']) self.authorcase.setChecked(prefs['authorcase'])
row.append(self.authorcase) row.append(self.authorcase)
elif key == 'series': elif key == 'series':
self.set_series_url = QCheckBox(_('Set Calibre Series URL'),self)
self.set_series_url.setToolTip(_("Set Calibre Series URL to Series's URL on story site."))
self.set_series_url.setChecked(prefs['set_series_url'])
row.append(self.set_series_url)
self.setanthologyseries = QCheckBox(_("Set 'Series [0]' for New Anthologies?"),self) self.setanthologyseries = QCheckBox(_("Set 'Series [0]' for New Anthologies?"),self)
self.setanthologyseries.setToolTip(_("If checked, the Series column will be set to 'Series Name [0]' when an Anthology for a series is first created.")) self.setanthologyseries.setToolTip(_("If checked, the Series column will be set to 'Series Name [0]' when an Anthology for a series is first created."))
self.setanthologyseries.setChecked(prefs['setanthologyseries']) self.setanthologyseries.setChecked(prefs['setanthologyseries'])
row.append(self.setanthologyseries) row.append(self.setanthologyseries)
self.seriescase = QCheckBox(_('Fix Series Case?'),self)
self.seriescase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of title will be applied.")
+"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
self.seriescase.setChecked(prefs['seriescase'])
row.append(self.seriescase)
grid = QGridLayout() grid = QGridLayout()
for rownum, row in enumerate(rows): for rownum, row in enumerate(rows):
for colnum, col in enumerate(row): for colnum, col in enumerate(row):
@ -1607,11 +1655,6 @@ class StandardColumnsTab(QWidget):
self.l.addWidget(label) self.l.addWidget(label)
self.l.addSpacing(5) self.l.addSpacing(5)
self.set_author_url = QCheckBox(_('Set Calibre Author URL'),self)
self.set_author_url.setToolTip(_("Set Calibre Author URL to Author's URL on story site."))
self.set_author_url.setChecked(prefs['set_author_url'])
self.l.addWidget(self.set_author_url)
self.includecomments = QCheckBox(_("Include Books' Comments in Anthology Comments?"),self) self.includecomments = QCheckBox(_("Include Books' Comments in Anthology Comments?"),self)
self.includecomments.setToolTip(_('''Include all the merged books' comments in the new book's comments. self.includecomments.setToolTip(_('''Include all the merged books' comments in the new book's comments.
Default is a list of included titles only.''')) Default is a list of included titles only.'''))

View file

@ -16,43 +16,29 @@ logger = logging.getLogger(__name__)
from datetime import datetime from datetime import datetime
from PyQt5 import QtWidgets as QtGui
from PyQt5 import QtCore
from PyQt5.Qt import (QApplication, QDialog, QWidget, QTableWidget, QTableWidgetItem, QVBoxLayout,
QHBoxLayout, QGridLayout, QPushButton, QFont, QLabel, QCheckBox, QIcon,
QLineEdit, QComboBox, QProgressDialog, QTimer, QDialogButtonBox,
QScrollArea, QPixmap, Qt, QAbstractItemView, QTextEdit,
pyqtSignal, QGroupBox, QFrame, QTextCursor)
try: try:
from PyQt5 import QtWidgets as QtGui # qt6 Calibre v6+
from PyQt5 import QtCore QTextEditNoWrap = QTextEdit.LineWrapMode.NoWrap
from PyQt5.Qt import (QApplication, QDialog, QWidget, QTableWidget, QVBoxLayout, QHBoxLayout, MoveOperations = QTextCursor.MoveOperation
QGridLayout, QPushButton, QFont, QLabel, QCheckBox, QIcon, MoveMode = QTextCursor.MoveMode
QLineEdit, QComboBox, QProgressDialog, QTimer, QDialogButtonBox, except:
QScrollArea, QPixmap, Qt, QAbstractItemView, QTextEdit, # qt5 Calibre v2-5
pyqtSignal, QGroupBox, QFrame) QTextEditNoWrap = QTextEdit.NoWrap
except ImportError as e: MoveOperations = QTextCursor
from PyQt4 import QtGui MoveMode = QTextCursor
from PyQt4 import QtCore
from PyQt4.Qt import (QApplication, QDialog, QWidget, QTableWidget, QVBoxLayout, QHBoxLayout,
QGridLayout, QPushButton, QFont, QLabel, QCheckBox, QIcon,
QLineEdit, QComboBox, QProgressDialog, QTimer, QDialogButtonBox,
QScrollArea, QPixmap, Qt, QAbstractItemView, QTextEdit,
pyqtSignal, QGroupBox, QFrame)
try:
from calibre.gui2 import QVariant
del QVariant
except ImportError:
is_qt4 = False
convert_qvariant = lambda x: x
else:
is_qt4 = True
def convert_qvariant(x):
vt = x.type()
if vt == x.String:
return unicode(x.toString())
if vt == x.List:
return [convert_qvariant(i) for i in x.toList()]
return x.toPyObject()
from calibre.gui2 import gprefs from calibre.gui2 import gprefs
show_download_options = 'fff:add new/update dialogs:show_download_options' show_download_options = 'fff:add new/update dialogs:show_download_options'
from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.complete2 import EditWithComplete from calibre.gui2.complete2 import EditWithComplete
from fanficfare.exceptions import NotGoingToDownload
from fanficfare.six import text_type as unicode, ensure_text from fanficfare.six import text_type as unicode, ensure_text
# pulls in translation files for _() strings # pulls in translation files for _() strings
@ -170,34 +156,57 @@ class RejectUrlEntry:
return retval return retval
class NotGoingToDownload(Exception):
    '''
    Raised to skip downloading a story.

    error: human-readable reason for the skip.
    icon: icon file name for the notification dialog.
    showerror: whether the reason should be surfaced as an error.
    '''
    def __init__(self, error, icon='dialog_error.png', showerror=True):
        # Pass the message to Exception so e.args / repr(e) carry it
        # (the old code left args empty).
        super(NotGoingToDownload, self).__init__(error)
        self.error = error
        self.icon = icon
        self.showerror = showerror
    def __str__(self):
        return self.error
class DroppableQTextEdit(QTextEdit): class DroppableQTextEdit(QTextEdit):
def __init__(self,parent): def __init__(self,parent):
QTextEdit.__init__(self,parent) QTextEdit.__init__(self,parent)
self.setTabChangesFocus(True) self.setTabChangesFocus(True)
def dropEvent(self,event): def dropEvent(self,event):
# print("event:%s"%event) # logger.debug("dropEvent")
urllist = get_urls_from_mime(event.mimeData()) urllist = get_urls_from_mime(event.mimeData())
if urllist: if urllist:
self.append("\n".join(urllist)) self.append("\n".join(urllist))
return None return None
return QTextEdit.dropEvent(self,event) return QTextEdit.dropEvent(self,event)
class AddNewDialog(SizePersistedDialog): def insertFromMimeData(self, mime_data):
# logger.debug("insertFromMimeData")
# logger.debug(mime_data)
urllist = None
if mime_data.hasFormat('text/html'):
urllist = get_urls_from_mime(mime_data)
# logger.debug(urllist)
if urllist:
[ self.append(url) for url in urllist ]
else:
return QTextEdit.insertFromMimeData(self, mime_data)
class HotKeyedSizePersistedDialog(SizePersistedDialog):
def __init__(self, gui, save_size_name):
super(HotKeyedSizePersistedDialog,self).__init__(gui, save_size_name)
self.keys=dict()
def addCtrlKeyPress(self,key,func):
# print("addKeyPress: key(0x%x)"%key)
# print("control: 0x%x"%QtCore.Qt.ControlModifier)
self.keys[key]=func
def keyPressEvent(self, event):
# print("event: key(0x%x) modifiers(0x%x)"%(event.key(),event.modifiers()))
if (event.modifiers() & QtCore.Qt.ControlModifier) and event.key() in self.keys:
func = self.keys[event.key()]
return func()
else:
return super(HotKeyedSizePersistedDialog,self).keyPressEvent(event)
class AddNewDialog(HotKeyedSizePersistedDialog):
go_signal = pyqtSignal(object, object, object, object) go_signal = pyqtSignal(object, object, object, object)
def __init__(self, gui, prefs, icon): def __init__(self, gui, prefs, icon):
SizePersistedDialog.__init__(self, gui, 'fff:add new dialog') super(AddNewDialog,self).__init__(gui, 'fff:add new dialog')
self.prefs = prefs self.prefs = prefs
self.setMinimumWidth(300) self.setMinimumWidth(300)
@ -219,19 +228,25 @@ class AddNewDialog(SizePersistedDialog):
self.toplabel=QLabel("Toplabel") self.toplabel=QLabel("Toplabel")
self.l.addWidget(self.toplabel) self.l.addWidget(self.toplabel)
## XXX add labels for series name and desc? Desc in tooltip? ## scrollable area for lengthy series comments.
row = 0 scrollable = QScrollArea()
scrollcontent = QWidget()
scrollable.setWidget(scrollcontent)
scrollable.setWidgetResizable(True)
self.l.addWidget(scrollable)
grid = QGridLayout() grid = QGridLayout()
scrollcontent.setLayout(grid)
self.mergeshow.append(scrollable)
row = 0
label = QLabel('<b>'+_('Series')+':</b>') label = QLabel('<b>'+_('Series')+':</b>')
grid.addWidget(label,row,0) grid.addWidget(label,row,0)
self.mergedname=QLabel("mergedname") self.mergedname=QLabel("mergedname")
tt = _('This name will be used with the %s setting to set the title of the new book.')%'<i>anthology_title_pattern</i>' tt = _('This name will be used with the %s setting to set the title of the new book.')%'<i>anthology_title_pattern</i>'
label.setToolTip(tt) label.setToolTip(tt)
self.mergeshow.append(label)
self.mergedname.setToolTip(tt) self.mergedname.setToolTip(tt)
grid.addWidget(self.mergedname,row,1,1,-1) grid.addWidget(self.mergedname,row,1,1,-1)
self.l.addLayout(grid)
self.mergeshow.append(self.mergedname)
row+=1 row+=1
label = QLabel('<b>'+_('Comments')+':</b>') label = QLabel('<b>'+_('Comments')+':</b>')
@ -239,18 +254,15 @@ class AddNewDialog(SizePersistedDialog):
self.mergeddesc=QLabel("mergeddesc") self.mergeddesc=QLabel("mergeddesc")
tt = _('These comments about the series will be included in the Comments of the new book.')+'<i></i>' # for html for auto-wrap tt = _('These comments about the series will be included in the Comments of the new book.')+'<i></i>' # for html for auto-wrap
label.setToolTip(tt) label.setToolTip(tt)
self.mergeshow.append(label)
self.mergeddesc.setToolTip(tt) self.mergeddesc.setToolTip(tt)
self.mergeddesc.setWordWrap(True) self.mergeddesc.setWordWrap(True)
grid.addWidget(self.mergeddesc,row,1,1,-1) grid.addWidget(self.mergeddesc,row,1,1,-1)
self.l.addLayout(grid)
self.mergeshow.append(self.mergeddesc)
grid.setColumnStretch(1,1) grid.setColumnStretch(1,1)
self.url = DroppableQTextEdit(self) self.url = DroppableQTextEdit(self)
self.url.setToolTip("UrlTooltip") self.url.setToolTip("UrlTooltip")
self.url.setLineWrapMode(QTextEdit.NoWrap) self.url.setLineWrapMode(QTextEditNoWrap)
self.l.addWidget(self.url) self.l.addWidget(self.url,1) # 1 higher 'stretch'==higher priority
self.groupbox = QGroupBox(_("Show Download Options")) self.groupbox = QGroupBox(_("Show Download Options"))
self.groupbox.setCheckable(True) self.groupbox.setCheckable(True)
@ -314,12 +326,6 @@ class AddNewDialog(SizePersistedDialog):
self.mergehide.append(self.updatemeta) self.mergehide.append(self.updatemeta)
self.mergeupdateshow.append(self.updatemeta) self.mergeupdateshow.append(self.updatemeta)
self.updateepubcover = QCheckBox(_('Update EPUB Cover?'),self)
self.updateepubcover.setToolTip(_('Update book cover image from site or defaults (if found) <i>inside</i> the EPUB when EPUB is updated.'))
self.updateepubcover.setChecked(self.prefs['updateepubcover'])
horz.addWidget(self.updateepubcover)
self.mergehide.append(self.updateepubcover)
self.gbl.addLayout(horz) self.gbl.addLayout(horz)
## bgmeta not used with Add New because of stories that change ## bgmeta not used with Add New because of stories that change
@ -339,6 +345,9 @@ class AddNewDialog(SizePersistedDialog):
self.button_box.rejected.connect(self.reject) self.button_box.rejected.connect(self.reject)
self.l.addWidget(self.button_box) self.l.addWidget(self.button_box)
self.addCtrlKeyPress(QtCore.Qt.Key_Return,self.ok_clicked)
self.addCtrlKeyPress(QtCore.Qt.Key_Enter,self.ok_clicked) # num pad
def click_show_download_options(self,x): def click_show_download_options(self,x):
self.gbf.setVisible(x) self.gbf.setVisible(x)
gprefs[show_download_options] = x gprefs[show_download_options] = x
@ -449,9 +458,6 @@ class AddNewDialog(SizePersistedDialog):
self.updatemeta.setChecked(self.prefs['updatemeta']) self.updatemeta.setChecked(self.prefs['updatemeta'])
# self.bgmeta.setChecked(self.prefs['bgmeta']) # self.bgmeta.setChecked(self.prefs['bgmeta'])
if not self.merge:
self.updateepubcover.setChecked(self.prefs['updateepubcover'])
self.url.setText(url_list_text) self.url.setText(url_list_text)
if url_list_text: if url_list_text:
self.button_box.button(QDialogButtonBox.Ok).setFocus() self.button_box.button(QDialogButtonBox.Ok).setFocus()
@ -484,30 +490,29 @@ class AddNewDialog(SizePersistedDialog):
self.collision.setCurrentIndex(i) self.collision.setCurrentIndex(i)
def get_fff_options(self): def get_fff_options(self):
retval = { retval = dict(self.extraoptions)
'fileform': unicode(self.fileform.currentText()), retval.update( {
'collision': unicode(self.collision.currentText()), 'fileform': unicode(self.fileform.currentText()),
'updatemeta': self.updatemeta.isChecked(), 'collision': unicode(self.collision.currentText()),
'bgmeta': False, # self.bgmeta.isChecked(), 'updatemeta': self.updatemeta.isChecked(),
'updateepubcover': self.updateepubcover.isChecked(), 'bgmeta': False, # self.bgmeta.isChecked(),
'smarten_punctuation':self.prefs['smarten_punctuation'], 'smarten_punctuation':self.prefs['smarten_punctuation'],
'do_wordcount':self.prefs['do_wordcount'], 'do_wordcount':self.prefs['do_wordcount'],
} } )
if self.merge: if self.merge:
retval['fileform']=='epub' retval['fileform']=='epub'
retval['updateepubcover']=True
if self.newmerge: if self.newmerge:
retval['updatemeta']=True retval['updatemeta']=True
retval['collision']=ADDNEW retval['collision']=ADDNEW
logger.debug("self.extraoptions['anthology_url']:%s"%self.extraoptions.get('anthology_url','NOT FOUND'))
retval.update(self.extraoptions) retval.update(self.extraoptions)
return retval return retval
def get_urlstext(self): def get_urlstext(self):
return unicode(self.url.toPlainText()) return unicode(self.url.toPlainText())
class FakeLineEdit(): class FakeLineEdit():
def __init__(self): def __init__(self):
pass pass
@ -583,35 +588,83 @@ class UserPassDialog(QDialog):
QDialog.__init__(self, gui) QDialog.__init__(self, gui)
self.status=False self.status=False
self.l = QGridLayout() self.l = QVBoxLayout()
self.setLayout(self.l) self.setLayout(self.l)
grid = QGridLayout()
self.l.addLayout(grid)
if exception and exception.passwdonly: if exception and exception.passwdonly:
self.setWindowTitle(_('Password')) self.setWindowTitle(_('Password'))
self.l.addWidget(QLabel(_("Author requires a password for this story(%s).")%exception.url),0,0,1,2) grid.addWidget(QLabel(_("Author requires a password for this story(%s).")%exception.url),0,0,1,2)
# user isn't used, but it's easier to still have it for # user isn't used, but it's easier to still have it for
# post processing. # post processing.
self.user = FakeLineEdit() self.user = FakeLineEdit()
else: else:
self.setWindowTitle(_('User/Password')) self.setWindowTitle(_('User/Password'))
self.l.addWidget(QLabel(_("%s requires you to login to download this story.")%site),0,0,1,2) grid.addWidget(QLabel(_("%s requires you to login to download this story.")%site),0,0,1,2)
self.l.addWidget(QLabel(_("User:")),1,0) grid.addWidget(QLabel(_("User:")),1,0)
self.user = QLineEdit(self) self.user = QLineEdit(self)
self.l.addWidget(self.user,1,1) grid.addWidget(self.user,1,1)
self.l.addWidget(QLabel(_("Password:")),2,0) grid.addWidget(QLabel(_("Password:")),2,0)
self.passwd = QLineEdit(self) self.passwd = QLineEdit(self)
self.passwd.setEchoMode(QLineEdit.Password) self.passwd.setEchoMode(QLineEdit.Password)
self.l.addWidget(self.passwd,2,1) grid.addWidget(self.passwd,2,1)
horz = QHBoxLayout()
self.l.addLayout(horz)
self.ok_button = QPushButton(_('OK'), self) self.ok_button = QPushButton(_('OK'), self)
self.ok_button.clicked.connect(self.ok) self.ok_button.clicked.connect(self.ok)
self.l.addWidget(self.ok_button,3,0) horz.addWidget(self.ok_button)
self.cancel_button = QPushButton(_('Cancel'), self) self.cancel_button = QPushButton(_('Cancel'), self)
self.cancel_button.clicked.connect(self.cancel) self.cancel_button.clicked.connect(self.cancel)
self.l.addWidget(self.cancel_button,3,1) horz.addWidget(self.cancel_button)
self.resize(self.sizeHint())
def ok(self):
self.status=True
self.hide()
def cancel(self):
self.status=False
self.hide()
class TOTPDialog(QDialog):
'''
Need to collect Timebased One Time Password(TOTP) for some sites.
'''
def __init__(self, gui, site, exception=None):
QDialog.__init__(self, gui)
self.status=False
self.l = QVBoxLayout()
self.setLayout(self.l)
grid = QGridLayout()
self.l.addLayout(grid)
self.setWindowTitle(_('Time-based One Time Password(TOTP)'))
grid.addWidget(QLabel(_("Site requires a Time-based One Time Password(TOTP) for this url:\n%s")%exception.url),0,0,1,2)
grid.addWidget(QLabel(_("TOTP:")),2,0)
self.totp = QLineEdit(self)
grid.addWidget(self.totp,2,1)
horz = QHBoxLayout()
self.l.addLayout(horz)
self.ok_button = QPushButton(_('OK'), self)
self.ok_button.clicked.connect(self.ok)
horz.addWidget(self.ok_button)
self.cancel_button = QPushButton(_('Cancel'), self)
self.cancel_button.clicked.connect(self.cancel)
horz.addWidget(self.cancel_button)
self.resize(self.sizeHint()) self.resize(self.sizeHint())
@ -629,13 +682,15 @@ def LoopProgressDialog(gui,
finish_function, finish_function,
init_label=_("Fetching metadata for stories..."), init_label=_("Fetching metadata for stories..."),
win_title=_("Downloading metadata for stories"), win_title=_("Downloading metadata for stories"),
status_prefix=_("Fetched metadata for")): status_prefix=_("Fetched metadata for"),
disable_cancel=False):
ld = _LoopProgressDialog(gui, ld = _LoopProgressDialog(gui,
book_list, book_list,
foreach_function, foreach_function,
init_label, init_label,
win_title, win_title,
status_prefix) status_prefix,
disable_cancel)
# Mac OS X gets upset if the finish_function is called from inside # Mac OS X gets upset if the finish_function is called from inside
# the real _LoopProgressDialog class. # the real _LoopProgressDialog class.
@ -653,10 +708,12 @@ class _LoopProgressDialog(QProgressDialog):
foreach_function, foreach_function,
init_label=_("Fetching metadata for stories..."), init_label=_("Fetching metadata for stories..."),
win_title=_("Downloading metadata for stories"), win_title=_("Downloading metadata for stories"),
status_prefix=_("Fetched metadata for")): status_prefix=_("Fetched metadata for"),
disable_cancel=False):
QProgressDialog.__init__(self, QProgressDialog.__init__(self,
init_label, init_label,
_('Cancel'), 0, len(book_list), gui) _('Cancel'), 0, len(book_list), gui)
self.gui = gui
self.setWindowTitle(win_title) self.setWindowTitle(win_title)
self.setMinimumWidth(500) self.setMinimumWidth(500)
self.book_list = book_list self.book_list = book_list
@ -664,7 +721,6 @@ class _LoopProgressDialog(QProgressDialog):
self.status_prefix = status_prefix self.status_prefix = status_prefix
self.i = 0 self.i = 0
self.start_time = datetime.now() self.start_time = datetime.now()
self.first = True
# can't import at file load. # can't import at file load.
from calibre_plugins.fanficfare_plugin.prefs import prefs from calibre_plugins.fanficfare_plugin.prefs import prefs
@ -673,11 +729,27 @@ class _LoopProgressDialog(QProgressDialog):
self.setLabelText('%s %d / %d' % (self.status_prefix, self.i, len(self.book_list))) self.setLabelText('%s %d / %d' % (self.status_prefix, self.i, len(self.book_list)))
self.setValue(self.i) self.setValue(self.i)
if disable_cancel:
self.setCancelButton(None)
self.reject = self.disabled_reject
self.closeEvent = self.disabled_closeEvent
## self.do_loop does QTimer.singleShot on self.do_loop also. ## self.do_loop does QTimer.singleShot on self.do_loop also.
## A weird way to do a loop, but that was the example I had. ## A weird way to do a loop, but that was the example I had.
QTimer.singleShot(0, self.do_loop) ## 100 instead of 0 on the first go due to Win10(and later
## qt6) not displaying dialog properly.
QTimer.singleShot(100, self.do_loop)
self.exec_() self.exec_()
# used when disable_cancel = True
def disabled_reject(self):
pass
# used when disable_cancel = True
def disabled_closeEvent(self, event):
if event.spontaneous():
event.ignore()
def updateStatus(self): def updateStatus(self):
remaining_time_string = '' remaining_time_string = ''
if self.show_est_time and self.i > -1: if self.show_est_time and self.i > -1:
@ -691,15 +763,6 @@ class _LoopProgressDialog(QProgressDialog):
def do_loop(self): def do_loop(self):
if self.first:
## Windows 10 doesn't want to show the prog dialog content
## until after the timer's been called again. Something to
## do with cooperative multi threading maybe?
## So this just trips the timer loop an extra time at the start.
self.first = False
QTimer.singleShot(0, self.do_loop)
return
book = self.book_list[self.i] book = self.book_list[self.i]
try: try:
## collision spec passed into getadapter by partial from fff_plugin ## collision spec passed into getadapter by partial from fff_plugin
@ -898,11 +961,6 @@ class UpdateExistingDialog(SizePersistedDialog):
self.updatemeta.setChecked(self.prefs['updatemeta']) self.updatemeta.setChecked(self.prefs['updatemeta'])
horz.addWidget(self.updatemeta) horz.addWidget(self.updatemeta)
self.updateepubcover = QCheckBox(_('Update EPUB Cover?'),self)
self.updateepubcover.setToolTip(_('Update book cover image from site or defaults (if found) <i>inside</i> the EPUB when EPUB is updated.'))
self.updateepubcover.setChecked(self.prefs['updateepubcover'])
horz.addWidget(self.updateepubcover)
self.bgmeta = QCheckBox(_('Background Metadata?'),self) self.bgmeta = QCheckBox(_('Background Metadata?'),self)
self.bgmeta.setToolTip(_("Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail.")) self.bgmeta.setToolTip(_("Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail."))
self.bgmeta.setChecked(self.prefs['bgmeta']) self.bgmeta.setChecked(self.prefs['bgmeta'])
@ -954,7 +1012,6 @@ class UpdateExistingDialog(SizePersistedDialog):
'collision': unicode(self.collision.currentText()), 'collision': unicode(self.collision.currentText()),
'updatemeta': self.updatemeta.isChecked(), 'updatemeta': self.updatemeta.isChecked(),
'bgmeta': self.bgmeta.isChecked(), 'bgmeta': self.bgmeta.isChecked(),
'updateepubcover': self.updateepubcover.isChecked(),
'smarten_punctuation':self.prefs['smarten_punctuation'], 'smarten_punctuation':self.prefs['smarten_punctuation'],
'do_wordcount':self.prefs['do_wordcount'], 'do_wordcount':self.prefs['do_wordcount'],
} }
@ -1027,7 +1084,7 @@ class StoryListTableWidget(QTableWidget):
books = [] books = []
#print("=========================\nbooks:%s"%self.books) #print("=========================\nbooks:%s"%self.books)
for row in range(self.rowCount()): for row in range(self.rowCount()):
rnum = convert_qvariant(self.item(row, 1).data(Qt.UserRole)) rnum = self.item(row, 1).data(Qt.UserRole)
book = self.books[rnum] book = self.books[rnum]
books.append(book) books.append(book)
return books return books
@ -1035,6 +1092,7 @@ class StoryListTableWidget(QTableWidget):
def remove_selected_rows(self): def remove_selected_rows(self):
self.setFocus() self.setFocus()
rows = self.selectionModel().selectedRows() rows = self.selectionModel().selectedRows()
rows = sorted(rows, key=lambda x: x.row(), reverse=True)
if len(rows) == 0: if len(rows) == 0:
return return
message = '<p>'+_('Are you sure you want to remove this book from the list?') message = '<p>'+_('Are you sure you want to remove this book from the list?')
@ -1043,7 +1101,7 @@ class StoryListTableWidget(QTableWidget):
if not confirm(message,'fff_delete_item', self): if not confirm(message,'fff_delete_item', self):
return return
first_sel_row = self.currentRow() first_sel_row = self.currentRow()
for selrow in reversed(rows): for selrow in rows:
self.removeRow(selrow.row()) self.removeRow(selrow.row())
if first_sel_row < self.rowCount(): if first_sel_row < self.rowCount():
self.select_and_scroll_to_row(first_sel_row) self.select_and_scroll_to_row(first_sel_row)
@ -1054,6 +1112,19 @@ class StoryListTableWidget(QTableWidget):
self.selectRow(row) self.selectRow(row)
self.scrollToItem(self.currentItem()) self.scrollToItem(self.currentItem())
## Added to allow sorting by Notes column
class NotesWidgetItem(QTableWidgetItem):
def __init__(self,content):
QTableWidgetItem.__init__(self)
self.content=content
def currentText(self):
return self.content.currentText()
def __lt__(self, other):
return (unicode(self.currentText()).lower().strip() <
unicode(other.currentText()).lower().strip())
class RejectListTableWidget(QTableWidget): class RejectListTableWidget(QTableWidget):
def __init__(self, parent,rejectreasons=[]): def __init__(self, parent,rejectreasons=[]):
@ -1098,6 +1169,7 @@ class RejectListTableWidget(QTableWidget):
self.setItem(row, 1, EditableTableWidgetItem(rej.title)) self.setItem(row, 1, EditableTableWidgetItem(rej.title))
self.setItem(row, 2, EditableTableWidgetItem(rej.auth)) self.setItem(row, 2, EditableTableWidgetItem(rej.auth))
# sort_func orders dropdown-constant to preserve user order.
note_cell = EditWithComplete(self,sort_func=lambda x:1) note_cell = EditWithComplete(self,sort_func=lambda x:1)
items = [rej.note]+self.rejectreasons items = [rej.note]+self.rejectreasons
@ -1105,12 +1177,14 @@ class RejectListTableWidget(QTableWidget):
note_cell.show_initial_value(rej.note) note_cell.show_initial_value(rej.note)
note_cell.set_separator(None) note_cell.set_separator(None)
note_cell.setToolTip(_('Select or Edit Reject Note.')) note_cell.setToolTip(_('Select or Edit Reject Note.'))
self.setItem(row, 3, NotesWidgetItem(note_cell))
self.setCellWidget(row, 3, note_cell) self.setCellWidget(row, 3, note_cell)
note_cell.setCursorPosition(0) note_cell.setCursorPosition(0)
def remove_selected_rows(self): def remove_selected_rows(self):
self.setFocus() self.setFocus()
rows = self.selectionModel().selectedRows() rows = self.selectionModel().selectedRows()
rows = sorted(rows, key=lambda x: x.row(), reverse=True)
if len(rows) == 0: if len(rows) == 0:
return return
message = '<p>'+_('Are you sure you want to remove this URL from the list?') message = '<p>'+_('Are you sure you want to remove this URL from the list?')
@ -1119,7 +1193,7 @@ class RejectListTableWidget(QTableWidget):
if not confirm(message,'fff_rejectlist_delete_item_again', self): if not confirm(message,'fff_rejectlist_delete_item_again', self):
return return
first_sel_row = self.currentRow() first_sel_row = self.currentRow()
for selrow in reversed(rows): for selrow in rows:
self.removeRow(selrow.row()) self.removeRow(selrow.row())
if first_sel_row < self.rowCount(): if first_sel_row < self.rowCount():
self.select_and_scroll_to_row(first_sel_row) self.select_and_scroll_to_row(first_sel_row)
@ -1215,7 +1289,7 @@ class RejectListDialog(SizePersistedDialog):
rejectrows = [] rejectrows = []
for row in range(self.rejects_table.rowCount()): for row in range(self.rejects_table.rowCount()):
url = unicode(self.rejects_table.item(row, 0).text()).strip() url = unicode(self.rejects_table.item(row, 0).text()).strip()
book_id =convert_qvariant(self.rejects_table.item(row, 0).data(Qt.UserRole)) book_id =self.rejects_table.item(row, 0).data(Qt.UserRole)
title = unicode(self.rejects_table.item(row, 1).text()).strip() title = unicode(self.rejects_table.item(row, 1).text()).strip()
auth = unicode(self.rejects_table.item(row, 2).text()).strip() auth = unicode(self.rejects_table.item(row, 2).text()).strip()
note = unicode(self.rejects_table.cellWidget(row, 3).currentText()).strip() note = unicode(self.rejects_table.cellWidget(row, 3).currentText()).strip()
@ -1225,7 +1299,7 @@ class RejectListDialog(SizePersistedDialog):
def get_reject_list_ids(self): def get_reject_list_ids(self):
rejectrows = [] rejectrows = []
for row in range(self.rejects_table.rowCount()): for row in range(self.rejects_table.rowCount()):
book_id = convert_qvariant(self.rejects_table.item(row, 0).data(Qt.UserRole)) book_id = self.rejects_table.item(row, 0).data(Qt.UserRole)
if book_id: if book_id:
rejectrows.append(book_id) rejectrows.append(book_id)
return rejectrows return rejectrows
@ -1246,6 +1320,7 @@ class EditTextDialog(SizePersistedDialog):
icon=None, title=None, label=None, tooltip=None, icon=None, title=None, label=None, tooltip=None,
read_only=False, read_only=False,
rejectreasons=[],reasonslabel=None, rejectreasons=[],reasonslabel=None,
accept_storyurls=False,
save_size_name='fff:edit text dialog', save_size_name='fff:edit text dialog',
): ):
SizePersistedDialog.__init__(self, parent, save_size_name) SizePersistedDialog.__init__(self, parent, save_size_name)
@ -1259,8 +1334,11 @@ class EditTextDialog(SizePersistedDialog):
self.setWindowIcon(icon) self.setWindowIcon(icon)
self.l.addWidget(self.label) self.l.addWidget(self.label)
self.textedit = QTextEdit(self) if accept_storyurls:
self.textedit.setLineWrapMode(QTextEdit.NoWrap) self.textedit = DroppableQTextEdit(self)
else:
self.textedit = QTextEdit(self)
self.textedit.setLineWrapMode(QTextEditNoWrap)
self.textedit.setReadOnly(read_only) self.textedit.setReadOnly(read_only)
self.textedit.setText(text) self.textedit.setText(text)
self.l.addWidget(self.textedit) self.l.addWidget(self.textedit)
@ -1303,7 +1381,18 @@ class EditTextDialog(SizePersistedDialog):
def get_reason_text(self): def get_reason_text(self):
return unicode(self.reason_edit.currentText()).strip() return unicode(self.reason_edit.currentText()).strip()
class IniTextDialog(SizePersistedDialog): class QTextEditPlainPaste(QTextEdit):
def insertFromMimeData(self, mimeData):
# logger.debug("insertFromMimeData called")
#Ensure it is text.
if (mimeData.hasText()):
text = mimeData.text()
self.insertPlainText(text)
#In case not text.
else:
QTextEdit.insertFromMimeData(self, mimeData)
class IniTextDialog(HotKeyedSizePersistedDialog):
def __init__(self, parent, text, def __init__(self, parent, text,
icon=None, title=None, label=None, icon=None, title=None, label=None,
@ -1311,9 +1400,7 @@ class IniTextDialog(SizePersistedDialog):
read_only=False, read_only=False,
save_size_name='fff:ini text dialog', save_size_name='fff:ini text dialog',
): ):
SizePersistedDialog.__init__(self, parent, save_size_name) super(IniTextDialog,self).__init__(parent, save_size_name)
self.keys=dict()
self.l = QVBoxLayout() self.l = QVBoxLayout()
self.setLayout(self.l) self.setLayout(self.l)
@ -1324,7 +1411,7 @@ class IniTextDialog(SizePersistedDialog):
self.setWindowIcon(icon) self.setWindowIcon(icon)
self.l.addWidget(self.label) self.l.addWidget(self.label)
self.textedit = QTextEdit(self) self.textedit = QTextEditPlainPaste(self)
highlighter = IniHighlighter(self.textedit, highlighter = IniHighlighter(self.textedit,
sections=get_valid_sections(), sections=get_valid_sections(),
@ -1333,7 +1420,7 @@ class IniTextDialog(SizePersistedDialog):
entry_keywords=get_valid_entry_keywords(), entry_keywords=get_valid_entry_keywords(),
) )
self.textedit.setLineWrapMode(QTextEdit.NoWrap) self.textedit.setLineWrapMode(QTextEditNoWrap)
try: try:
self.textedit.setFont(QFont("Courier", self.textedit.setFont(QFont("Courier",
parent.font().pointSize()+1)) parent.font().pointSize()+1))
@ -1380,6 +1467,8 @@ class IniTextDialog(SizePersistedDialog):
self.addCtrlKeyPress(QtCore.Qt.Key_F,self.findFocus) self.addCtrlKeyPress(QtCore.Qt.Key_F,self.findFocus)
self.addCtrlKeyPress(QtCore.Qt.Key_G,self.find) self.addCtrlKeyPress(QtCore.Qt.Key_G,self.find)
self.addCtrlKeyPress(QtCore.Qt.Key_Return,self.accept)
self.addCtrlKeyPress(QtCore.Qt.Key_Enter,self.accept) # num pad
button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
button_box.accepted.connect(self.accept) button_box.accepted.connect(self.accept)
button_box.rejected.connect(self.reject) button_box.rejected.connect(self.reject)
@ -1412,19 +1501,6 @@ class IniTextDialog(SizePersistedDialog):
# print("call parent accept") # print("call parent accept")
return SizePersistedDialog.accept(self) return SizePersistedDialog.accept(self)
def addCtrlKeyPress(self,key,func):
# print("addKeyPress: key(0x%x)"%key)
# print("control: 0x%x"%QtCore.Qt.ControlModifier)
self.keys[key]=func
def keyPressEvent(self, event):
# print("event: key(0x%x) modifiers(0x%x)"%(event.key(),event.modifiers()))
if (event.modifiers() & QtCore.Qt.ControlModifier) and event.key() in self.keys:
func = self.keys[event.key()]
return func()
else:
return SizePersistedDialog.keyPressEvent(self, event)
def get_plain_text(self): def get_plain_text(self):
return unicode(self.textedit.toPlainText()) return unicode(self.textedit.toPlainText())
@ -1458,7 +1534,7 @@ class IniTextDialog(SizePersistedDialog):
else: else:
# Make the next search start from the begining again # Make the next search start from the begining again
self.lastStart = 0 self.lastStart = 0
self.textedit.moveCursor(self.textedit.textCursor().Start) self.textedit.moveCursor(MoveOperations.Start)
def moveCursor(self,start,end): def moveCursor(self,start,end):
@ -1470,7 +1546,8 @@ class IniTextDialog(SizePersistedDialog):
# Next we move the Cursor by over the match and pass the KeepAnchor parameter # Next we move the Cursor by over the match and pass the KeepAnchor parameter
# which will make the cursor select the match's text # which will make the cursor select the match's text
cursor.movePosition(cursor.Right,cursor.KeepAnchor,end - start) cursor.movePosition(MoveOperations.Right,
MoveMode.KeepAnchor,end - start)
# And finally we set this new cursor as the parent's # And finally we set this new cursor as the parent's
self.textedit.setTextCursor(cursor) self.textedit.setTextCursor(cursor)
@ -1484,15 +1561,14 @@ class IniTextDialog(SizePersistedDialog):
cursor.setPosition(0) cursor.setPosition(0)
# Next we move the Cursor down lineno times # Next we move the Cursor down lineno times
cursor.movePosition(cursor.Down,cursor.MoveAnchor,lineno-1) cursor.movePosition(MoveOperations.Down,MoveMode.MoveAnchor,lineno-1)
# Next we move the Cursor to the end of the line # Next we move the Cursor to the end of the line
cursor.movePosition(cursor.EndOfLine,cursor.KeepAnchor,1) cursor.movePosition(MoveOperations.EndOfLine,MoveMode.KeepAnchor,1)
# And finally we set this new cursor as the parent's # And finally we set this new cursor as the parent's
self.textedit.setTextCursor(cursor) self.textedit.setTextCursor(cursor)
class ViewLog(SizePersistedDialog): class ViewLog(SizePersistedDialog):
def label_clicked(self, event, lineno=None): def label_clicked(self, event, lineno=None):
@ -1581,28 +1657,30 @@ class EmailPassDialog(QDialog):
QDialog.__init__(self, gui) QDialog.__init__(self, gui)
self.status=False self.status=False
self.l = QGridLayout() self.l = QVBoxLayout()
self.setLayout(self.l) self.setLayout(self.l)
self.setWindowTitle(_('Password')) grid = QGridLayout()
self.l.addWidget(QLabel(_("Enter Email Password for %s:")%user),0,0,1,2) self.l.addLayout(grid)
# self.l.addWidget(QLabel(_("Password:")),1,0) self.setWindowTitle(_('Password'))
grid.addWidget(QLabel(_("Enter Email Password for %s:")%user),0,0,1,2)
# grid.addWidget(QLabel(_("Password:")),1,0)
self.passwd = QLineEdit(self) self.passwd = QLineEdit(self)
self.passwd.setEchoMode(QLineEdit.Password) self.passwd.setEchoMode(QLineEdit.Password)
self.l.addWidget(self.passwd,1,0,1,2) grid.addWidget(self.passwd,1,0,1,2)
horz = QHBoxLayout()
self.l.addLayout(horz)
self.ok_button = QPushButton(_('OK'), self) self.ok_button = QPushButton(_('OK'), self)
self.ok_button.clicked.connect(self.ok) self.ok_button.clicked.connect(self.ok)
self.l.addWidget(self.ok_button,2,0) horz.addWidget(self.ok_button)
self.cancel_button = QPushButton(_('Cancel'), self) self.cancel_button = QPushButton(_('Cancel'), self)
self.cancel_button.clicked.connect(self.cancel) self.cancel_button.clicked.connect(self.cancel)
self.l.addWidget(self.cancel_button,2,1) horz.addWidget(self.cancel_button)
# set stretch factors the same.
self.l.setColumnStretch(0,1)
self.l.setColumnStretch(1,1)
self.resize(self.sizeHint()) self.resize(self.sizeHint())

File diff suppressed because it is too large Load diff

View file

@ -7,7 +7,6 @@ __license__ = 'GPL v3'
__copyright__ = '2020, Jim Miller' __copyright__ = '2020, Jim Miller'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import collections
from functools import reduce from functools import reduce
from io import StringIO from io import StringIO
@ -20,6 +19,7 @@ from fanficfare.configurable import Configuration
from calibre_plugins.fanficfare_plugin.prefs import prefs from calibre_plugins.fanficfare_plugin.prefs import prefs
from fanficfare.six import ensure_text from fanficfare.six import ensure_text
from fanficfare.six.moves import configparser from fanficfare.six.moves import configparser
from fanficfare.six.moves import collections_abc
def get_fff_personalini(): def get_fff_personalini():
return prefs['personal.ini'] return prefs['personal.ini']
@ -33,8 +33,8 @@ def get_fff_config(url,fileform="epub",personalini=None):
except Exception as e: except Exception as e:
logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections)) logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
configuration = Configuration(sections,fileform) configuration = Configuration(sections,fileform)
configuration.readfp(StringIO(ensure_text(get_resources("plugin-defaults.ini")))) configuration.read_file(StringIO(ensure_text(get_resources("plugin-defaults.ini"))))
configuration.readfp(StringIO(ensure_text(personalini))) configuration.read_file(StringIO(ensure_text(personalini)))
return configuration return configuration
@ -52,7 +52,7 @@ def test_config(initext):
return errors return errors
class OrderedSet(collections.MutableSet): class OrderedSet(collections_abc.MutableSet):
def __init__(self, iterable=None): def __init__(self, iterable=None):
self.end = end = [] self.end = end = []

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

View file

@ -12,10 +12,17 @@ import re
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from PyQt5.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter,
QTextCharFormat, QBrush, QFont)
try: try:
from PyQt5.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont) # qt6 Calibre v6+
except ImportError as e: QFontNormal = QFont.Weight.Normal
from PyQt4.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont) QFontBold = QFont.Weight.Bold
except:
# qt5 Calibre v2-5
QFontNormal = QFont.Normal
QFontBold = QFont.Bold
from fanficfare.six import string_types from fanficfare.six import string_types
@ -51,7 +58,7 @@ class IniHighlighter(QSyntaxHighlighter):
'knownkeywords':QColor(Qt.blue).lighter(150), 'knownkeywords':QColor(Qt.blue).lighter(150),
'knownsections':Qt.darkCyan, 'knownsections':Qt.darkCyan,
'teststories':Qt.cyan, 'teststories':Qt.cyan,
'storyUrls':Qt.magenta, 'storyUrls':QColor(Qt.magenta).lighter(150),
'comments':Qt.yellow 'comments':Qt.yellow
} }
except Exception as e: except Exception as e:
@ -83,20 +90,21 @@ class IniHighlighter(QSyntaxHighlighter):
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) ) self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) )
# *all* sections -- change known later. # *all* sections -- change known later.
self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", colors['errors'], QFont.Bold, blocknum=1 ) ) self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", colors['errors'], QFontBold, blocknum=1 ) )
if sections: if sections:
# *known* sections # *known* sections
resections = r'('+(r'|'.join(sections))+r')' resections = r'('+(r'|'.join(sections))+r')'
resections = resections.replace('.','\.') #escape dots. resections = resections.replace('.',r'\.') #escape dots.
self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", colors['knownsections'], QFont.Bold, blocknum=2 ) ) self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", colors['knownsections'], QFontBold, blocknum=2 ) )
# test story sections # test story sections
self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", colors['teststories'], blocknum=3 ) self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", colors['teststories'], blocknum=3 )
self.highlightingRules.append( self.teststoryRule ) self.highlightingRules.append( self.teststoryRule )
# storyUrl sections # storyUrl sections
self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", colors['storyUrls'], blocknum=4 ) # StoryUrls are *not* checked beyond looking for https?://
self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", colors['storyUrls'], QFontBold, blocknum=2 )
self.highlightingRules.append( self.storyUrlRule ) self.highlightingRules.append( self.storyUrlRule )
# NOT comments -- but can be custom columns, so don't flag. # NOT comments -- but can be custom columns, so don't flag.
@ -127,15 +135,16 @@ class IniHighlighter(QSyntaxHighlighter):
if blocknum == 3: if blocknum == 3:
self.setFormat( 0, len(text), self.teststoryRule.highlight ) self.setFormat( 0, len(text), self.teststoryRule.highlight )
# storyUrl section rules: ## changed storyUrl section to also be blocknum=1 April 2023
if blocknum == 4: ## storyUrl section rules:
self.setFormat( 0, len(text), self.storyUrlRule.highlight ) # if blocknum == 4:
# self.setFormat( 0, len(text), self.storyUrlRule.highlight )
self.setCurrentBlockState( blocknum ) self.setCurrentBlockState( blocknum )
class HighlightingRule(): class HighlightingRule():
def __init__( self, pattern, color, def __init__( self, pattern, color,
weight=QFont.Normal, weight=QFontNormal,
style=Qt.SolidPattern, style=Qt.SolidPattern,
blocknum=0): blocknum=0):
if isinstance(pattern, string_types): if isinstance(pattern, string_types):

View file

@ -2,7 +2,6 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
import six
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>' __copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
@ -15,10 +14,8 @@ from time import sleep
from datetime import time from datetime import time
from io import StringIO from io import StringIO
from collections import defaultdict from collections import defaultdict
import sys
from calibre.utils.ipc.server import Empty, Server
from calibre.utils.ipc.job import ParallelJob
from calibre.constants import numeric_version as calibre_version
from calibre.utils.date import local_tz from calibre.utils.date import local_tz
# pulls in translation files for _() strings # pulls in translation files for _() strings
@ -33,160 +30,100 @@ except NameError:
# #
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
def do_download_worker(book_list, def do_download_worker_single(site,
options, book_list,
cpus, options,
merge=False, merge,
notification=lambda x,y:x): notification=lambda x,y:x):
'''
Coordinator job, to launch child jobs to do downloads.
This is run as a worker job in the background to keep the UI more
responsive and get around any memory leak issues as it will launch
a child job for each book as a worker process
'''
## Now running one BG proc per site, which downloads for the same
## site in serial.
logger.info("CPUs:%s"%cpus)
server = Server(pool_size=cpus)
logger.info(options['version']) logger.info(options['version'])
sites_lists = defaultdict(list) ## same info debug calibre prints out at startup. For when users
[ sites_lists[x['site']].append(x) for x in book_list if x['good'] ] ## give me job output instead of debug log.
from calibre.debug import print_basic_debug_info
print_basic_debug_info(sys.stderr)
totals = {}
# can't do direct assignment in list comprehension? I'm sure it
# makes sense to some pythonista.
# [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
[ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
# logger.debug(sites_lists.keys())
# Queue all the jobs
jobs_running = 0
for site in sites_lists.keys():
site_list = sites_lists[site]
logger.info(_("Launch background process for site %s:")%site + "\n" +
"\n".join([ x['url'] for x in site_list ]))
# logger.debug([ x['url'] for x in site_list])
args = ['calibre_plugins.fanficfare_plugin.jobs',
'do_download_site',
(site,site_list,options,merge)]
job = ParallelJob('arbitrary_n',
"site:(%s)"%site,
done=None,
args=args)
job._site_list = site_list
job._processed = False
server.add_job(job)
jobs_running += 1
# This server is an arbitrary_n job, so there is a notifier available.
# Set the % complete to a small number to avoid the 'unavailable' indicator
notification(0.01, _('Downloading FanFiction Stories')) notification(0.01, _('Downloading FanFiction Stories'))
from calibre_plugins.fanficfare_plugin import FanFicFareBase
fffbase = FanFicFareBase(options['plugin_path'])
with fffbase: # so the sys.path was modified while loading the
# plug impl.
from fanficfare.fff_profile import do_cprofile
# dequeue the job results as they arrive, saving the results ## extra function just so I can easily use the same
count = 0 ## @do_cprofile decorator
while True: @do_cprofile
job = server.changed_jobs_queue.get() def profiled_func():
# logger.debug("job get job._processed:%s"%job._processed) count = 0
# A job can 'change' when it is not finished, for example if it totals = {}
# produces a notification. # can't do direct assignment in list comprehension? I'm sure it
msg = None # makes sense to some pythonista.
try: # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
## msg = book['url'] [ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
(percent,msg) = job.notifications.get_nowait() # logger.debug(sites_lists.keys())
# logger.debug("%s<-%s"%(percent,msg))
if percent == 10.0: # Only when signaling d/l done. def do_indiv_notif(percent,msg):
count += 1
totals[msg] = 1.0/len(totals)
# logger.info("Finished: %s"%msg)
else:
totals[msg] = percent/len(totals) totals[msg] = percent/len(totals)
notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)}) notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
except Empty:
pass
# without update, is_finished will never be set. however, we
# do want to get all the notifications for status so we don't
# miss the 'done' ones.
job.update(consume_notifications=False)
# if not job._processed: do_list = []
# sleep(0.5) done_list = []
## Can have a race condition where job.is_finished before logger.info("\n\n"+_("Downloading FanFiction Stories")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
## notifications for all downloads have been processed. ## pass failures from metadata through bg job so all results are
## Or even after the job has been finished. ## together.
# logger.debug("job.is_finished(%s) or job._processed(%s)"%(job.is_finished, job._processed))
if not job.is_finished:
continue
## only process each job once. We can get more than one loop
## after job.is_finished.
if not job._processed:
# sleep(1)
# A job really finished. Get the information.
## This is where bg proc details end up in GUI log.
## job.details is the whole debug log for each proc.
logger.info("\n\n" + ("="*80) + " " + job.details.replace('\r',''))
# logger.debug("Finished background process for site %s:\n%s"%(job._site_list[0]['site'],"\n".join([ x['url'] for x in job._site_list ])))
for b in job._site_list:
book_list.remove(b)
book_list.extend(job.result)
job._processed = True
jobs_running -= 1
## Can't use individual count--I've seen stories all reported
## finished before results of all jobs processed.
if jobs_running == 0:
book_list = sorted(book_list,key=lambda x : x['listorder'])
logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
good_lists = defaultdict(list)
bad_lists = defaultdict(list)
for book in book_list: for book in book_list:
if book['good']: if book['good']:
good_lists[book['status']].append(book) do_list.append(book)
else: else:
bad_lists[book['status']].append(book) done_list.append(book)
for book in do_list:
# logger.info("%s"%book['url'])
done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
count += 1
return finish_download(done_list)
return profiled_func()
order = [_('Add'), def finish_download(donelist):
_('Update'), book_list = sorted(donelist,key=lambda x : x['listorder'])
_('Meta'), logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
_('Different URL'),
_('Rejected'),
_('Skipped'),
_('Bad'),
_('Error'),
]
j = 0
for d in [ good_lists, bad_lists ]:
for status in order:
if d[status]:
l = d[status]
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
for book in l:
book['reportorder'] = j
j += 1
del d[status]
# just in case a status is added but doesn't appear in order.
for status in d.keys():
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
break
server.close() good_lists = defaultdict(list)
bad_lists = defaultdict(list)
for book in book_list:
if book['good']:
good_lists[book['status']].append(book)
else:
bad_lists[book['status']].append(book)
order = [_('Add'),
_('Update'),
_('Meta'),
_('Different URL'),
_('Rejected'),
_('Skipped'),
_('Bad'),
_('Error'),
]
stnum = 0
for d in [ good_lists, bad_lists ]:
for status in order:
stnum += 1
if d[status]:
l = d[status]
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
for book in l:
# Add prior listorder to 10000 * status num for
# ordering of accumulated results with multiple bg
# jobs
book['reportorder'] = stnum*10000 + book['listorder']
del d[status]
# just in case a status is added but doesn't appear in order.
for status in d.keys():
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
# return the book list as the job result # return the book list as the job result
return book_list return book_list
def do_download_site(site,book_list,options,merge,notification=lambda x,y:x):
# logger.info(_("Started job for %s")%site)
retval = []
for book in book_list:
# logger.info("%s"%book['url'])
retval.append(do_download_for_worker(book,options,merge,notification))
notification(10.0,book['url'])
return retval
def do_download_for_worker(book,options,merge,notification=lambda x,y:x): def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
''' '''
Child job, to download story when run as a worker job Child job, to download story when run as a worker job
@ -196,13 +133,13 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
fffbase = FanFicFareBase(options['plugin_path']) fffbase = FanFicFareBase(options['plugin_path'])
with fffbase: # so the sys.path was modified while loading the with fffbase: # so the sys.path was modified while loading the
# plug impl. # plug impl.
from calibre_plugins.fanficfare_plugin.dialogs import NotGoingToDownload
from calibre_plugins.fanficfare_plugin.prefs import ( from calibre_plugins.fanficfare_plugin.prefs import (
SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE, SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL) UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
from calibre_plugins.fanficfare_plugin.wordcount import get_word_count from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
from fanficfare import adapters, writers from fanficfare import adapters, writers
from fanficfare.epubutils import get_update_data from fanficfare.epubutils import get_update_data
from fanficfare.exceptions import NotGoingToDownload
from fanficfare.six import text_type as unicode from fanficfare.six import text_type as unicode
from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config
@ -222,9 +159,6 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
options['fileform'], options['fileform'],
options['personal.ini']) options['personal.ini'])
if not options['updateepubcover'] and 'epub_for_update' in book and book['collision'] in (UPDATE, UPDATEALWAYS):
configuration.set("overrides","never_make_cover","true")
# images only for epub, html, even if the user mistakenly # images only for epub, html, even if the user mistakenly
# turned it on else where. # turned it on else where.
if options['fileform'] not in ("epub","html"): if options['fileform'] not in ("epub","html"):
@ -234,18 +168,12 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
adapter.is_adult = book['is_adult'] adapter.is_adult = book['is_adult']
adapter.username = book['username'] adapter.username = book['username']
adapter.password = book['password'] adapter.password = book['password']
adapter.totp = book['totp']
adapter.setChaptersRange(book['begin'],book['end']) adapter.setChaptersRange(book['begin'],book['end'])
## each site download job starts with a new copy of the ## each site download job starts with a new copy of the
## cookiejar and basic_cache from the FG process. They ## cookiejar and basic_cache from the FG process. They
## are not shared between different sites' BG downloads ## are not shared between different sites' BG downloads
if configuration.getConfig('use_browser_cache'):
if 'browser_cache' in options:
configuration.set_browser_cache(options['browser_cache'])
else:
options['browser_cache'] = configuration.get_browser_cache()
if 'browser_cachefile' in options:
options['browser_cache'].load_cache(options['browser_cachefile'])
if 'basic_cache' in options: if 'basic_cache' in options:
configuration.set_basic_cache(options['basic_cache']) configuration.set_basic_cache(options['basic_cache'])
else: else:
@ -261,6 +189,17 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
if not story.getMetadata("series") and 'calibre_series' in book: if not story.getMetadata("series") and 'calibre_series' in book:
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1]) adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
# logger.debug(merge)
# logger.debug(book.get('epub_for_update','(NONE)'))
# logger.debug(options.get('mergebook','(NOMERGEBOOK)'))
# is a merge, is a pre-existing anthology, and is not a pre-existing book in anthology.
if merge and 'mergebook' in options and 'epub_for_update' not in book:
# internal for plugin anthologies to mark chapters
# (new) in new stories
story.setMetadata("newforanthology","true")
logger.debug("metadata newforanthology:%s"%story.getMetadata("newforanthology"))
# set PI version instead of default. # set PI version instead of default.
if 'version' in options: if 'version' in options:
story.setMetadata('version',options['version']) story.setMetadata('version',options['version'])
@ -269,7 +208,6 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True) book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("publisher") book['publisher'] = story.getMetadata("publisher")
book['url'] = story.getMetadata("storyUrl", removeallentities=True) book['url'] = story.getMetadata("storyUrl", removeallentities=True)
book['tags'] = story.getSubjectTags(removeallentities=True)
book['comments'] = story.get_sanitized_description() book['comments'] = story.get_sanitized_description()
book['series'] = story.getMetadata("series", removeallentities=True) book['series'] = story.getMetadata("series", removeallentities=True)
@ -346,20 +284,21 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
adapter.oldchaptersmap, adapter.oldchaptersmap,
adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9] adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9]
# dup handling from fff_plugin needed for anthology updates. # dup handling from fff_plugin needed for anthology updates & BG metadata.
if book['collision'] == UPDATE: if book['collision'] in (UPDATE,UPDATEALWAYS):
if chaptercount == urlchaptercount: if chaptercount == urlchaptercount and book['collision'] == UPDATE:
if merge: if merge:
## Deliberately pass for UPDATEALWAYS merge.
book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
book['all_metadata'] = story.getAllMetadata(removeallentities=True) book['all_metadata'] = story.getAllMetadata(removeallentities=True)
if options['savemetacol'] != '': if options['savemetacol'] != '':
book['savemetacol'] = story.dump_html_metadata() book['savemetacol'] = story.dump_html_metadata()
book['outfile'] = book['epub_for_update'] # for anthology merge ops. book['outfile'] = book['epub_for_update'] # for anthology merge ops.
return book return book
else: # not merge, else:
raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False) raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
elif chaptercount > urlchaptercount: elif chaptercount > urlchaptercount and not (book['collision'] == UPDATEALWAYS and adapter.getConfig('force_update_epub_always')):
raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png') raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
elif chaptercount == 0: elif chaptercount == 0:
raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png') raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')
@ -397,7 +336,11 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ): options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ):
try: try:
wordcount = get_word_count(outfile) wordcount = get_word_count(outfile)
# logger.info("get_word_count:%s"%wordcount) # logger.info("get_word_count:%s"%wordcount)
# clear cache for the rather unusual case of
# numWords affecting other previously cached
# entries.
story.clear_processed_metadata_cache()
story.setMetadata('numWords',wordcount) story.setMetadata('numWords',wordcount)
writer.writeStory(outfilename=outfile, forceOverwrite=True) writer.writeStory(outfilename=outfile, forceOverwrite=True)
book['all_metadata'] = story.getAllMetadata(removeallentities=True) book['all_metadata'] = story.getAllMetadata(removeallentities=True)
@ -406,8 +349,7 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
except: except:
logger.error("WordCount failed") logger.error("WordCount failed")
if options['smarten_punctuation'] and options['fileform'] == "epub" \ if options['smarten_punctuation'] and options['fileform'] == "epub":
and calibre_version >= (0, 9, 39):
# for smarten punc # for smarten punc
from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
from calibre.utils.logging import Log from calibre.utils.logging import Log
@ -417,12 +359,14 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
data = {'smarten_punctuation':True} data = {'smarten_punctuation':True}
opts = ALL_OPTS.copy() opts = ALL_OPTS.copy()
opts.update(data) opts.update(data)
O = namedtuple('Options', ' '.join(six.iterkeys(ALL_OPTS))) O = namedtuple('Options', ' '.join(ALL_OPTS.keys()))
opts = O(**opts) opts = O(**opts)
log = Log(level=Log.DEBUG) log = Log(level=Log.DEBUG)
polish({outfile:outfile}, opts, log, logger.info) polish({outfile:outfile}, opts, log, logger.info)
## here to catch tags set in chapters in literotica for
## both overwrites and updates.
book['tags'] = story.getSubjectTags(removeallentities=True)
except NotGoingToDownload as d: except NotGoingToDownload as d:
book['good']=False book['good']=False
book['status']=_('Bad') book['status']=_('Bad')
@ -448,11 +392,12 @@ def inject_cal_cols(book,story,configuration):
if 'calibre_columns' in book: if 'calibre_columns' in book:
injectini = ['[injected]'] injectini = ['[injected]']
extra_valid = [] extra_valid = []
for k, v in six.iteritems(book['calibre_columns']): for k in book['calibre_columns'].keys():
v = book['calibre_columns'][k]
story.setMetadata(k,v['val']) story.setMetadata(k,v['val'])
injectini.append('%s_label:%s'%(k,v['label'])) injectini.append('%s_label:%s'%(k,v['label']))
extra_valid.append(k) extra_valid.append(k)
if extra_valid: # if empty, there's nothing to add. if extra_valid: # if empty, there's nothing to add.
injectini.append("add_to_extra_valid_entries:,"+','.join(extra_valid)) injectini.append("add_to_extra_valid_entries:,"+','.join(extra_valid))
configuration.readfp(StringIO('\n'.join(injectini))) configuration.read_file(StringIO('\n'.join(injectini)))
#print("added:\n%s\n"%('\n'.join(injectini))) #print("added:\n%s\n"%('\n'.join(injectini)))

File diff suppressed because it is too large Load diff

View file

@ -3,22 +3,9 @@
[defaults] [defaults]
## [defaults] section applies to all formats and sites but may be ## [defaults] section applies to all formats and sites but may be
## overridden at several levels. Example: ## overridden at several levels. See
## https://github.com/JimmXinu/FanFicFare/wiki/INI-File for more
## [defaults] ## details.
## titlepage_entries: category,genre, status
## [www.whofic.com]
## # overrides defaults.
## titlepage_entries: category,genre, status,dateUpdated,rating
## [epub]
## # overrides defaults & site section
## titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
## [www.whofic.com:epub]
## # overrides defaults, site section & format section
## titlepage_entries: category,genre, status,datePublished
## [overrides]
## # overrides all other sections
## titlepage_entries: category
## Some sites also require the user to confirm they are adult for ## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult. ## adult content. Uncomment by removing '#' in front of is_adult.
@ -29,42 +16,32 @@
## want to make them all look the same? Strip them off, then add them ## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips ## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and ## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern. ## chapter_title_add_pattern in defaults.ini.
#strip_chapter_numbers:true #strip_chapter_numbers:true
#add_chapter_numbers:true #add_chapter_numbers:true
## Add this to genre if there's more than one category.
#add_genre_when_multi_category: Crossover
[epub] [epub]
## include images from img tags in the body and summary of stories. ## Include images from img tags in the body and summary of stories.
## Images will be converted to jpg for size if possible. Images work ## Images will be converted to jpg for size if possible. Images work
## in epub format only. To get mobi or other format with images, ## in epub format only. To get mobi or other format with images,
## download as epub and use Calibre to convert. ## download as epub and use Calibre to convert.
## true by default, uncomment and set false to not include images.
#include_images:true #include_images:true
## Quality level to use when converting images to jpg. Range is 0-100, ## If set false, the summary will have all html stripped for safety.
## reasonable values likely to be in the range 70-95.
#jpg_quality: 95
## If not set, the summary will have all html stripped for safety.
## Both this and include_images must be true to get images in the ## Both this and include_images must be true to get images in the
## summary. ## summary.
## true by default, uncomment and set false to not keep summary html.
#keep_summary_html:true #keep_summary_html:true
## If set, the first image found will be made the cover image. If ## If set true, and there isn't a specific cover image, the first
## keep_summary_html is true, any images in summary will be before any ## image found in the story will be made the cover image. If
## keep_summary_html is true, images in the summary will be before any
## in chapters. ## in chapters.
## true by default, uncomment and set false to turn off
#make_firstimage_cover:true #make_firstimage_cover:true
## Resize images down to width, height, preserving aspect ratio.
## Nook size, with margin.
#image_max_size: 580, 725
## Change image to grayscale, if graphics library allows, to save
## space.
#grayscale_images: false
## Most common, I expect will be using this to save username/passwords ## Most common, I expect will be using this to save username/passwords
## for different sites. Here are a few examples. See defaults.ini ## for different sites. Here are a few examples. See defaults.ini
@ -76,28 +53,6 @@
## default is false ## default is false
#collect_series: true #collect_series: true
[ficwad.com]
#username:YourUsername
#password:YourPassword
[www.adastrafanfic.com]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content.
#is_adult:true
[www.twcslibrary.net]
#username:YourName
#password:yourpassword
#is_adult:true
## default is false
#collect_series: true
[www.fictionalley.org]
#is_adult:true
[www.harrypotterfanfiction.com]
#is_adult:true
[www.fimfiction.net] [www.fimfiction.net]
#is_adult:true #is_adult:true
#fail_on_password: false #fail_on_password: false
@ -106,8 +61,9 @@
#is_adult:true #is_adult:true
## tth is a little unusual--it doesn't require user/pass, but the site ## tth is a little unusual--it doesn't require user/pass, but the site
## keeps track of which chapters you've read and won't send another ## keeps track of which chapters you've read and won't send another
## update until it thinks you're up to date. This way, on download, ## update until it thinks you're up to date. If you set
## it thinks you're up to date. ## username/password, FFF will login to download. Then the site
## thinks you're up to date.
#username:YourName #username:YourName
#password:yourpassword #password:yourpassword

View file

@ -102,9 +102,6 @@ updatecalcover_order=[YES,YES_IF_IMG,NO]
gencalcover_order=[YES,YES_UNLESS_IMG,NO] gencalcover_order=[YES,YES_UNLESS_IMG,NO]
do_wordcount_order=[YES,YES_UNLESS_SITE,NO] do_wordcount_order=[YES,YES_UNLESS_SITE,NO]
# if don't have any settings for FanFicFarePlugin, copy from
# predecessor FanFictionDownLoaderPlugin.
FFDL_PREFS_NAMESPACE = 'FanFictionDownLoaderPlugin'
PREFS_NAMESPACE = 'FanFicFarePlugin' PREFS_NAMESPACE = 'FanFicFarePlugin'
PREFS_KEY_SETTINGS = 'settings' PREFS_KEY_SETTINGS = 'settings'
@ -123,12 +120,13 @@ default_prefs['reject_delete_default'] = True
default_prefs['updatemeta'] = True default_prefs['updatemeta'] = True
default_prefs['bgmeta'] = False default_prefs['bgmeta'] = False
default_prefs['updateepubcover'] = False #default_prefs['updateepubcover'] = True # removed in favor of always True Oct 2022
default_prefs['keeptags'] = False default_prefs['keeptags'] = False
default_prefs['suppressauthorsort'] = False default_prefs['suppressauthorsort'] = False
default_prefs['suppresstitlesort'] = False default_prefs['suppresstitlesort'] = False
default_prefs['authorcase'] = False default_prefs['authorcase'] = False
default_prefs['titlecase'] = False default_prefs['titlecase'] = False
default_prefs['seriescase'] = False
default_prefs['setanthologyseries'] = False default_prefs['setanthologyseries'] = False
default_prefs['mark'] = False default_prefs['mark'] = False
default_prefs['mark_success'] = True default_prefs['mark_success'] = True
@ -146,6 +144,7 @@ default_prefs['adddialogstaysontop'] = False
default_prefs['lookforurlinhtml'] = False default_prefs['lookforurlinhtml'] = False
default_prefs['checkforseriesurlid'] = True default_prefs['checkforseriesurlid'] = True
default_prefs['auto_reject_seriesurlid'] = False default_prefs['auto_reject_seriesurlid'] = False
default_prefs['mark_series_anthologies'] = False
default_prefs['checkforurlchange'] = True default_prefs['checkforurlchange'] = True
default_prefs['injectseries'] = False default_prefs['injectseries'] = False
default_prefs['matchtitleauth'] = True default_prefs['matchtitleauth'] = True
@ -161,11 +160,12 @@ default_prefs['addtolistsonread'] = False
default_prefs['autounnew'] = False default_prefs['autounnew'] = False
default_prefs['updatecalcover'] = SAVE_YES_IF_IMG default_prefs['updatecalcover'] = SAVE_YES_IF_IMG
default_prefs['gencalcover'] = SAVE_YES default_prefs['covernewonly'] = False
default_prefs['gencalcover'] = SAVE_YES_UNLESS_IMG
default_prefs['updatecover'] = False default_prefs['updatecover'] = False
default_prefs['calibre_gen_cover'] = False default_prefs['calibre_gen_cover'] = True
default_prefs['plugin_gen_cover'] = True default_prefs['plugin_gen_cover'] = False
default_prefs['gcnewonly'] = False default_prefs['gcnewonly'] = True
default_prefs['gc_site_settings'] = {} default_prefs['gc_site_settings'] = {}
default_prefs['allow_gc_from_ini'] = True default_prefs['allow_gc_from_ini'] = True
default_prefs['gc_polish_cover'] = False default_prefs['gc_polish_cover'] = False
@ -183,6 +183,7 @@ default_prefs['allow_custcol_from_ini'] = True
default_prefs['std_cols_newonly'] = {} default_prefs['std_cols_newonly'] = {}
default_prefs['set_author_url'] = True default_prefs['set_author_url'] = True
default_prefs['set_series_url'] = True
default_prefs['includecomments'] = False default_prefs['includecomments'] = False
default_prefs['anth_comments_newonly'] = True default_prefs['anth_comments_newonly'] = True
@ -197,6 +198,11 @@ default_prefs['auto_reject_from_email'] = False
default_prefs['update_existing_only_from_email'] = False default_prefs['update_existing_only_from_email'] = False
default_prefs['download_from_email_immediately'] = False default_prefs['download_from_email_immediately'] = False
#default_prefs['single_proc_jobs'] = True # setting and code removed
default_prefs['site_split_jobs'] = True
default_prefs['reconsolidate_jobs'] = True
def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS): def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
db.prefs.set_namespaced(PREFS_NAMESPACE, db.prefs.set_namespaced(PREFS_NAMESPACE,
setting, setting,
@ -211,12 +217,6 @@ def get_library_config(db,setting=PREFS_KEY_SETTINGS,def_prefs=default_prefs):
library_config = db.prefs.get_namespaced(PREFS_NAMESPACE, library_config = db.prefs.get_namespaced(PREFS_NAMESPACE,
setting) setting)
# if don't have any settings for FanFicFarePlugin, copy from
# predecessor FanFictionDownLoaderPlugin.
if library_config is None:
logger.info("Attempting to read settings from predecessor--FFDL")
library_config = db.prefs.get_namespaced(FFDL_PREFS_NAMESPACE,
setting)
if library_config is None: if library_config is None:
# defaults. # defaults.
logger.info("Using default settings") logger.info("Using default settings")

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -30,10 +30,14 @@ from .. import configurable as configurable
## must import each adapter here. ## must import each adapter here.
from . import base_adapter
from . import base_efiction_adapter from . import base_efiction_adapter
from . import adapter_test1 from . import adapter_test1
from . import adapter_test2
from . import adapter_test3
from . import adapter_test4
from . import adapter_fanfictionnet from . import adapter_fanfictionnet
from . import adapter_fictionalleyorg from . import adapter_fictionalleyarchiveorg
from . import adapter_fictionpresscom from . import adapter_fictionpresscom
from . import adapter_ficwadcom from . import adapter_ficwadcom
from . import adapter_fimfictionnet from . import adapter_fimfictionnet
@ -49,10 +53,7 @@ from . import adapter_archiveofourownorg
from . import adapter_ficbooknet from . import adapter_ficbooknet
from . import adapter_midnightwhispers from . import adapter_midnightwhispers
from . import adapter_ksarchivecom from . import adapter_ksarchivecom
from . import adapter_archiveskyehawkecom
from . import adapter_squidgeorgpeja
from . import adapter_libraryofmoriacom from . import adapter_libraryofmoriacom
from . import adapter_wraithbaitcom
from . import adapter_ashwindersycophanthexcom from . import adapter_ashwindersycophanthexcom
from . import adapter_chaossycophanthexcom from . import adapter_chaossycophanthexcom
from . import adapter_erosnsapphosycophanthexcom from . import adapter_erosnsapphosycophanthexcom
@ -61,45 +62,27 @@ from . import adapter_occlumencysycophanthexcom
from . import adapter_phoenixsongnet from . import adapter_phoenixsongnet
from . import adapter_walkingtheplankorg from . import adapter_walkingtheplankorg
from . import adapter_dokugacom from . import adapter_dokugacom
from . import adapter_iketernalnet
from . import adapter_storiesofardacom from . import adapter_storiesofardacom
from . import adapter_destinysgatewaycom
from . import adapter_ncisfictioncom from . import adapter_ncisfictioncom
from . import adapter_fanfiktionde from . import adapter_fanfiktionde
from . import adapter_ponyfictionarchivenet
from . import adapter_themasquenet from . import adapter_themasquenet
from . import adapter_pretendercentrecom from . import adapter_pretendercentrecom
from . import adapter_darksolaceorg from . import adapter_darksolaceorg
from . import adapter_finestoriescom from . import adapter_storyroomcom
from . import adapter_hpfanficarchivecom
from . import adapter_nhamagicalworldsus
from . import adapter_hlfictionnet
from . import adapter_dracoandginnycom from . import adapter_dracoandginnycom
from . import adapter_scarvesandcoffeenet
from . import adapter_wolverineandroguecom from . import adapter_wolverineandroguecom
from . import adapter_merlinficdtwinscouk
from . import adapter_thehookupzonenet from . import adapter_thehookupzonenet
from . import adapter_bloodtiesfancom
from . import adapter_qafficcom
from . import adapter_efpfanficnet from . import adapter_efpfanficnet
from . import adapter_faeriearchivecom
from . import adapter_imagineeficcom from . import adapter_imagineeficcom
from . import adapter_potterheadsanonymouscom
from . import adapter_storiesonlinenet from . import adapter_storiesonlinenet
from . import adapter_trekiverseorg
from . import adapter_literotica from . import adapter_literotica
from . import adapter_voracity2eficcom from . import adapter_voracity2eficcom
from . import adapter_spikeluvercom from . import adapter_spikeluvercom
from . import adapter_bloodshedversecom from . import adapter_bloodshedversecom
from . import adapter_fanfichu
from . import adapter_fictionmaniatv from . import adapter_fictionmaniatv
from . import adapter_themaplebookshelf
from . import adapter_sheppardweircom from . import adapter_sheppardweircom
from . import adapter_samandjacknet from . import adapter_samandjacknet
from . import adapter_csiforensicscom
from . import adapter_fanfictionjunkiesde
from . import adapter_tgstorytimecom from . import adapter_tgstorytimecom
from . import adapter_itcouldhappennet
from . import adapter_forumsspacebattlescom from . import adapter_forumsspacebattlescom
from . import adapter_forumssufficientvelocitycom from . import adapter_forumssufficientvelocitycom
from . import adapter_forumquestionablequestingcom from . import adapter_forumquestionablequestingcom
@ -107,8 +90,6 @@ from . import adapter_ninelivesarchivecom
from . import adapter_masseffect2in from . import adapter_masseffect2in
from . import adapter_quotevcom from . import adapter_quotevcom
from . import adapter_mcstoriescom from . import adapter_mcstoriescom
from . import adapter_buffygilescom
from . import adapter_andromedawebcom
from . import adapter_naiceanilmenet from . import adapter_naiceanilmenet
from . import adapter_adultfanfictionorg from . import adapter_adultfanfictionorg
from . import adapter_fictionhuntcom from . import adapter_fictionhuntcom
@ -118,58 +99,48 @@ from . import adapter_bdsmlibrarycom
from . import adapter_asexstoriescom from . import adapter_asexstoriescom
from . import adapter_gluttonyfictioncom from . import adapter_gluttonyfictioncom
from . import adapter_valentchambercom from . import adapter_valentchambercom
from . import adapter_looselugscom
from . import adapter_wwwgiantessworldnet from . import adapter_wwwgiantessworldnet
from . import adapter_lotrgficcom
from . import adapter_tomparisdormcom
from . import adapter_sugarquillnet
from . import adapter_starslibrarynet from . import adapter_starslibrarynet
from . import adapter_fanficauthorsnet from . import adapter_fanficauthorsnet
from . import adapter_fireflyfansnet from . import adapter_fireflyfansnet
from . import adapter_sebklainenet
from . import adapter_shriftweborgbfa
from . import adapter_trekfanfictionnet from . import adapter_trekfanfictionnet
from . import adapter_wuxiaworldcom
from . import adapter_wwwlushstoriescom
from . import adapter_wwwutopiastoriescom from . import adapter_wwwutopiastoriescom
from . import adapter_sinfuldreamscomunicornfic from . import adapter_sinfuldreamscomunicornfic
from . import adapter_sinfuldreamscomwhisperedmuse
from . import adapter_sinfuldreamscomwickedtemptation from . import adapter_sinfuldreamscomwickedtemptation
from . import adapter_asianfanficscom from . import adapter_asianfanficscom
from . import adapter_webnovelcom
from . import adapter_mttjustoncenet from . import adapter_mttjustoncenet
from . import adapter_narutoficorg from . import adapter_narutoficorg
from . import adapter_starskyhutcharchivenet
from . import adapter_swordborderlineangelcom
from . import adapter_tasteofpoisoninkubationnet
from . import adapter_thedelphicexpansecom from . import adapter_thedelphicexpansecom
from . import adapter_wwwaneroticstorycom from . import adapter_wwwaneroticstorycom
from . import adapter_lcfanficcom from . import adapter_lcfanficcom
from . import adapter_noveltrovecom
from . import adapter_inkbunnynet from . import adapter_inkbunnynet
from . import adapter_alternatehistorycom from . import adapter_alternatehistorycom
from . import adapter_wattpadcom from . import adapter_wattpadcom
from . import adapter_novelonlinefullcom from . import adapter_novelonlinefullcom
from . import adapter_wwwnovelallcom from . import adapter_wwwnovelallcom
from . import adapter_wuxiaworldco
from . import adapter_novelupdatescc
from . import adapter_harrypotterfanfictioncom
from . import adapter_hentaifoundrycom from . import adapter_hentaifoundrycom
from . import adapter_mugglenetfanfictioncom from . import adapter_mugglenetfanfictioncom
from . import adapter_swiorgru
from . import adapter_fanficsme from . import adapter_fanficsme
from . import adapter_fanfictalkcom from . import adapter_fanfictalkcom
from . import adapter_scifistoriescom from . import adapter_scifistoriescom
from . import adapter_silmarillionwritersguildorg
from . import adapter_chireadscom from . import adapter_chireadscom
from . import adapter_scribblehubcom from . import adapter_scribblehubcom
from . import adapter_fictionlive from . import adapter_fictionlive
from . import adapter_wuxiaworldsite
from . import adapter_thesietchcom from . import adapter_thesietchcom
from . import adapter_fastnovelnet
from . import adapter_squidgeworldorg from . import adapter_squidgeworldorg
from . import adapter_novelfull from . import adapter_novelfull
from . import adapter_worldofxde from . import adapter_psychficcom
from . import adapter_deviantartcom
from . import adapter_readonlymindcom
from . import adapter_wwwsunnydaleafterdarkcom
from . import adapter_syosetucom
from . import adapter_kakuyomujp
from . import adapter_fanfictionsfr
from . import adapter_touchfluffytail
from . import adapter_spiritfanfictioncom
from . import adapter_superlove
from . import adapter_cfaa
from . import adapter_althistorycom
## This bit of complexity allows adapters to be added by just adding ## This bit of complexity allows adapters to be added by just adding
## importing. It eliminates the long if/else clauses we used to need ## importing. It eliminates the long if/else clauses we used to need
@ -253,6 +224,21 @@ def get_section_url(url):
## return unchanged in that case. ## return unchanged in that case.
return url return url
def get_url_search(url):
'''
For adapters that have story URLs that can change. This is
used for searching the Calibre library by identifiers:url for
sites (generally) that contain author or title that can
change, but also have a unique identifier that doesn't.
returns a string containing a regexp, not a compiled re object.
'''
cls = _get_class_for(url)[0]
if not cls:
## still apply common processing.
cls = base_adapter.BaseSiteAdapter
return cls.get_url_search(url)
def getAdapter(config,url,anyurl=False): def getAdapter(config,url,anyurl=False):
#logger.debug("trying url:"+url) #logger.debug("trying url:"+url)

View file

@ -15,201 +15,24 @@
# limitations under the License. # limitations under the License.
# #
# Software: eFiction
from __future__ import absolute_import from __future__ import absolute_import
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML from .base_otw_adapter import BaseOTWAdapter
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.story.setMetadata('siteabbrev','aaff')
self.is_adult=False
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@staticmethod
def getSiteDomain():
return 'www.adastrafanfic.com'
@classmethod
def getSiteExampleURLs(cls):
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
def extractChapterUrlsAndMetadata(self):
if self.is_adult or self.getConfig("is_adult"):
addurl = "&warning=5"
else:
addurl=""
url = self.url+'&index=1'+addurl
logger.debug("URL: "+url)
data = self.get_request(url)
if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
raise exceptions.AdultCheckRequired(self.url)
# problems with some stories, but only in calibre. I suspect
# issues with different SGML parsers in python. This is a
# nasty hack, but it works.
data = data[data.index("<body"):]
soup = self.make_soup(data)
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
## <meta name='description' content='&lt;p&gt;Description&lt;/p&gt; ...' >
## Summary, strangely, is in the content attr of a <meta name='description'> tag
## which is escaped HTML. Unfortunately, we can't use it because they don't
## escape (') chars in the desc, breakin the tag.
#meta_desc = soup.find('meta',{'name':'description'})
#metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
#self.story.setMetadata('description',stripHTML(metasoup))
def defaultGetattr(d,k):
try:
return d[k]
except:
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
if 'Summary' in label:
## Everything until the next span class='label'
svalue = ''
while value and 'label' not in defaultGetattr(value,'class'):
svalue += unicode(value)
value = value.nextSibling
# sometimes poorly formated desc (<p> w/o </p>) leads
# to all labels being included.
svalue=svalue[:svalue.find('<span class="label">')]
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value)
if 'Word count' in label:
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warningstext = [warning.string for warning in warnings]
self.warning = ', '.join(warningstext)
for warning in warningstext:
self.story.addToList('warnings',warning.string)
if 'Completed' in label:
if 'Yes' in value:
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')
if 'Published' in label:
self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))
if 'Updated' in label:
# there's a stray [ at the end.
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
self.story.setMetadata('seriesUrl',series_url)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
data = self.get_request(url)
# problems with some stories, but only in calibre. I suspect
# issues with different SGML parsers in python. This is a
# nasty hack, but it works.
data = data[data.index("<body"):]
soup = self.make_soup(data)
span = soup.find('div', {'id' : 'story'})
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return self.utf8FromSoup(url,span)
def getClass(): def getClass():
return AdAstraFanficComSiteAdapter return AdastrafanficComAdapter
class AdastrafanficComAdapter(BaseOTWAdapter):
def __init__(self, config, url):
BaseOTWAdapter.__init__(self, config, url)
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','aaff')
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'www.adastrafanfic.com'

View file

@ -57,8 +57,8 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# normalized story URL.(checking self.zone against list # normalized story URL.(checking self.zone against list
# removed--it was redundant w/getAcceptDomains and # removed--it was redundant w/getAcceptDomains and
# getSiteURLPattern both) # getSiteURLPattern both)
self._setURL('http://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId'))) self._setURL('https://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
#self._setURL('http://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId')) #self._setURL('https://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation. # Each adapter needs to have a unique site abbreviation.
#self.story.setMetadata('siteabbrev',self.getSiteAbbrev()) #self.story.setMetadata('siteabbrev',self.getSiteAbbrev())
@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# The date format will vary from site to site. # The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%Y-%m-%d" self.dateformat = "%B %d, %Y"
## Added because adult-fanfiction.org does send you to ## Added because adult-fanfiction.org does send you to
## www.adult-fanfiction.org when you go to it and it also moves ## www.adult-fanfiction.org when you go to it and it also moves
@ -113,79 +111,31 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
@classmethod @classmethod
def getSiteExampleURLs(self): def getSiteExampleURLs(self):
return ("http://anime.adult-fanfiction.org/story.php?no=123456789 " return ("https://anime.adult-fanfiction.org/story.php?no=123456789 "
+ "http://anime2.adult-fanfiction.org/story.php?no=123456789 " + "https://anime2.adult-fanfiction.org/story.php?no=123456789 "
+ "http://bleach.adult-fanfiction.org/story.php?no=123456789 " + "https://bleach.adult-fanfiction.org/story.php?no=123456789 "
+ "http://books.adult-fanfiction.org/story.php?no=123456789 " + "https://books.adult-fanfiction.org/story.php?no=123456789 "
+ "http://buffy.adult-fanfiction.org/story.php?no=123456789 " + "https://buffy.adult-fanfiction.org/story.php?no=123456789 "
+ "http://cartoon.adult-fanfiction.org/story.php?no=123456789 " + "https://cartoon.adult-fanfiction.org/story.php?no=123456789 "
+ "http://celeb.adult-fanfiction.org/story.php?no=123456789 " + "https://celeb.adult-fanfiction.org/story.php?no=123456789 "
+ "http://comics.adult-fanfiction.org/story.php?no=123456789 " + "https://comics.adult-fanfiction.org/story.php?no=123456789 "
+ "http://ff.adult-fanfiction.org/story.php?no=123456789 " + "https://ff.adult-fanfiction.org/story.php?no=123456789 "
+ "http://games.adult-fanfiction.org/story.php?no=123456789 " + "https://games.adult-fanfiction.org/story.php?no=123456789 "
+ "http://hp.adult-fanfiction.org/story.php?no=123456789 " + "https://hp.adult-fanfiction.org/story.php?no=123456789 "
+ "http://inu.adult-fanfiction.org/story.php?no=123456789 " + "https://inu.adult-fanfiction.org/story.php?no=123456789 "
+ "http://lotr.adult-fanfiction.org/story.php?no=123456789 " + "https://lotr.adult-fanfiction.org/story.php?no=123456789 "
+ "http://manga.adult-fanfiction.org/story.php?no=123456789 " + "https://manga.adult-fanfiction.org/story.php?no=123456789 "
+ "http://movies.adult-fanfiction.org/story.php?no=123456789 " + "https://movies.adult-fanfiction.org/story.php?no=123456789 "
+ "http://naruto.adult-fanfiction.org/story.php?no=123456789 " + "https://naruto.adult-fanfiction.org/story.php?no=123456789 "
+ "http://ne.adult-fanfiction.org/story.php?no=123456789 " + "https://ne.adult-fanfiction.org/story.php?no=123456789 "
+ "http://original.adult-fanfiction.org/story.php?no=123456789 " + "https://original.adult-fanfiction.org/story.php?no=123456789 "
+ "http://tv.adult-fanfiction.org/story.php?no=123456789 " + "https://tv.adult-fanfiction.org/story.php?no=123456789 "
+ "http://xmen.adult-fanfiction.org/story.php?no=123456789 " + "https://xmen.adult-fanfiction.org/story.php?no=123456789 "
+ "http://ygo.adult-fanfiction.org/story.php?no=123456789 " + "https://ygo.adult-fanfiction.org/story.php?no=123456789 "
+ "http://yuyu.adult-fanfiction.org/story.php?no=123456789") + "https://yuyu.adult-fanfiction.org/story.php?no=123456789")
def getSiteURLPattern(self): def getSiteURLPattern(self):
return r'http?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$' return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
##This is not working right now, so I'm commenting it out, but leaving it for future testing
## Login seems to be reasonably standard across eFiction sites.
#def needToLoginCheck(self, data):
##This adapter will always require a login
# return True
# <form name="login" method="post" action="">
# <div class="top">E-mail: <span id="sprytextfield1">
# <input name="email" type="text" id="email" size="20" maxlength="255" />
# <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
# <div class="top">Password: <span id="sprytextfield2">
# <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
# <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
# <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
# <input type="submit" value="Login" />
# </div>
# </form>
##This is not working right now, so I'm commenting it out, but leaving it for future testing
#def performLogin(self, url, soup):
# params = {}
# if self.password:
# params['email'] = self.username
# params['pass1'] = self.password
# else:
# params['email'] = self.getConfig("username")
# params['pass1'] = self.getConfig("password")
# params['submit'] = 'Login'
# # copy all hidden input tags to pick up appropriate tokens.
# for tag in soup.findAll('input',{'type':'hidden'}):
# params[tag['name']] = tag['value']
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
# d = self.post_request(url, params, usecache=False)
# d = self.post_request(url, params, usecache=False)
# soup = self.make_soup(d)
#if not (soup.find('form', {'name' : 'login'}) == None):
# logger.info("Failed to login to URL %s as %s" % (url, params['email']))
# raise exceptions.FailedToLogin(url,params['email'])
# return False
#else:
# return True
## Getting the chapter list and the meta data, plus 'is adult' checking. ## Getting the chapter list and the meta data, plus 'is adult' checking.
def doExtractChapterUrlsAndMetadata(self, get_cover=True): def doExtractChapterUrlsAndMetadata(self, get_cover=True):
@ -193,173 +143,97 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
## You need to have your is_adult set to true to get this story ## You need to have your is_adult set to true to get this story
if not (self.is_adult or self.getConfig("is_adult")): if not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url) raise exceptions.AdultCheckRequired(self.url)
else:
d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
if "Age verified successfully" not in d:
raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))
url = self.url url = self.url
logger.debug("URL: "+url) logger.debug("URL: "+url)
data = self.get_request(url) data = self.get_request(url)
# logger.debug(data)
if "The dragons running the back end of the site can not seem to find the story you are looking for." in data: if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain())) raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))
soup = self.make_soup(data) soup = self.make_soup(data)
##This is not working right now, so I'm commenting it out, but leaving it for future testing
#self.performLogin(url, soup)
## Title ## Title
## Some of the titles have a backslash on the story page, but not on the Author's page ## Some of the titles have a backslash on the story page, but not on the Author's page
## So I am removing it from the title, so it can be found on the Author's page further in the code. ## So I am removing it from the title, so it can be found on the Author's page further in the code.
## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them, ## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
## so I have to here as well. I used multiple replaces to make sure, since I did the same below. ## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")) h1 = soup.find('h1')
self.story.setMetadata('title',stripHTML(a).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip()) # logger.debug("Title:%s"%h1)
self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
# Find the chapters: # Find the chapters from first list only
chapters = soup.find('div',{'class':'dropdown-content'}) chapters = soup.select_one('select.chapter-select').select('option')
for i, chapter in enumerate(chapters.findAll('a')): for chapter in chapters:
self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1)) self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"profile.php\?no=\d+")) a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
if a == None: if a == None:
# I know that the original author of fanficfare wants to always have metadata, # I know that the original author of fanficfare wants to always have metadata,
# but I posit that if the story is there, even if we can't get the metadata from the # but I posit that if the story is there, even if we can't get the metadata from the
# author page, the story should still be able to be downloaded, which is what I've done here. # author page, the story should still be able to be downloaded, which is what I've done here.
self.story.setMetadata('authorId','000000000') self.story.setMetadata('authorId','000000000')
self.story.setMetadata('authorUrl','http://www.adult-fanfiction.org') self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
self.story.setMetadata('author','Unknown') self.story.setMetadata('author','Unknown')
logger.warning('There was no author found for the story... Metadata will not be retreived.') logger.warning('There was no author found for the story... Metadata will not be retreived.')
self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<') self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
else: else:
self.story.setMetadata('authorId',a['href'].split('=')[1]) self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl',a['href']) self.story.setMetadata('authorUrl',a['href'])
self.story.setMetadata('author',stripHTML(a)) self.story.setMetadata('author',stripHTML(a))
##The story page does not give much Metadata, so we go to the Author's page ## The story page does not give much Metadata, so we go to
## the Author's page. Except it's actually a sub-req for
## list of author's stories for that subdomain
author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
self.getBaseDomain(),
self.zone,
self.story.getMetadata('authorId'))
##Get the first Author page to see if there are multiple pages. logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
##AFF doesn't care if the page number is larger than the actual pages,
##it will continue to show the last page even if the variable is larger than the actual page
author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
#author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
##I'm resetting the author page to the zone for this story
self.story.setMetadata('authorUrl',author_Url)
logger.debug('Getting the author page: {0}'.format(author_Url))
adata = self.get_request(author_Url) adata = self.get_request(author_Url)
if "The member you are looking for does not exist." in adata: none_found = "No stories found in this category."
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain())) if none_found in adata:
#raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.") raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))
asoup = self.make_soup(adata) asoup = self.make_soup(adata)
# logger.debug(asoup)
##Getting the number of pages story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
pages=asoup.find('div',{'class' : 'pagination'}).findAll('li')[-1].find('a') # logger.debug(story_card)
if not pages == None:
pages = pages['href'].split('=')[-1]
else:
pages = 0
##If there is only 1 page of stories, check it to get the Metadata, ## Category
if pages == 0: ## I've only seen one category per story so far, but just in case:
a = asoup.findAll('li') for cat in story_card.select('div.story-card-category'):
for lc2 in a: # remove Category:, old code suggests Located: is also
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")): # possible, so removing by <strong>
break cat.find("strong").decompose()
## otherwise go through the pages self.story.addToList('category',stripHTML(cat))
else:
page=1
i=0
while i == 0:
##We already have the first page, so if this is the first time through, skip getting the page
if page != 1:
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
logger.debug('Getting the author page: {0}'.format(author_Url))
adata = self.get_request(author_Url)
##This will probably never be needed, since AFF doesn't seem to care what number you put as
## the page number, it will default to the last page, even if you use 1000, for an author
## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
if "The member you are looking for does not exist." in adata:
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
# we look for the li element that has the story here
asoup = self.make_soup(adata)
a = asoup.findAll('li') self.setDescription(url,story_card.select_one('div.story-card-description'))
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
i=1
break
page = page + 1
if page > int(pages):
break
##Split the Metadata up into a list for tag in story_card.select('span.story-tag'):
##We have to change the soup type to a string, then remove the newlines, and double spaces, self.story.addToList('eroticatags',stripHTML(tag))
##then changes the <br/> to '-:-', which seperates the different elemeents.
##Then we strip the HTML elements from the string. ## created/updates share formatting
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'. for meta in story_card.select('div.story-card-meta-item span:last-child'):
##They are always in the same order. meta = stripHTML(meta)
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it if 'Created: ' in meta:
liMetadata = unicode(lc2).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ') meta = meta.replace('Created: ','')
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-')) self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')): if 'Updated: ' in meta:
if i == 0: meta = meta.replace('Updated: ','')
# The value for the title has been manipulated, so may not be the same as gotten at the start. self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))
# I'm going to use the href from the lc2 retrieved from the author's page to determine if it is correct.
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
elif i == 1:
##Get the description
self.setDescription(url,stripHTML(value.strip()))
else:
# the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
if 'Located :' in value:
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Category :' in value:
# Get the Category
self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
elif 'Content Tags :' in value:
# Get the Erotic Tags
value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
for code in re.split(r'\s',value):
self.story.addToList('eroticatags',code)
elif 'Posted :' in value:
# Get the Posted Date
value = value.replace(r'Posted :',r'').strip()
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
elif value.startswith('0000'):
# Since the date is showing as 0000,
# I'm going to put the memberdate here
value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
elif 'Edited :' in value:
# Get the 'Updated' Edited date
# AFF has the time for the Updated date, and we only want the date,
# so we take the first 10 characters only
value = value.replace(r'Edited :',r'').strip()[0:10]
if value.startswith('008'):
# It is unknown how the 200 became 008, but I'm going to change it back here
value = value.replace('008','200')
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
elif value.startswith('0000') or '-00-' in value:
# Since the date is showing as 0000,
# or there is -00- in the date,
# I'm going to put the Published date here
self.story.setMetadata('dateUpdated', self.story.getMetadata('datPublished'))
else:
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
else:
# This catches the blank elements, and the Review and Dragon Prints.
# I am not interested in these, so do nothing
zzzzzzz=0
# grab the text for an individual chapter. # grab the text for an individual chapter.
def getChapterText(self, url): def getChapterText(self, url):
@ -367,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url) logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
chaptertag = soup.find('div',{'class' : 'pagination'}).parent.findNext('td') chaptertag = soup.select_one('div.chapter-body')
if None == chaptertag: if None == chaptertag:
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url)) raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
# Change td to a div. ## chapter text includes a copy of story title, author,
chaptertag.name='div' ## chapter title, & eroticatags specific to the chapter. Did
## before, too.
return self.utf8FromSoup(url,chaptertag) return self.utf8FromSoup(url,chaptertag)

View file

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
# Copyright 2026 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import re
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
def getClass():
    """Module entry point: return the adapter class FanFicFare should use."""
    return AltHistoryComAdapter
## NOTE: This is a different site than www.alternatehistory.com.
class AltHistoryComAdapter(BaseXenForo2ForumAdapter):
    """Adapter for the XenForo2 forum at althistory.com.

    NOTE: this is a different site than www.alternatehistory.com.
    All scraping logic lives in BaseXenForo2ForumAdapter; this class
    only supplies the site-specific abbreviation and domain.
    """
    def __init__(self, config, url):
        BaseXenForo2ForumAdapter.__init__(self, config, url)
        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ahc')
    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'althistory.com'

View file

@ -1,280 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ####### Not all labels are captured; they are not formatted correctly on the
# ####### webpage.
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Module entry point: return the adapter class FanFicFare should use."""
    return AndromedaWebComAdapter # XXX
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
    """eFiction adapter for www.andromeda-web.com.

    Standard eFiction flow: fetch the story index page (bypassing the
    adult-content warning when configured), scrape the
    <span class="label"> metadata pairs, then fetch each chapter page.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /fiction part. Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','awc') # XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y" # XXX

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'www.andromeda-web.com' # XXX

    @classmethod
    def getSiteExampleURLs(cls):
        """Example story URL shown to users."""
        return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        """Regex that accepted story URLs must match."""
        return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        """Return True if *data* is a page demanding a login."""
        return ('Registered Users Only' in data
                or 'There is no such account on our website' in data
                or "That password doesn't match the one in our database" in data)

    def performLogin(self, url):
        """Log in via the eFiction user.php form.

        Uses self.username/self.password when set, otherwise the
        configured credentials.  Raises FailedToLogin on failure,
        returns True on success.
        """
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self.post_request(loginUrl, params)

        # "Member Account" appears on the logged-in landing page.
        if "Member Account" not in d :
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            # fix: removed an unreachable 'return False' that followed this raise.
            raise exceptions.FailedToLogin(url,params['penname'])
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Populate chapter URLs and story metadata from the story index page."""
        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=2"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self.get_request(url)

        # Since the warning text can change by warning level, let's
        # look for the warning pass url. ksarchive uses
        # &amp;warning= -- actually, so do other sites. Must be an
        # eFiction book.
        # fiction/viewstory.php?sid=1882&amp;warning=4
        # fiction/viewstory.php?sid=1654&amp;ageconsent=ok&amp;warning=2
        # fix: removed a dead hard-coded re.search whose result was
        # immediately overwritten by the search below.
        m = re.search(r"'fiction/viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
        if m is not None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)
                data = self.get_request(url)
            else:
                raise exceptions.AdultCheckRequired(self.url)

        # NOTE: 'adminstrators' misspelling matches the site's actual output.
        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)

        pagetitle = soup.find('div',{'id':'content'})

        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/fiction/'+chapter['href']+addurl)

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: attribute access on soup nodes can fail for
        # NavigableStrings / missing attrs, so default to "".
        def defaultGetattr(d,k):
            try:
                return d[k]
            except Exception:  # fix: was a bare except
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']
            seriessoup = self.make_soup(self.get_request(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('fiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1
        except Exception:  # fix: was a bare except
            # Deliberate best-effort: series membership is optional and
            # parsing failure here is harmless.
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return its cleaned-up HTML body."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'class' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

View file

@ -18,55 +18,20 @@
from __future__ import absolute_import from __future__ import absolute_import
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
import re
import json
from ..htmlcleanup import stripHTML from .base_otw_adapter import BaseOTWAdapter
from .. import exceptions as exceptions
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
def getClass(): def getClass():
return ArchiveOfOurOwnOrgAdapter return ArchiveOfOurOwnOrgAdapter
class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): class ArchiveOfOurOwnOrgAdapter(BaseOTWAdapter):
def __init__(self, config, url): def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url) BaseOTWAdapter.__init__(self, config, url)
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False
self.addurl = ""
self.full_work_soup = None
self.full_work_chapters = None
self.use_full_work_soup = True
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
# get storyId from url--url validation guarantees query correct
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
# normalized story URL.
self._setURL('https://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
else:
raise exceptions.InvalidStoryURL(url,
self.getSiteDomain(),
self.getSiteExampleURLs())
# Each adapter needs to have a unique site abbreviation. # Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','ao3') self.story.setMetadata('siteabbrev','ao3')
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%Y-%b-%d"
@staticmethod # must be @staticmethod, don't remove it. @staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain(): def getSiteDomain():
# The site domain. Does have www here, if it uses it. # The site domain. Does have www here, if it uses it.
@ -84,528 +49,21 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
return ['archiveofourown.org', return ['archiveofourown.org',
'archiveofourown.com', 'archiveofourown.com',
'archiveofourown.net', 'archiveofourown.net',
'archiveofourown.gay',
'download.archiveofourown.org', 'download.archiveofourown.org',
'download.archiveofourown.com', 'download.archiveofourown.com',
'download.archiveofourown.net', 'download.archiveofourown.net',
'ao3.org', 'ao3.org',
] ]
@classmethod def mod_url_request(self, url):
def getSiteExampleURLs(cls):
return "https://"+cls.getSiteDomain()+"/works/123456 https://"+cls.getSiteDomain()+"/collections/Some_Archive/works/123456 https://"+cls.getSiteDomain()+"/works/123456/chapters/78901"
def getSiteURLPattern(self):
# https://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
# Discard leading zeros from story ID numbers--AO3 doesn't use them in it's own chapter URLs.
# logger.debug(r"https?://" + r"|".join([x.replace('.','\.') for x in self.getAcceptDomains()]) + r"(/collections/[^/]+)?/works/0*(?P<id>\d+)")
return r"https?://(" + r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()]) + r")(/collections/[^/]+)?/works/0*(?P<id>\d+)"
@classmethod
def get_section_url(cls,url):
## minimal URL used for section names in INI and reject list
## for comparison
# logger.debug("pre--url:%s"%url)
## https://archiveofourown.org/works/19334905/chapters/71697933
url = re.sub(r'^(.*/works/\d+).*$',r'\1',url)
# logger.debug("post-url:%s"%url)
return url return url
## Login def mod_url_request(self, url):
def needToLoginCheck(self, data): ## add / to *not* replace media.archiveofourown.org
if 'This work is only available to registered users of the Archive.' in data \ if self.getConfig("use_archive_transformativeworks_org",False):
or "The password or user name you entered doesn't match our records" in data: return url.replace("/archiveofourown.org","/archive.transformativeworks.org")
return True elif self.getConfig("use_archiveofourown_gay",False):
return url.replace("/archiveofourown.org","/archiveofourown.gay")
else: else:
return False return url
    def performLogin(self, url, data):
        """POST the AO3 login form.

        *data* is accepted for interface compatibility with the caller
        but is not read here.  Raises FailedToLogin when the logout link
        is absent from the response; returns True on success.
        """
        params = {}

        if self.password:
            params['user[login]'] = self.username
            params['user[password]'] = self.password
        else:
            params['user[login]'] = self.getConfig("username")
            params['user[password]'] = self.getConfig("password")
        params['user[remember_me]'] = '1'
        params['commit'] = 'Log in'
        params['utf8'] = u'\x2713' # utf8 *is* required now. hex code works better than actual character for some reason. u'✓'

        # authenticity_token now comes from a completely separate json call.
        token_json = json.loads(self.get_request('https://' + self.getSiteDomain() + "/token_dispenser.json"))
        params['authenticity_token'] = token_json['token']

        loginUrl = 'https://' + self.getSiteDomain() + '/users/login'
        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                            params['user[login]']))

        d = self.post_request(loginUrl, params)

        # Presence of the logout link is the logged-in marker.
        if 'href="/users/logout"' not in d :
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                              params['user[login]']))
            raise exceptions.FailedToLogin(url,params['user[login]'])
            return False # NOTE(review): unreachable after the raise above.
        else:
            return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Fetch the /navigate chapter index and the story landing page,
        then populate chapter URLs and all story metadata from them.

        Handles the adult-content interstitial and (re)login as needed.
        """
        if self.is_adult or self.getConfig("is_adult"):
            self.addurl = "?view_adult=true"
        else:
            self.addurl=""

        # metaurl: story landing page (metadata); url: /navigate (chapter list).
        metaurl = self.url+self.addurl
        url = self.url+'/navigate'+self.addurl
        logger.info("url: "+url)
        logger.info("metaurl: "+metaurl)

        data = self.get_request(url)
        meta = self.get_request(metaurl)

        if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta:
            if self.addurl:
                ## "?view_adult=true" doesn't work on base story
                ## URL anymore, which means we have to
                ## follow the 'Proceed' link instead.
                metasoup = self.make_soup(meta)
                a = metasoup.find('a',text='Proceed')
                metaurl = 'https://'+self.host+a['href']
                meta = self.get_request(metaurl)
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Sorry, we couldn&#x27;t find the work you were looking for." in data:
            raise exceptions.StoryDoesNotExist(self.url)

        # need to log in for this one, or always_login.
        if self.needToLoginCheck(data) or \
                ( self.getConfig("always_login") and 'href="/users/logout"' not in data ):
            self.performLogin(url,data)
            # refetch both pages now that we are logged in.
            data = self.get_request(url,usecache=False)
            meta = self.get_request(metaurl,usecache=False)

        # Remove the site admin banner from both pages before scraping.
        soup = self.make_soup(data)
        for tag in soup.findAll('div',id='admin-banner'):
            tag.extract()
        metasoup = self.make_soup(meta)
        for tag in metasoup.findAll('div',id='admin-banner'):
            tag.extract()

        ## Title
        a = soup.find('a', href=re.compile(r"/works/\d+$"))
        self.story.setMetadata('title',stripHTML(a))

        if self.getConfig("always_login"):
            # deliberately using always_login instead of checking for
            # actual login so we don't have a case where these show up
            # for a user only when they get user-restricted stories.
            try:
                # is bookmarked if has update /bookmarks/ form --
                # create bookmark form uses different url
                self.story.setMetadata('bookmarked',
                                       None != metasoup.find('form',action=re.compile(r'^/bookmarks/')))
                self.story.extendList('bookmarktags',
                                      metasoup.find('input',id='bookmark_tag_string')['value'].split(', '))
                self.story.setMetadata('bookmarkprivate',
                                       metasoup.find('input',id='bookmark_private').has_attr('checked'))
                self.story.setMetadata('bookmarkrec',
                                       metasoup.find('input',id='bookmark_rec').has_attr('checked'))
            except KeyError:
                # bookmark form elements absent (no bookmark yet); skip them.
                pass
            self.story.setMetadata('bookmarksummary',
                                   stripHTML(metasoup.find('textarea',id='bookmark_notes')))

        if metasoup.find('img',alt='(Restricted)'):
            self.story.setMetadata('restricted','Restricted')

        # Find authorid and URL from... author url.
        alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/.+"))
        if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
            self.story.setMetadata('author','Anonymous')
            self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
            self.story.setMetadata('authorId','0')
        else:
            for a in alist:
                self.story.addToList('authorId',a['href'].split('/')[-1])
                self.story.addToList('authorUrl','https://'+self.host+a['href'])
                self.story.addToList('author',a.text)

        byline = metasoup.find('h3',{'class':'byline'})
        if byline:
            self.story.setMetadata('byline',stripHTML(byline))
            # byline:
            # <h3 class="byline heading">
            # Hope Roy [archived by <a href="/users/ssa_archivist/pseuds/ssa_archivist" rel="author">ssa_archivist</a>]
            # </h3>
            # stripped:"Hope Roy [archived by ssa_archivist]"
            m = re.match(r'(?P<author>.*) \[archived by ?(?P<archivist>.*)\]',stripHTML(byline))
            if( m and
                len(alist) == 1 and
                self.getConfig('use_archived_author') ):
                self.story.setMetadata('author',m.group('author'))

        newestChapter = None
        self.newestChapterNum = None # save for comparing during update.
        # Scan all chapters to find the oldest and newest, on AO3 it's
        # possible for authors to insert new chapters out-of-order or
        # change the dates of earlier ones by editing them--That WILL
        # break epub update.
        # Find the chapters:
        chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
        self.story.setMetadata('numChapters',len(chapters))
        logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
        if len(chapters)==1:
            # single-chapter work: use the story title as the chapter title.
            self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+chapters[0]['href'])
        else:
            for index, chapter in enumerate(chapters):
                # strip just in case there's tags, like <i> in chapter titles.
                # (2013-09-21)
                # date span looks like "(2013-09-21)"; [1:-1] strips the parens.
                date = stripHTML(chapter.findNext('span'))[1:-1]
                chapterDate = makeDate(date,self.dateformat)
                self.add_chapter(chapter,'https://'+self.host+chapter['href'],
                                 {'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d")))})
                if newestChapter == None or chapterDate > newestChapter:
                    newestChapter = chapterDate
                    self.newestChapterNum = index

        ## Summary / description.
        a = metasoup.find('blockquote',{'class':'userstuff'})
        if a != None:
            a.name='div' # Change blockquote to div.
            self.setDescription(url,a)

        a = metasoup.find('dd',{'class':"rating tags"})
        if a != None:
            self.story.setMetadata('rating',stripHTML(a.text))

        d = metasoup.find('dd',{'class':"language"})
        if d != None:
            self.story.setMetadata('language',stripHTML(d.text))

        a = metasoup.find('dd',{'class':"fandom tags"})
        if a != None:
            fandoms = a.findAll('a',{'class':"tag"})
            for fandom in fandoms:
                self.story.addToList('fandoms',fandom.string)

        a = metasoup.find('dd',{'class':"warning tags"})
        if a != None:
            warnings = a.findAll('a',{'class':"tag"})
            for warning in warnings:
                self.story.addToList('warnings',warning.string)

        a = metasoup.find('dd',{'class':"freeform tags"})
        if a != None:
            genres = a.findAll('a',{'class':"tag"})
            for genre in genres:
                self.story.addToList('freeformtags',genre.string)

        a = metasoup.find('dd',{'class':"category tags"})
        if a != None:
            genres = a.findAll('a',{'class':"tag"})
            for genre in genres:
                if genre != "Gen":
                    self.story.addToList('ao3categories',genre.string)

        a = metasoup.find('dd',{'class':"character tags"})
        if a != None:
            chars = a.findAll('a',{'class':"tag"})
            for char in chars:
                self.story.addToList('characters',char.string)

        a = metasoup.find('dd',{'class':"relationship tags"})
        if a != None:
            ships = a.findAll('a',{'class':"tag"})
            for ship in ships:
                self.story.addToList('ships',ship.string)

        a = metasoup.find('dd',{'class':"collections"})
        if a != None:
            collections = a.findAll('a')
            for collection in collections:
                self.story.addToList('collections',collection.string)

        ## Stats block: parallel <dt>/<dd> label/value pairs.
        stats = metasoup.find('dl',{'class':'stats'})
        dt = stats.findAll('dt')
        dd = stats.findAll('dd')
        for x in range(0,len(dt)):
            label = dt[x].text
            value = dd[x].text

            if 'Words:' in label:
                self.story.setMetadata('numWords', value)

            if 'Comments:' in label:
                self.story.setMetadata('comments', value)

            if 'Kudos:' in label:
                self.story.setMetadata('kudos', value)

            if 'Hits:' in label:
                self.story.setMetadata('hits', value)

            if 'Bookmarks:' in label:
                self.story.setMetadata('bookmarks', value)

            if 'Chapters:' in label:
                # value looks like "posted/total"; equal means complete.
                self.story.setMetadata('chapterslashtotal', value)
                if value.split('/')[0] == value.split('/')[1]:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                # dateUpdated defaults to the published date; overwritten
                # below if an Updated/Completed stat is present.
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Completed' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        # Find Series name from series URL.
        ddseries = metasoup.find('dd',{'class':"series"})
        if ddseries:
            for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))):
                series_name = stripHTML(a)
                series_url = 'https://'+self.host+a['href']
                series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"
                self.story.setMetadata('series%02d'%i,"%s [%s]"%(series_name,series_index))
                self.story.setMetadata('series%02dUrl'%i,series_url)
                if i == 0:
                    self.setSeries(series_name, series_index)
                    self.story.setMetadata('seriesUrl',series_url)
    def hookForUpdates(self,chaptercount):
        """Trim previously-downloaded chapters when chapters were added
        or edited out of order.

        self.newestChapterNum was recorded during metadata extraction as
        the index of the newest chapter by date; chapters from that index
        onward are discarded so they get re-downloaded.  Returns the
        number of old chapters kept.
        """
        if self.newestChapterNum and self.oldchapters and len(self.oldchapters) > self.newestChapterNum:
            logger.info("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1))
            self.oldchapters = self.oldchapters[:self.newestChapterNum]
        return len(self.oldchapters)
## Normalize chapter URLs because a) site has changed from http to
## https and b) in case of title change. That way updates to
## existing stories don't re-download all chapters.
def normalize_chapterurl(self, url):
    # Canonicalize to https and drop any trailing ?view_adult=true.
    pattern = (r"https?://(" + self.getSiteDomain()
               + r"/works/\d+/chapters/\d+)(\?view_adult=true)?$")
    return re.sub(pattern, r"https://\1", url)
# grab the text for an individual chapter.
# Returns the chapter's HTML (via utf8FromSoup) for 0-based chapter
# `index` at `url`.  Builds a fresh <div class="story"> soup, copies in
# the chapter text plus optional head/foot note blocks, honoring the
# exclude_notes config list.  Prefers one cached ?view_full_work=true
# page over fetching each chapter page individually.
def getChapterTextNum(self, url, index):
## FYI: Chapter urls used to include ?view_adult=true in each
## one. With cookiejar being passed now, that's not
## necessary. However, there is a corner case with plugin--If
## a user-required story is attempted after gathering metadata
## for one that needs adult, but not user AND the user doesn't
## enter a valid user, the is_adult cookie from before can be
## lost.
logger.debug('Getting chapter text for: %s index: %s' % (url,index))
save_chapter_soup = self.make_soup('<div class="story"></div>')
## use the div because the full soup will also have <html><body>.
## need save_chapter_soup for .new_tag()
save_chapter=save_chapter_soup.find('div')
whole_dl_soup = chapter_dl_soup = None
# Use the single full-work page only when enabled and the story has
# more than one chapter; it is fetched once and cached on self.
if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
logger.debug("USE view_full_work")
## Assumed view_adult=true was cookied during metadata
if not self.full_work_soup:
self.full_work_soup = self.make_soup(self.get_request(self.url+"?view_full_work=true"+self.addurl.replace('?','&')))
## AO3 has had several cases now where chapter numbers
## are missing, breaking the link between
## <div id=chapter-##> and Chapter ##.
## But they should all still be there and in the right
## order, so array[index]
self.full_work_chapters = self.full_work_soup.find_all('div',{'id':re.compile(r'chapter-\d+')})
if len(self.full_work_chapters) != self.num_chapters():
## sanity check just in case.
self.use_full_work_soup = False
self.full_work_soup = None
logger.warning("chapter count in view_full_work(%s) disagrees with num of chapters(%s)--ending use_view_full_work"%(len(self.full_work_chapters),self.num_chapters()))
whole_dl_soup = self.full_work_soup
if whole_dl_soup:
chapter_dl_soup = self.full_work_chapters[index]
else:
# Fall back to fetching this single chapter's own page; in that
# case the whole page and the chapter soup are the same object.
whole_dl_soup = chapter_dl_soup = self.make_soup(self.get_request(url+self.addurl))
if None == chapter_dl_soup:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
exclude_notes=self.getConfigList('exclude_notes')
def append_tag(elem,tag,string=None,classes=None):
'''bs4 requires tags be added separately.'''
# Helper: create a new tag in save_chapter_soup, optionally with
# string content and a single CSS class, append it to elem.
new_tag = save_chapter_soup.new_tag(tag)
if string:
new_tag.string=string
if classes:
new_tag['class']=[classes]
elem.append(new_tag)
return new_tag
## These are the over-all work's 'Notes at the beginning'.
## They only appear on the first chapter in individual chapter
## pages and before chapter-1 div. Appending removes
## headnotes from whole_dl_soup, so be sure to only do it on
## the first chapter.
head_notes_div = append_tag(save_chapter,'div',classes="fff_chapter_notes fff_head_notes")
if 'authorheadnotes' not in exclude_notes and index == 0:
headnotes = whole_dl_soup.find('div', {'class' : "preface group"}).find('div', {'class' : "notes module"})
if headnotes != None:
## Also include ul class='associations'.
ulassoc = headnotes.find('ul', {'class' : "associations"})
headnotes = headnotes.find('blockquote', {'class' : "userstuff"})
if headnotes != None or ulassoc != None:
append_tag(head_notes_div,'b',"Author's Note:")
if ulassoc != None:
# fix relative links--all examples so far have been.
for alink in ulassoc.find_all('a'):
if 'http' not in alink['href']:
alink['href']='https://' + self.getSiteDomain() + alink['href']
head_notes_div.append(ulassoc)
if headnotes != None:
head_notes_div.append(headnotes)
## Can appear on every chapter
if 'chaptersummary' not in exclude_notes:
chapsumm = chapter_dl_soup.find('div', {'id' : "summary"})
if chapsumm != None:
chapsumm = chapsumm.find('blockquote')
append_tag(head_notes_div,'b',"Summary for the Chapter:")
head_notes_div.append(chapsumm)
## Can appear on every chapter
if 'chapterheadnotes' not in exclude_notes:
chapnotes = chapter_dl_soup.find('div', {'id' : "notes"})
if chapnotes != None:
chapnotes = chapnotes.find('blockquote')
if chapnotes != None:
append_tag(head_notes_div,'b',"Notes for the Chapter:")
head_notes_div.append(chapnotes)
# The chapter body itself; strip AO3's landmark "Chapter Text" h3.
text = chapter_dl_soup.find('div', {'class' : "userstuff module"})
chtext = text.find('h3', {'class' : "landmark heading"})
if chtext:
chtext.extract()
save_chapter.append(text)
foot_notes_div = append_tag(save_chapter,'div',classes="fff_chapter_notes fff_foot_notes")
## Can appear on every chapter
if 'chapterfootnotes' not in exclude_notes:
chapfoot = chapter_dl_soup.find('div', {'class' : "end notes module", 'role' : "complementary"})
if chapfoot != None:
chapfoot = chapfoot.find('blockquote')
append_tag(foot_notes_div,'b',"Notes for the Chapter:")
foot_notes_div.append(chapfoot)
skip_on_update_tags = []
## These are the over-all work's 'Notes at the end'.
## They only appear on the last chapter in individual chapter
## pages and after chapter-# div. Appending removes
## headnotes from whole_dl_soup, so be sure to only do it on
## the last chapter.
if 'authorfootnotes' not in exclude_notes and index+1 == self.num_chapters():
footnotes = whole_dl_soup.find('div', {'id' : "work_endnotes"})
if footnotes != None:
footnotes = footnotes.find('blockquote')
if footnotes:
b = append_tag(foot_notes_div,'b',"Author's Note:")
skip_on_update_tags.append(b)
skip_on_update_tags.append(footnotes)
foot_notes_div.append(footnotes)
## It looks like 'Inspired by' links now all appear in the ul
## class=associations tag in authorheadnotes. This code is
## left in case I'm wrong and there are still stories with div
## id=children inspired links at the end.
if 'inspiredlinks' not in exclude_notes and index+1 == self.num_chapters():
inspiredlinks = whole_dl_soup.find('div', {'id' : "children"})
if inspiredlinks != None:
if inspiredlinks:
inspiredlinks.find('h3').name='b' # don't want a big h3 at the end.
# fix relative links--all examples so far have been.
for alink in inspiredlinks.find_all('a'):
if 'http' not in alink['href']:
alink['href']='https://' + self.getSiteDomain() + alink['href']
skip_on_update_tags.append(inspiredlinks)
foot_notes_div.append(inspiredlinks)
## remove empty head/food notes div(s)
if not head_notes_div.find(True):
head_notes_div.extract()
if not foot_notes_div.find(True):
foot_notes_div.extract()
## AO3 story end notes end up in the 'last' chapter, but if
## updated, then there's a new 'last' chapter. This option
## applies the 'skip_on_ffdl_update' class to those tags which
## means they will be removed during epub reading for update.
## Results: only the last chapter will have end notes.
## Side-effect: An 'Update Always' that doesn't add a new
## lasts chapter will remove the end notes.
if self.getConfig("remove_authorfootnotes_on_update"):
for skip_tag in skip_on_update_tags:
if skip_tag.has_attr('class'):
skip_tag['class'].append('skip_on_ffdl_update')
else:
skip_tag['class']=['skip_on_ffdl_update']
# logger.debug(skip_tag)
return self.utf8FromSoup(url,save_chapter)
def before_get_urls_from_page(self, url, normalize):
    # special stuff to log into archiveofourown.org, if possible.
    # Unlike most that show the links to 'adult' stories, but protect
    # them, AO3 doesn't even show them if not logged in. Only works
    # with saved user/pass--not going to prompt for list.
    if not self.getConfig("username"):
        return
    suffix = ""
    if self.getConfig("is_adult"):
        suffix = "&view_adult=true" if '?' in url else "?view_adult=true"
    # just to get an authenticity_token.
    page = self.get_request(url + suffix)
    # login the session; the caller then fetches the list page with
    # the logged-in session.
    self.performLogin(url, page)
def get_series_from_page(self, url, data, normalize=False):
    '''
    This method is to make it easier for adapters to detect a
    series URL, pick out the series metadata and list of storyUrls
    to return without needing to override get_urls_from_page
    entirely.

    Returns a dict with at least {'urllist':[...]} plus optional
    'name', 'desc' and 'status'; an empty dict when url is not a
    series URL.
    '''
    ## easiest way to get all the weird URL possibilities and stay
    ## up to date with future changes.
    series_pattern = self.getSiteURLPattern().replace('/works/', '/series/')
    if not re.match(series_pattern, url):
        return {}
    soup = self.make_soup(data)
    story_urls = []
    for a in soup.select('h4.heading a:first-child'):
        story_urls.append('https://' + self.host + a['href'])
    retval = {'urllist': story_urls}
    retval['name'] = stripHTML(soup.select_one("h2.heading"))
    desc = soup.select_one("div.wrapper dd blockquote.userstuff")
    if desc:
        desc.name = 'div'  # change blockquote to div to match stories.
    retval['desc'] = desc
    stats = stripHTML(soup.select_one("dl.series dl.stats"))
    if 'Complete:Yes' in stats:
        retval['status'] = "Completed"
    elif 'Complete:No' in stats:
        retval['status'] = "In-Progress"
    return retval

View file

@ -1,174 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    # Module-level factory hook: the adapter loader calls this to
    # obtain the adapter class implemented in this file.
    adapter_class = ArchiveSkyeHawkeComAdapter
    return adapter_class
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
    """Adapter for archive.skyehawke.com (eFiction-like archive)."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult = False

        # get storyId from url--url validation guarantees query is only no=1234
        self.story.setMetadata('storyId', self.parsedUrl.query.split('=')[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/story.php?no=' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'ash')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'archive.skyehawke.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['archive.skyehawke.com', 'www.skyehawke.com']

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://archive.skyehawke.com/story.php?no=1234 http://www.skyehawke.com/archive/story.php?no=1234 http://skyehawke.com/archive/story.php?no=1234"

    def getSiteURLPattern(self):
        return r"https?://(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Populate self.story metadata and the chapter list from the
        story page and the author's page.  Raises FailedToDownload (via
        get_request) on network problems."""
        url = self.url
        logger.debug("URL: " + url)

        data = self.get_request(url)
        soup = self.make_soup(data)

        ## Title -- the page shows it quoted inside the left span.
        a = soup.find('div', {'class': "story border"}).find('span', {'class': 'left'})
        title = stripHTML(a).split('"')[1]
        self.story.setMetadata('title', title)

        # Find authorid and URL from... author url.
        author = a.find('a')
        self.story.setMetadata('authorId', author['href'].split('=')[1])
        self.story.setMetadata('authorUrl', 'http://' + self.host + '/' + author['href'])
        self.story.setMetadata('author', author.string)
        authorSoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

        # Chapter list comes from the chapter <select>; skip the first
        # placeholder <option>.
        options = soup.find('select', {'name': 'chapter'}).findAll('option')
        for ch in options[1:]:
            self.add_chapter(ch, ch['value'])

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.
        box = soup.find('div', {'class': "container borderridge"})
        summary = box.find('span').text  # renamed: `sum` shadowed the builtin
        self.setDescription(url, summary)

        boxes = soup.findAll('div', {'class': "container bordersolid"})
        for box in boxes:
            if box.find('b') != None and box.find('b').text == "History and Story Information":
                # Labels are <b> tags; the value is the adjacent text node.
                for b in box.findAll('b'):
                    if "words" in b.nextSibling:
                        self.story.setMetadata('numWords', b.text)
                    if "archived" in b.previousSibling:
                        self.story.setMetadata('datePublished', makeDate(stripHTML(b.text), self.dateformat))
                    if "updated" in b.previousSibling:
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(b.text), self.dateformat))
                    if "fandom" in b.nextSibling:
                        self.story.addToList('category', b.text)
                # Replace <br> with a marker so genre can be split out of
                # the flattened text.
                for br in box.findAll('br'):
                    br.replaceWith('split')
                genre = box.text.split("Genre:")[1].split("split")[0]
                if not "Unspecified" in genre:
                    self.story.addToList('genre', genre)

            if box.find('span') != None and box.find('span').text == "WARNING":
                rating = box.findAll('span')[1]
                rating.find('br').replaceWith('split')
                rating = rating.text.replace("This story is rated", '').split('split')[0]
                self.story.setMetadata('rating', rating)
                logger.debug(self.story.getMetadata('rating'))
                warnings = box.find('ol')
                if warnings != None:
                    warnings = warnings.text.replace(']', '').replace('[', '').split(' ')
                    for warning in warnings:
                        self.story.addToList('warnings', warning)

        # Status and characters only appear on the author's page entry
        # for this story.
        for asoup in authorSoup.findAll('div', {'class': "story bordersolid"}):
            if asoup.find('a')['href'] == 'story.php?no=' + self.story.getMetadata('storyId'):
                if '[ Completed ]' in asoup.text:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')
                chars = asoup.findNext('div').text.split('Characters')[1].split(']')[0]
                for char in chars.split(','):
                    if not "None" in char:
                        self.story.addToList('characters', char)
                break

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Return the chapter HTML at url, raising FailedToDownload when
        the expected container is missing."""
        logger.debug('Getting chapter text from: %s' % url)
        soup = self.make_soup(self.get_request(url))
        div = soup.find('div', {'class': "chapter bordersolid"})
        # Bug fix: previously the chained .findNext() calls ran before
        # the None check, raising AttributeError instead of the intended
        # FailedToDownload when the container was absent.
        if div is not None:
            div = div.findNext('div').findNext('div')
        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
        return self.utf8FromSoup(url, div)

View file

@ -79,7 +79,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
data1 = self.get_request(self.url) data1 = self.get_request(self.url)
soup1 = self.make_soup(data1) soup1 = self.make_soup(data1)
#strip comments from soup #strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))] [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
if 'Page Not Found.' in data1: if 'Page Not Found.' in data1:
raise exceptions.StoryDoesNotExist(self.url) raise exceptions.StoryDoesNotExist(self.url)
@ -92,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
self.story.setMetadata('title', title.string) self.story.setMetadata('title', title.string)
# Author # Author
author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a') author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
authorurl = author['href'] authorurl = author['href']
self.story.setMetadata('author', author.string) self.story.setMetadata('author', author.string)
self.story.setMetadata('authorUrl', authorurl) self.story.setMetadata('authorUrl', authorurl)
@ -112,7 +112,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
### add it before the rest of the pages, if any ### add it before the rest of the pages, if any
self.add_chapter('1', self.url) self.add_chapter('1', self.url)
chapterTable = soup1.find('div',{'class':'pages'}).findAll('a') chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')
if chapterTable is not None: if chapterTable is not None:
# Multi-chapter story # Multi-chapter story
@ -124,7 +124,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
self.add_chapter(chapterTitle, chapterUrl) self.add_chapter(chapterTitle, chapterUrl)
rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip() rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
self.story.setMetadata('rating',rated) self.story.setMetadata('rating',rated)
self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y')) self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))

View file

@ -48,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# normalized story URL. # normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation. # Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','asph') self.story.setMetadata('siteabbrev','asph')
@ -64,10 +64,10 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
@classmethod @classmethod
def getSiteExampleURLs(cls): def getSiteExampleURLs(cls):
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
def getSiteURLPattern(self): def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
## Login seems to be reasonably standard across eFiction sites. ## Login seems to be reasonably standard across eFiction sites.
def needToLoginCheck(self, data): def needToLoginCheck(self, data):
@ -92,7 +92,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
params['intent'] = '' params['intent'] = ''
params['submit'] = 'Submit' params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php' loginUrl = 'https://' + self.getSiteDomain() + '/user.php'
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname'])) params['penname']))
@ -130,20 +130,20 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1]) self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl'))) asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
try: try:
# in case link points somewhere other than the first chapter # in case link points somewhere other than the first chapter
a = soup.findAll('option')[1]['value'] a = soup.find_all('option')[1]['value']
self.story.setMetadata('storyId',a.split('=',)[1]) self.story.setMetadata('storyId',a.split('=',)[1])
url = 'http://'+self.host+'/'+a url = 'https://'+self.host+'/'+a
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
except: except:
pass pass
for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}): for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None: if a != None:
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
@ -151,13 +151,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
if len(chapters) == 0: if len(chapters) == 0:
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
for chapter in chapters: for chapter in chapters:
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
# eFiction sites don't help us out a lot with their meta data # eFiction sites don't help us out a lot with their meta data
@ -170,7 +170,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
except: except:
return "" return ""
cats = info.findAll('a',href=re.compile('categories.php')) cats = info.find_all('a',href=re.compile('categories.php'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
@ -188,7 +188,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
## <td><span class="sb"><b>Published:</b> 04/08/2007</td> ## <td><span class="sb"><b>Published:</b> 04/08/2007</td>
## one story had <b>Updated...</b> in the description. Restrict to sub-table ## one story had <b>Updated...</b> in the description. Restrict to sub-table
labels = info.find('table').findAll('b') labels = info.find('table').find_all('b')
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = stripHTML(labelspan) label = stripHTML(labelspan)

View file

@ -111,11 +111,17 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
def doExtractChapterUrlsAndMetadata(self,get_cover=True): def doExtractChapterUrlsAndMetadata(self,get_cover=True):
url = self.url url = self.url
logger.info("url: "+url) logger.info("url: "+url)
data = self.get_request(url) soup = None
try:
data = self.get_request(url)
soup = self.make_soup(data)
except exceptions.HTTPErrorFFF as e:
if e.status_code != 404:
raise
data = self.decode_data(e.data)
soup = self.make_soup(data) # logger.debug(data)
if not soup or self.loginNeededCheck(data):
if self.loginNeededCheck(data):
# always login if not already to avoid lots of headaches # always login if not already to avoid lots of headaches
self.performLogin(url,data) self.performLogin(url,data)
# refresh website after logging in # refresh website after logging in
@ -140,8 +146,8 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
mainmeta = soup.find('footer', {'class': 'main-meta'}) mainmeta = soup.find('footer', {'class': 'main-meta'})
alist = mainmeta.find('span', text='Author(s)') alist = mainmeta.find('span', string='Author(s)')
alist = alist.parent.findAll('a', href=re.compile(r"/profile/view/\d+")) alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
for a in alist: for a in alist:
self.story.addToList('authorId',a['href'].split('/')[-1]) self.story.addToList('authorId',a['href'].split('/')[-1])
self.story.addToList('authorUrl','https://'+self.host+a['href']) self.story.addToList('authorUrl','https://'+self.host+a['href'])
@ -153,10 +159,10 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
chapters=soup.find('select',{'name':'chapter-nav'}) chapters=soup.find('select',{'name':'chapter-nav'})
hrefattr=None hrefattr=None
if chapters: if chapters:
chapters=chapters.findAll('option') chapters=chapters.find_all('option')
hrefattr='value' hrefattr='value'
else: # didn't find <select name='chapter-nav', look for alternative else: # didn't find <select name='chapter-nav', look for alternative
chapters=soup.find('div',{'class':'widget--chapters'}).findAll('a') chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
hrefattr='href' hrefattr='href'
for index, chapter in enumerate(chapters): for index, chapter in enumerate(chapters):
if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text: if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
@ -165,9 +171,9 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# find timestamp # find timestamp
a = soup.find('span', text='Updated') a = soup.find('span', string='Updated')
if a == None: if a == None:
a = soup.find('span', text='Published') # use published date if work was never updated a = soup.find('span', string='Published') # use published date if work was never updated
a = a.parent.find('time') a = a.parent.find('time')
chapterDate = makeDate(a['datetime'],self.dateformat) chapterDate = makeDate(a['datetime'],self.dateformat)
if newestChapter == None or chapterDate > newestChapter: if newestChapter == None or chapterDate > newestChapter:
@ -175,7 +181,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
self.newestChapterNum = index self.newestChapterNum = index
# story status # story status
a = mainmeta.find('span', text='Completed') a = mainmeta.find('span', string='Completed')
if a: if a:
self.story.setMetadata('status', 'Completed') self.story.setMetadata('status', 'Completed')
else: else:
@ -194,37 +200,37 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
self.setDescription(url,a) self.setDescription(url,a)
# story tags # story tags
a = mainmeta.find('span',text='Tags') a = mainmeta.find('span',string='Tags')
if a: if a:
tags = a.parent.findAll('a') tags = a.parent.find_all('a')
for tag in tags: for tag in tags:
self.story.addToList('tags', tag.text) self.story.addToList('tags', tag.text)
# story tags # story tags
a = mainmeta.find('span',text='Characters') a = mainmeta.find('span',string='Characters')
if a: if a:
self.story.addToList('characters', a.nextSibling) self.story.addToList('characters', a.nextSibling)
# published on # published on
a = soup.find('span', text='Published') a = soup.find('span', string='Published')
a = a.parent.find('time') a = a.parent.find('time')
self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat)) self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))
# updated on # updated on
a = soup.find('span', text='Updated') a = soup.find('span', string='Updated')
if a: if a:
a = a.parent.find('time') a = a.parent.find('time')
self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat)) self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))
# word count # word count
a = soup.find('span', text='Total Word Count') a = soup.find('span', string='Total Word Count')
if a: if a:
a = a.find_next('span') a = a.find_next('span')
self.story.setMetadata('numWords', int(a.text.split()[0])) self.story.setMetadata('numWords', int(a.text.split()[0]))
# upvote, subs, and views # upvote, subs, and views
a = soup.find('div',{'class':'title-meta'}) a = soup.find('div',{'class':'title-meta'})
spans = a.findAll('span', recursive=False) spans = a.find_all('span', recursive=False)
self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1)) self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1)) self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
if len(spans) > 2: # views can be private if len(spans) > 2: # views can be private
@ -246,24 +252,39 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
data = self.get_request(url) data = self.get_request(url)
soup = self.make_soup(data) soup = self.make_soup(data)
# logger.debug(data)
try: ageform = soup.select_one('form[action="/account/toggle_age"]')
# <script>var postApi = "https://www.asianfanfics.com/api/chapters/4791923/chapter_46d32e413d1a702a26f7637eabbfb6f3.json";</script> # logger.debug(ageform)
jsonlink = soup.find('script',string=re.compile(r'/api/chapters/[0-9]+/chapter_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks if ageform and (self.is_adult or self.getConfig("is_adult")):
chap_json = json.loads(self.get_request(jsonlink)) params = {}
content = self.make_soup(chap_json['post']).find('body') # BS4 adds <html><body> if not present. params['is_of_age']=ageform.select_one('input#is_of_age')['value']
content.name='div' # change body to a div. params['current_url']=ageform.select_one('input#current_url')['value']
if self.getConfig('inject_chapter_title'): params['csrf_aff_token']=ageform.select_one('input[name="csrf_aff_token"]')['value']
# the dumbest workaround ever for the abbreviated chapter titles from before loginUrl = 'https://' + self.getSiteDomain() + '/account/mark_over_18'
logger.debug("Injecting full-length chapter title") logger.info("Will now toggle age to URL (%s)" % (loginUrl))
newTitle = soup.find('h1', {'id' : 'chapter-title'}).text # logger.debug(params)
newTitle = self.make_soup('<h3>%s</h3>' % (newTitle)).find('body') # BS4 adds <html><body> if not present. data = self.post_request(loginUrl, params)
newTitle.name='div' # change body to a div. soup = self.make_soup(data)
newTitle.append(content) # logger.debug(data)
return self.utf8FromSoup(url,newTitle)
else: content = soup.find('div', {'id': 'user-submitted-body'})
return self.utf8FromSoup(url,content)
except Exception as e: if self.getConfig('inject_chapter_image'):
logger.debug("json lookup failed, going on with HTML chapter") logger.debug("Injecting chapter image")
content = soup.find('div', {'id': 'user-submitted-body'}) imgdiv = soup.select_one('div#bodyText div.bot-spacer')
return self.utf8FromSoup(url,content) if imgdiv:
content.insert(0, "\n")
content.insert(0, imgdiv)
content.insert(0, "\n")
if self.getConfig('inject_chapter_title'):
logger.debug("Injecting full-length chapter title")
title = soup.find('h1', {'id' : 'chapter-title'}).text
newTitle = soup.new_tag('h3')
newTitle.string = title
content.insert(0, "\n")
content.insert(0, newTitle)
content.insert(0, "\n")
return self.utf8FromSoup(url,content)

View file

@ -126,7 +126,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
# The update date is with the chapter links... so we will update it here as well # The update date is with the chapter links... so we will update it here as well
for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip() value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat)) self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href']) self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])
@ -134,11 +134,11 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
# Get the MetaData # Get the MetaData
# Erotia Tags # Erotia Tags
tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode')) tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
for tag in tags: for tag in tags:
self.story.addToList('eroticatags',tag.text) self.story.addToList('eroticatags',tag.text)
for td in soup.findAll('td'): for td in soup.find_all('td'):
if len(td.text)>0: if len(td.text)>0:
if 'Added on:' in td.text and '<table' not in unicode(td): if 'Added on:' in td.text and '<table' not in unicode(td):
value = td.text.replace('Added on:','').strip() value = td.text.replace('Added on:','').strip()
@ -169,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url)) raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
#strip comments from soup #strip comments from soup
[comment.extract() for comment in chaptertag.findAll(text=lambda text:isinstance(text, Comment))] [comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]
# BDSM Library basically wraps it's own html around the document, # BDSM Library basically wraps it's own html around the document,
# so we will be removing the script, title and meta content from the # so we will be removing the script, title and meta content from the
# storyblock # storyblock
for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'): for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
tag.extract() tag.extract()
for tag in chaptertag.findAll('o:smarttagtype'): for tag in chaptertag.find_all('o:smarttagtype'):
tag.name = 'span' tag.name = 'span'
## I'm going to take the attributes off all of the tags ## I'm going to take the attributes off all of the tags
## because they usually refer to the style that we removed above. ## because they usually refer to the style that we removed above.
for tag in chaptertag.findAll(True): for tag in chaptertag.find_all(True):
tag.attrs = None tag.attrs = None
return self.utf8FromSoup(url,chaptertag) return self.utf8FromSoup(url,chaptertag)

View file

@ -117,7 +117,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
summary_div = list_box.find('div', {'class': 'list_summary'}) summary_div = list_box.find('div', {'class': 'list_summary'})
if not self.getConfig('keep_summary_html'): if not self.getConfig('keep_summary_html'):
summary = ''.join(summary_div(text=True)) summary = ''.join(summary_div(string=True))
else: else:
summary = self.utf8FromSoup(author_url, summary_div) summary = self.utf8FromSoup(author_url, summary_div)
@ -157,9 +157,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
self.story.addToList('warnings', warning) self.story.addToList('warnings', warning)
elif key == 'Chapters':
self.story.setMetadata('numChapters', int(value))
elif key == 'Words': elif key == 'Words':
# Apparently only numChapters need to be an integer for # Apparently only numChapters need to be an integer for
# some strange reason. Remove possible ',' characters as to # some strange reason. Remove possible ',' characters as to
@ -174,7 +171,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
# ugly %p(am/pm) hack moved into makeDate so other sites can use it. # ugly %p(am/pm) hack moved into makeDate so other sites can use it.
self.story.setMetadata('dateUpdated', date) self.story.setMetadata('dateUpdated', date)
if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')): if self.story.getMetadataRaw('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
raise exceptions.AdultCheckRequired(self.url) raise exceptions.AdultCheckRequired(self.url)
def getChapterText(self, url): def getChapterText(self, url):

View file

@ -1,310 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from bs4.element import Tag
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
# By virtue of being recent and requiring both is_adult and user/pass,
# adapter_fanficcastletvnet.py is the best choice for learning to
# write adapters--especially for sites that use the eFiction system.
# Most sites that have ".../viewstory.php?sid=123" in the story URL
# are eFiction.
# For non-eFiction sites, it can be considerably more complex, but
# this is still a good starting point.
# In general an 'adapter' needs to do these five things:
# - 'Register' correctly with the downloader
# - Site Login (if needed)
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
# - Grab the chapter list
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
# - Grab the chapter texts
# Search for XXX comments--that's where things are most likely to need changing.
# This function is called by the downloader in all adapter_*.py files
# in this dir to register the adapter class. So it needs to be
# updated to reflect the class below it. That, plus getSiteDomain()
# take care of 'Registering'.
def getClass():
    """Entry point used by the downloader to locate this adapter class."""
    return BloodTiesFansComAdapter # XXX
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
    """Adapter for the bloodties-fans.com eFiction archive."""

    def __init__(self, config, url):
        # Base class parses the URL and sets up config/story objects.
        BaseSiteAdapter.__init__(self, config, url)
        # Adult flag can also be set later via the is_adult config option.
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','btf') # XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y" # XXX
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'bloodties-fans.com' # XXX
@classmethod
def getSiteExampleURLs(cls):
    """Return a sample story URL shown in user-facing help text."""
    return "http://%s/fiction/viewstory.php?sid=1234" % cls.getSiteDomain()
def getSiteURLPattern(self):
    """Return a regexp string matching this site's normalized story URLs."""
    prefix = "http://" + self.getSiteDomain() + "/fiction/viewstory.php?sid="
    return re.escape(prefix) + r"\d+$"
## Login seems to be reasonably standard across eFiction sites.
def needToLoginCheck(self, data):
    """Return True when *data* is a login-required or login-failed page."""
    # These marker strings are standard across eFiction installs.
    markers = ('Registered Users Only',
               'There is no such account on our website',
               "That password doesn't match the one in our database")
    return any(marker in data for marker in markers)
def performLogin(self, url):
    """Log in to the site's eFiction user.php endpoint.

    Uses self.username/self.password when a password was supplied
    (e.g. interactively), otherwise falls back to the ini config
    values.  Returns True on success; raises
    exceptions.FailedToLogin when the 'Member Account' marker is
    absent from the response.
    """
    params = {}
    if self.password:
        params['penname'] = self.username
        params['password'] = self.password
    else:
        params['penname'] = self.getConfig("username")
        params['password'] = self.getConfig("password")
    params['cookiecheck'] = '1'
    params['submit'] = 'Submit'

    loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
    logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                         params['penname']))

    d = self.post_request(loginUrl, params)

    if "Member Account" not in d : #Member Account
        logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                         params['penname']))
        # Fix: the original had an unreachable 'return False' after this
        # raise; removed as dead code.
        raise exceptions.FailedToLogin(url,params['penname'])
    else:
        return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
    """Fetch the story index page and populate chapters and metadata.

    Handles the eFiction 'are you adult?' warning redirect, then
    scrapes title, author, chapter links, and the <strong>-labelled
    metadata list.  Raises AdultCheckRequired or AccessDenied as
    appropriate.
    """
    if self.is_adult or self.getConfig("is_adult"):
        # Weirdly, different sites use different warning numbers.
        # If the title search below fails, there's a good chance
        # you need a different number. print data at that point
        # and see what the 'click here to continue' url says.
        # Furthermore, there's a couple sites now with more than
        # one warning level for different ratings. And they're
        # fussy about it. midnightwhispers has three: 4, 2 & 1.
        # we'll try 1 first.
        addurl = "&ageconsent=ok&warning=4" # XXX
    else:
        addurl=""

    # index=1 makes sure we see the story chapter index. Some
    # sites skip that for one-chapter stories.
    url = self.url+'&index=1'+addurl
    logger.debug("URL: "+url)

    data = self.get_request(url)

    # The actual text that is used to announce you need to be an
    # adult varies from site to site. Again, print data before
    # the title search to troubleshoot.
    # Since the warning text can change by warning level, let's
    # look for the warning pass url. nfacommunity uses
    # &amp;warning= -- actually, so do other sites. Must be an
    # eFiction book.
    # viewstory.php?sid=561&amp;warning=4
    # viewstory.php?sid=561&amp;warning=1
    # viewstory.php?sid=561&amp;warning=2
    #print data
    #m = re.search(r"'viewstory.php\?sid=1882(&amp;warning=4)'",data)
    m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
    if m != None:
        if self.is_adult or self.getConfig("is_adult"):
            # We tried the default and still got a warning, so
            # let's pull the warning number from the 'continue'
            # link and reload data.
            addurl = m.group(1)
            # correct stupid &amp; error in url.
            addurl = addurl.replace("&amp;","&")
            url = self.url+'&index=1'+addurl
            logger.debug("URL 2nd try: "+url)

            data = self.get_request(url)
        else:
            raise exceptions.AdultCheckRequired(self.url)

    # Site string contains 'adminstrators' (sic); must match exactly.
    if "Access denied. This story has not been validated by the adminstrators of this site." in data:
        raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

    soup = self.make_soup(data)
    # print data

    ## Title
    a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
    self.story.setMetadata('title',stripHTML(a))

    # Find authorid and URL from... author url.
    a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
    self.story.setMetadata('authorId',a['href'].split('=')[1])
    self.story.setMetadata('authorUrl','http://'+self.host+'/fiction/'+a['href'])
    self.story.setMetadata('author',a.string)

    # Find the chapters:
    for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
        # just in case there's tags, like <i> in chapter titles.
        self.add_chapter(chapter,'http://'+self.host+'/fiction/'+chapter['href']+addurl)

    # eFiction sites don't help us out a lot with their meta data
    # formating, so it's a little ugly.

    # utility method
    def defaultGetattr(d,k):
        # Best-effort dict/attr lookup; any failure yields "".
        try:
            return d[k]
        except:
            return ""

    listbox = soup.find('div',{'class':'listbox'})
    # <strong>Rating:</strong> M<br /> etc
    labels = listbox.findAll('strong')
    for labelspan in labels:
        # value is the node right after the <strong> label.
        value = labelspan.nextSibling
        label = labelspan.string
        if 'Summary' in label:
            ## Everything until the next strong tag.
            svalue = ""
            while not isinstance(value,Tag) or value.name != 'strong':
                svalue += unicode(value)
                value = value.nextSibling
            self.setDescription(url,svalue)
            #self.story.setMetadata('description',stripHTML(svalue))

        if 'Rating' in label:
            self.story.setMetadata('rating', value)

        if 'Words' in label:
            # Strip the '|' separator characters the site mixes in.
            value=re.sub(r"\|",r"",value)
            self.story.setMetadata('numWords', value)

        if 'Categories' in label:
            cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
            catstext = [cat.string for cat in cats]
            # NOTE(review): entries are already .string results; the second
            # .string works only because NavigableString.string returns
            # itself -- TODO confirm before touching this pattern.
            for cat in catstext:
                self.story.addToList('category',cat.string)

        if 'Characters' in label:
            chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
            charstext = [char.string for char in chars]
            for char in charstext:
                self.story.addToList('characters',char.string)

        if 'Completed' in label:
            if 'Yes' in value:
                self.story.setMetadata('status', 'Completed')
            else:
                self.story.setMetadata('status', 'In-Progress')

        if 'Published' in label:
            value=re.sub(r"\|",r"",value)
            self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

        if 'Updated' in label:
            value=re.sub(r"\|",r"",value)
            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

    # moved outside because they changed *most*, but not *all* labels to <strong>
    ships = listbox.findAll('a',href=re.compile(r'browse.php.type=class&(amp;)?type_id=2')) # crappy html: & vs &amp; in url.
    shipstext = [ship.string for ship in ships]
    for ship in shipstext:
        self.story.addToList('ships',ship.string)

    genres = listbox.findAll('a',href=re.compile(r'browse.php\?type=class&(amp;)?type_id=1')) # crappy html: & vs &amp; in url.
    genrestext = [genre.string for genre in genres]
    for genre in genrestext:
        self.story.addToList('genre',genre.string)

    try:
        # Find Series name from series URL.
        a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
        series_name = a.string
        series_url = 'http://'+self.host+'/fiction/'+a['href']

        # Series index on the series page determines position number.
        seriessoup = self.make_soup(self.get_request(series_url))

        storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
        i=1
        for a in storyas:
            if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                self.setSeries(series_name, i)
                self.story.setMetadata('seriesUrl',series_url)
                break
            i+=1

    except:
        # I find it hard to care if the series parsing fails
        pass
# grab the text for an individual chapter.
def getChapterText(self, url):
    """Fetch one chapter page and return its cleaned-up story HTML."""
    logger.debug('Getting chapter text from: %s' % url)

    page = self.get_request(url)
    story_div = self.make_soup(page).find('div', {'id' : 'story'})

    if story_div is None:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

    return self.utf8FromSoup(url,story_div)

View file

@ -1,279 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Entry point used by the downloader to locate this adapter class."""
    return BuffyGilesComAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class BuffyGilesComAdapter(BaseSiteAdapter):
    """Adapter for the buffygiles.velocitygrass.com eFiction archive."""

    def __init__(self, config, url):
        # Base class parses the URL and sets up config/story objects.
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /efiction part. Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','bufg')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d/%m/%y"
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'buffygiles.velocitygrass.com'
@classmethod
def getSiteExampleURLs(cls):
    """Return a sample story URL shown in user-facing help text."""
    return "http://%s/efiction/viewstory.php?sid=1234" % cls.getSiteDomain()
def getSiteURLPattern(self):
    """Return a regexp string matching this site's normalized story URLs."""
    prefix = "http://" + self.getSiteDomain() + "/efiction/viewstory.php?sid="
    return re.escape(prefix) + r"\d+$"
## Login seems to be reasonably standard across eFiction sites.
def needToLoginCheck(self, data):
    """Return True when *data* is a login-required or login-failed page."""
    # These marker strings are standard across eFiction installs.
    markers = ('Registered Users Only',
               'There is no such account on our website',
               "That password doesn't match the one in our database")
    return any(marker in data for marker in markers)
def performLogin(self, url):
    """Log in to the site's eFiction user.php endpoint.

    Uses self.username/self.password when a password was supplied
    (e.g. interactively), otherwise falls back to the ini config
    values.  Returns True on success; raises
    exceptions.FailedToLogin when the 'Member Account' marker is
    absent from the response.
    """
    params = {}
    if self.password:
        params['penname'] = self.username
        params['password'] = self.password
    else:
        params['penname'] = self.getConfig("username")
        params['password'] = self.getConfig("password")
    params['cookiecheck'] = '1'
    params['submit'] = 'Submit'

    loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
    logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                         params['penname']))

    d = self.post_request(loginUrl, params)

    if "Member Account" not in d : #Member Account
        logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                         params['penname']))
        # Fix: the original had an unreachable 'return False' after this
        # raise; removed as dead code.
        raise exceptions.FailedToLogin(url,params['penname'])
    else:
        return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
    """Fetch the story index page and populate chapters and metadata.

    Handles login (when needed) and the eFiction 'are you adult?'
    warning redirect, then scrapes title, author, chapter links and
    the <span class='label'>-labelled metadata list.  Raises
    AdultCheckRequired or FailedToDownload as appropriate.
    """
    if self.is_adult or self.getConfig("is_adult"):
        # Weirdly, different sites use different warning numbers.
        # If the title search below fails, there's a good chance
        # you need a different number. print data at that point
        # and see what the 'click here to continue' url says.
        addurl = "&warning=5"
    else:
        addurl=""

    # index=1 makes sure we see the story chapter index. Some
    # sites skip that for one-chapter stories.
    url = self.url+'&index=1'+addurl
    logger.debug("URL: "+url)

    data = self.get_request(url)

    if self.needToLoginCheck(data):
        # need to log in for this one.
        self.performLogin(url)
        data = self.get_request(url)

    # Since the warning text can change by warning level, let's
    # look for the warning pass url. ksarchive uses
    # &amp;warning= -- actually, so do other sites. Must be an
    # eFiction book.
    # efiction/viewstory.php?sid=1882&amp;warning=4
    # efiction/viewstory.php?sid=1654&amp;ageconsent=ok&amp;warning=5
    #print data
    # NOTE(review): the next assignment is leftover debugging; its
    # result is discarded by the immediate reassignment below.
    m = re.search(r"'efiction/viewstory.php\?sid=542(&amp;warning=5)'",data)
    m = re.search(r"'efiction/viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
    if m != None:
        if self.is_adult or self.getConfig("is_adult"):
            # We tried the default and still got a warning, so
            # let's pull the warning number from the 'continue'
            # link and reload data.
            addurl = m.group(1)
            # correct stupid &amp; error in url.
            addurl = addurl.replace("&amp;","&")
            url = self.url+'&index=1'+addurl
            logger.debug("URL 2nd try: "+url)

            data = self.get_request(url)
        else:
            raise exceptions.AdultCheckRequired(self.url)

    # Site string contains 'adminstrators' (sic); must match exactly.
    if "Access denied. This story has not been validated by the adminstrators of this site." in data:
        raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

    soup = self.make_soup(data)
    # print data

    pagetitle = soup.find('div',{'id':'pagetitle'})

    ## Title
    a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
    self.story.setMetadata('title',stripHTML(a))

    # Find authorid and URL from... author url.
    a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
    self.story.setMetadata('authorId',a['href'].split('=')[1])
    self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
    self.story.setMetadata('author',a.string)

    # Find the chapters:
    for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
        # just in case there's tags, like <i> in chapter titles.
        self.add_chapter(chapter,'http://'+self.host+'/efiction/'+chapter['href']+addurl)

    # eFiction sites don't help us out a lot with their meta data
    # formating, so it's a little ugly.

    # utility method
    def defaultGetattr(d,k):
        # Best-effort dict/attr lookup; any failure yields "".
        try:
            return d[k]
        except:
            return ""

    # <span class="label">Rated:</span> NC-17<br /> etc
    labels = soup.findAll('span',{'class':'label'})
    for labelspan in labels:
        # value is the node right after the label span.
        value = labelspan.nextSibling
        label = labelspan.string
        if 'Summary' in label:
            ## Everything until the next span class='label'
            svalue = ""
            while 'label' not in defaultGetattr(value,'class'):
                svalue += unicode(value)
                value = value.nextSibling
            self.setDescription(url,svalue)
            #self.story.setMetadata('description',stripHTML(svalue))

        if 'Rated' in label:
            self.story.setMetadata('rating', value)

        if 'Word count' in label:
            self.story.setMetadata('numWords', value)

        if 'Categories' in label:
            cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
            for cat in cats:
                self.story.addToList('category',cat.string)

        if 'Characters' in label:
            chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
            for char in chars:
                self.story.addToList('characters',char.string)

        if 'Genre' in label:
            genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
            for genre in genres:
                self.story.addToList('genre',genre.string)

        if 'Warnings' in label:
            warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
            for warning in warnings:
                self.story.addToList('warnings',warning.string)

        if 'Completed' in label:
            if 'Yes' in value:
                self.story.setMetadata('status', 'Completed')
            else:
                self.story.setMetadata('status', 'In-Progress')

        if 'Published' in label:
            self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

        if 'Updated' in label:
            # there's a stray [ at the end.
            #value = value[0:-1]
            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

    try:
        # Find Series name from series URL.
        a = soup.find('a', href=re.compile(r"efiction/viewseries.php\?seriesid=\d+"))
        series_name = a.string
        series_url = 'http://'+self.host+'/'+a['href']

        # Series index on the series page determines position number.
        seriessoup = self.make_soup(self.get_request(series_url))

        storyas = seriessoup.findAll('a', href=re.compile(r'^efiction/viewstory.php\?sid=\d+$'))
        i=1
        for a in storyas:
            if a['href'] == ('efiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
                self.setSeries(series_name, i)
                self.story.setMetadata('seriesUrl',series_url)
                break
            i+=1

    except:
        # I find it hard to care if the series parsing fails
        pass
# grab the text for an individual chapter.
def getChapterText(self, url):
    """Fetch one chapter page and return its cleaned-up story HTML."""
    logger.debug('Getting chapter text from: %s' % url)

    page = self.get_request(url)
    story_div = self.make_soup(page).find('div', {'id' : 'story'})

    if story_div is None:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

    return self.utf8FromSoup(url,story_div)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team # Copyright 2024 FanFicFare team
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -15,26 +15,24 @@
# limitations under the License. # limitations under the License.
# #
# Software: eFiction
from __future__ import absolute_import from __future__ import absolute_import
from .base_efiction_adapter import BaseEfictionAdapter import logging
logger = logging.getLogger(__name__)
# Class name has to be unique. Our convention is camel case the from .base_otw_adapter import BaseOTWAdapter
# sitename with Adapter at the end. www is skipped.
class NHAMagicalWorldsUsAdapter(BaseEfictionAdapter):
@staticmethod
def getSiteDomain():
return 'nha.magical-worlds.us'
@classmethod
def getSiteAbbrev(self):
return 'nha'
@classmethod
def getDateFormat(self):
return "%d/%m/%y"
def getClass(): def getClass():
return NHAMagicalWorldsUsAdapter return CFAAAdapter
class CFAAAdapter(BaseOTWAdapter):
def __init__(self, config, url):
BaseOTWAdapter.__init__(self, config, url)
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','cfaa')
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'www.cfaarchive.org'

View file

@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -134,7 +134,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling value = labels[0].previousSibling
svalue = "" svalue = ""
@ -154,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0]) self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -194,7 +194,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -88,8 +88,8 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
intro = stripHTML(info.select_one('.inform-inform-txt').span) intro = stripHTML(info.select_one('.inform-inform-txt').span)
self.setDescription(self.url, intro) self.setDescription(self.url, intro)
for content in soup.findAll('div', {'id': 'content'}): for content in soup.find_all('div', {'id': 'content'}):
for a in content.findAll('a'): for a in content.find_all('a'):
self.add_chapter(a.get_text(), a['href']) self.add_chapter(a.get_text(), a['href'])

View file

@ -49,7 +49,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
# normalized story URL. # normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation. # Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','chosen2') self.story.setMetadata('siteabbrev','chosen2')
@ -65,10 +65,10 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
@classmethod @classmethod
def getSiteExampleURLs(cls): def getSiteExampleURLs(cls):
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
def getSiteURLPattern(self): def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" return r"https?"+re.escape("://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
## Getting the chapter list and the meta data, plus 'is adult' checking. ## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self): def extractChapterUrlsAndMetadata(self):
@ -98,7 +98,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
## Title ## Title
## Some stories have a banner that has it's own a tag before the actual text title... ## Some stories have a banner that has it's own a tag before the actual text title...
## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last. ## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1] a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
@ -106,14 +106,14 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
# so I'm checking the pagetitle div for this as well # so I'm checking the pagetitle div for this as well
a = soup.find('div',{'id':'pagetitle'}).find('a', href=re.compile(r"viewuser.php\?uid=\d+")) a = soup.find('div',{'id':'pagetitle'}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1]) self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
#self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']) #self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
self.add_chapter(chapter,'http://{0}/{1}{2}'.format(self.host, chapter['href'],addURL)) self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
# eFiction sites don't help us out a lot with their meta data # eFiction sites don't help us out a lot with their meta data
@ -127,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
val = labelspan.nextSibling val = labelspan.nextSibling
value = unicode('') value = unicode('')
@ -149,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', stripHTML(value)) self.story.setMetadata('numWords', stripHTML(value))
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Pairing' in label: if 'Pairing' in label:
ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
for ship in ships: for ship in ships:
self.story.addToList('ships',ship.string) self.story.addToList('ships',ship.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -192,16 +192,16 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
# Find Series name from series URL. # Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string series_name = a.string
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # this site has several links to each story.
if 'contact.php' not in a['href'] and 'index' not in a['href']: if a.text == 'Latest Chapter':
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
self.setSeries(series_name, i) self.setSeries(series_name, i)
self.story.setMetadata('seriesUrl',series_url) self.story.setMetadata('seriesUrl',series_url)
break break

View file

@ -1,220 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Module entry point: return the adapter class for this site."""
    return CSIForensicsComAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class CSIForensicsComAdapter(BaseSiteAdapter):
    """eFiction adapter for csi-forensics.com (viewstory.php?sid=NNNN stories)."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','csiforensics')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'csi-forensics.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Fetch the story index page and populate chapter list and metadata.

        Raises AdultCheckRequired or AccessDenied depending on the page text.
        """
        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&ageconsent=ok&warning=5&skin=elegantcsi"
        else:
            addurl="&skin=elegantcsi"

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        data = self.get_request(url)

        # The actual text that is used to announce you need to be an
        # adult varies from site to site. Again, print data before
        # the title search to troubleshoot.
        if "This story is rated NC-17, and therefore is not suitable for minors. If you are below the age required to view such material in your locality, please return from whence you came." in data: # XXX
            raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)
        # print data

        ## Title
        pt = soup.find('div', {'id' : 'pagetitle'})
        a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',a.string)

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Rating appears in the pagetitle text as "... [Rating]".
        rate = stripHTML(soup.find('div',{'id':'pagetitle'}))
        rate = rate[rate.rindex('[')+1:rate.rindex(']')]
        self.story.setMetadata('rating', rate)

        # Find the chapters (find_all is the bs4 spelling of the
        # deprecated findAll alias used elsewhere in this file):
        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        smalldiv = soup.find('div', {'class' : 'small'})

        chars = smalldiv.find_all('a',href=re.compile(r'browse.php\?type=characters'))
        for char in chars:
            self.story.addToList('characters',char.string)

        metatext = stripHTML(smalldiv)
        if 'Completed: Yes' in metatext:
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        word=soup.find(text=re.compile("Word count:")).split(':')
        self.story.setMetadata('numWords', word[1])

        cats = smalldiv.find_all('a',href=re.compile(r'browse.php\?type=categories'))
        for cat in cats:
            self.story.addToList('category',cat.string)

        warnings = smalldiv.find_all('a',href=re.compile(r'browse.php\?type=class(&amp;)type_id=2(&amp;)classid=\d+'))
        for warning in warnings:
            self.story.addToList('warnings',warning.string)

        # "Published: <date> Updated: <date>" lives in the 'bottom' div.
        date=soup.find('div',{'class' : 'bottom'})
        pd=date.find(text=re.compile("Published:")).string.split(': ')
        self.story.setMetadata('datePublished', makeDate(stripHTML(pd[1].split(' U')[0]), self.dateformat))
        self.story.setMetadata('dateUpdated', makeDate(stripHTML(pd[2]), self.dateformat))

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            label = labelspan.string

            if 'Genres' in label:
                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']
            seriessoup = self.make_soup(self.get_request(series_url))
            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1
        except Exception:
            # I find it hard to care if the series parsing fails
            pass

        # Pull the metadata block out of the tree so it doesn't leak
        # into the summary extracted below.
        smalldiv.extract()

        # Summary
        summary = soup.find('div', {'class' : 'content'})
        self.setDescription(url,summary)

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return its story div as cleaned HTML."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

View file

@ -0,0 +1,256 @@
# -*- coding: utf-8 -*-
# Copyright 2021 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
import re
# py2 vs py3 transition
from ..six.moves.urllib.parse import urlparse
from .base_adapter import BaseSiteAdapter, makeDate
from fanficfare.htmlcleanup import stripHTML
from .. import exceptions as exceptions
from fanficfare.dateutils import parse_relative_date_string
logger = logging.getLogger(__name__)
def getClass():
    """Module entry point: return the adapter class for this site."""
    return DeviantArtComSiteAdapter
class DeviantArtComSiteAdapter(BaseSiteAdapter):
    """Adapter for single-deviation 'art' pages on www.deviantart.com."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'dac')
        self.username = 'NoneGiven'
        self.password = ''
        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        # storyId is resolved later in extractChapterUrlsAndMetadata();
        # only the author is taken from the URL here.
        author = match.group('author')
        self.story.setMetadata('author', author)
        self.story.setMetadata('authorId', author)
        self.story.setMetadata('authorUrl', 'https://www.deviantart.com/' + author)
        self._setURL(url)

    @staticmethod
    def getSiteDomain():
        return 'www.deviantart.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['www.deviantart.com']

    @classmethod
    def getProtocol(cls):
        # classmethod receives the class; name the parameter accordingly.
        return 'https'

    @classmethod
    def getSiteExampleURLs(cls):
        return 'https://%s/<author>/art/<work-name>' % cls.getSiteDomain()

    def getSiteURLPattern(self):
        return r'https?://www\.deviantart\.com/(?P<author>[^/]+)/art/(?P<id>[^/]+)/?'

    def performLogin(self, url):
        """Log in to DeviantArt via its two-step form (username, then password).

        Raises FailedToLogin when no username is configured or when either
        step of the handshake fails.
        """
        if self.username and self.username != 'NoneGiven':
            username = self.username
        else:
            username = self.getConfig('username')
        # logger.debug("\n\nusername:(%s)\n\n"%username)
        if not username:
            logger.info("Login Required for URL %s" % url)
            raise exceptions.FailedToLogin(url,username)

        # Step 1: fetch the login page for its hidden tokens, then post
        # the username.
        data = self.get_request_raw('https://www.deviantart.com/users/login', referer=url, usecache=False)
        data = self.decode_data(data)
        soup = self.make_soup(data)

        params = {
            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
            'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
            'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
            'challenge': soup.find('input', {'name': 'challenge'})['value'],
            'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
            'remember': 'on',
            'username': username
        }
        loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/step2'
        logger.debug('Will now login to deviantARt as (%s)' % username)

        result = self.post_request(loginUrl, params, usecache=False)
        soup = self.make_soup(result)

        # Step 2 requires the lu_token2 returned by step 1.
        if not soup.find('input', {'name': 'lu_token2'}):
            logger.info("Login Failed for URL %s (no lu_token2 found)" % url)
            raise exceptions.FailedToLogin(url,username)

        params = {
            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
            'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
            'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
            'challenge': soup.find('input', {'name': 'challenge'})['value'],
            'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
            'lu_token2': soup.find('input', {'name': 'lu_token2'})['value'],
            'remember': 'on',
            'username': ''
        }
        if self.password:
            params['password'] = self.password
        else:
            params['password'] = self.getConfig('password')
        # logger.debug("\n\nparams['password']:(%s)\n\n"%params['password'])
        loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/signin'
        logger.debug('Will now send password to deviantARt')

        result = self.post_request(loginUrl, params, usecache=False)

        if 'Log In | DeviantArt' in result:
            logger.error('Failed to login to deviantArt as %s' % username)
            raise exceptions.FailedToLogin('https://www.deviantart.com', username)
        else:
            return True

    def requiresLogin(self, data):
        # Page marker: artwork restricted to logged-in community members.
        return '</a> has limited the viewing of this artwork to members of the DeviantArt community only' in data

    def isLoggedIn(self, data):
        # Presence of the logout form indicates an authenticated session.
        return '<form id="logout-form" action="https://www.deviantart.com/users/logout" method="POST">' in data

    def isWatchersOnly(self, data):
        # Page marker: deviation restricted to the author's watchers.
        return '>Watchers-Only Deviation<' in data

    def requiresMatureContentEnabled(self, data):
        # Any of several page markers dA uses for mature-gated content.
        return (
            '>This content is intended for mature audiences<' in data
            or '>This deviation is intended for mature audiences<' in data
            or '>This filter hides content that may be inappropriate for some viewers<' in data
            or '>May contain sensitive content<' in data
            or '>Log in to view<' in data
            or '>This deviation has been labeled as containing themes not suitable for all deviants.<' in data
        )

    def extractChapterUrlsAndMetadata(self):
        """Fetch the deviation page, log in if gated, and populate metadata.

        A deviation is always a single 'chapter'.
        """
        logger.debug('URL: %s', self.url)
        data = self.get_request(self.url)
        soup = self.make_soup(data)

        ## story can require login outright, or it can show up as
        ## watchers-only or mature-enabled without the same 'requires
        ## login' strings.
        if self.requiresLogin(data) or ( not self.isLoggedIn(data) and
                                         (self.isWatchersOnly(data) or
                                          self.requiresMatureContentEnabled(data)) ):
            if self.performLogin(self.url):
                data = self.get_request(self.url, usecache=False)
                soup = self.make_soup(data)

        ## Check watchers only and mature enabled again, separately,
        ## after login because they can still apply after login.
        if self.isWatchersOnly(data):
            raise exceptions.FailedToDownload(
                'Deviation is only available for watchers.' +
                'You must watch this author before you can download it.'
            )
        if self.requiresMatureContentEnabled(data):
            raise exceptions.FailedToDownload(
                'Deviation is set as mature, you must go into your account ' +
                'and enable showing of mature content.'
            )

        # Prefer the canonical og:url meta tag for the story id; guard
        # against the tag being absent (subscripting None would raise
        # before the old 'if appurl:' check could run).
        appurl_tag = soup.select_one('meta[property="og:url"]')
        appurl = appurl_tag['content'] if appurl_tag else None
        if appurl:
            story_id = urlparse(appurl).path.lstrip('/')
        else:
            logger.debug("Looking for JS story id")
            ## after login, this is only found in a JS block. Dunno why.
            ## F875A309-B0DB-860E-5079-790D0FBE5668
            # re.search, not re.match: the escaped-JSON blob sits in the
            # middle of the page, so a start-anchored match never hits.
            match = re.search(r'\\"deviationUuid\\":\\"(?P<id>[A-Z0-9-]+)\\",',data)
            if match:
                story_id = match.group('id')
            else:
                raise exceptions.FailedToDownload('Failed to find Story ID.')
        self.story.setMetadata('storyId', story_id)

        title = soup.select_one('h1').get_text()
        self.story.setMetadata('title', stripHTML(title))

        ## dA has no concept of status
        # self.story.setMetadata('status', 'Completed')

        pubdate = soup.select_one('time').get_text()
        # Maybe do this better, but this works
        try:
            self.story.setMetadata('datePublished', makeDate(pubdate, '%b %d, %Y'))
        except Exception:
            # dA sometimes renders relative dates ("3 days ago") instead.
            self.story.setMetadata('datePublished', parse_relative_date_string(pubdate))

        # do description here if appropriate

        # select() always returns a list (possibly empty), so iterate it
        # directly; story tags become genres.
        for tag in soup.select('a[href^="https://www.deviantart.com/tag"] span'):
            self.story.addToList('genre', tag.get_text())

        self.add_chapter(title, self.url)

    def getChapterText(self, url):
        """Fetch the deviation page and return its literature content as HTML."""
        logger.debug('Getting chapter text from: %s', url)
        data = self.get_request(url)
        # logger.debug(data)
        soup = self.make_soup(data)

        # remove comments section to avoid false matches
        comments = soup.select_one('[data-hook=comments_thread]')
        if comments:
            comments.decompose()
        # previous search not always found in some stories.
        # <div id="comments"></div> inside the real containing
        # div seems more common
        commentsdiv = soup.select_one('div#comments')
        if commentsdiv:
            commentsdiv.parent.decompose()

        # three different 'content' tags to look for.
        # This is the current in Oct 2024
        content = soup.select_one('[data-editor-viewer="1"]')
        if content is None:
            # older story? I can't find any of this style in Oct2024
            content = soup.select_one('[data-id="rich-content-viewer"]')
        if content is None:
            # olderer story, but used by some older (2018) posts
            content = soup.select_one('.legacy-journal')
        if content is None:
            raise exceptions.FailedToDownload(
                'Could not find story text. Please open a bug with the URL %s' % self.url
            )
        return self.utf8FromSoup(url, content)

View file

@ -95,7 +95,7 @@ class DokugaComAdapter(BaseSiteAdapter):
params['Submit'] = 'Submit' params['Submit'] = 'Submit'
# copy all hidden input tags to pick up appropriate tokens. # copy all hidden input tags to pick up appropriate tokens.
for tag in soup.findAll('input',{'type':'hidden'}): for tag in soup.find_all('input',{'type':'hidden'}):
params[tag['name']] = tag['value'] params[tag['name']] = tag['value']
loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction' loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
@ -153,7 +153,7 @@ class DokugaComAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
# Find the chapters: # Find the chapters:
chapters = soup.find('select').findAll('option') chapters = soup.find('select').find_all('option')
if len(chapters)==1: if len(chapters)==1:
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1') self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
else: else:
@ -168,7 +168,7 @@ class DokugaComAdapter(BaseSiteAdapter):
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div') asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
#grab the rest of the metadata from the author's page #grab the rest of the metadata from the author's page
for div in asoup.findAll('div'): for div in asoup.find_all('div'):
nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$")) nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
if nav != None: if nav != None:
break break
@ -208,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):
else: else:
asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'}) asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
for div in asoup.findAll('div'): for div in asoup.find_all('div'):
nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$")) nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
if nav != None: if nav != None:
break break

View file

@ -161,7 +161,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -181,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.setDescription(url,content.find('blockquote')) self.setDescription(url,content.find('blockquote'))
for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')): for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')): for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
labels = content.findAll('b') labels = content.find_all('b')
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
@ -208,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', value) self.story.setMetadata('rating', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -247,7 +247,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -138,7 +138,7 @@ class EFPFanFicNet(BaseSiteAdapter):
# no selector found, so it's a one-chapter story. # no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')}) allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
for o in allOptions: for o in allOptions:
url = u'https://%s/%s' % ( self.getSiteDomain(), url = u'https://%s/%s' % ( self.getSiteDomain(),
o['value']) o['value'])
@ -170,14 +170,14 @@ class EFPFanFicNet(BaseSiteAdapter):
if authsoup != None: if authsoup != None:
# last author link with offset should be the 'next' link. # last author link with offset should be the 'next' link.
authurl = u'https://%s/%s' % ( self.getSiteDomain(), authurl = u'https://%s/%s' % ( self.getSiteDomain(),
authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] ) authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
# Need author page for most of the metadata. # Need author page for most of the metadata.
logger.debug("fetching author page: (%s)"%authurl) logger.debug("fetching author page: (%s)"%authurl)
authsoup = self.make_soup(self.get_request(authurl)) authsoup = self.make_soup(self.get_request(authurl))
#print("authsoup:%s"%authsoup) #print("authsoup:%s"%authsoup)
storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$')) storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
for storya in storyas: for storya in storyas:
#print("======storya:%s"%storya) #print("======storya:%s"%storya)
storyblock = storya.findParent('div',{'class':'storybloc'}) storyblock = storya.findParent('div',{'class':'storybloc'})
@ -194,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
# Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br /> # Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
# Categoria: <a href="categories.php?catid=1&amp;parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&amp;parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&amp;a=">3</a> recensioni</div> # Categoria: <a href="categories.php?catid=1&amp;parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&amp;parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&amp;a=">3</a> recensioni</div>
cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
@ -262,7 +262,7 @@ class EFPFanFicNet(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1': if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
@ -288,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
# remove any header and 'o:p' tags. # remove any header and 'o:p' tags.
for tag in div.findAll("head") + div.findAll("o:p"): for tag in div.find_all("head") + div.find_all("o:p"):
tag.extract() tag.extract()
# change any html and body tags to div. # change any html and body tags to div.
for tag in div.findAll("html") + div.findAll("body"): for tag in div.find_all("html") + div.find_all("body"):
tag.name='div' tag.name='div'
# remove extra bogus doctype. # remove extra bogus doctype.

View file

@ -126,7 +126,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -144,7 +144,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling value = labels[0].previousSibling
svalue = "" svalue = ""
@ -164,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0]) self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -204,7 +204,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -1,218 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Module entry point: the downloader imports each adapter_*.py and
    calls getClass() to register the adapter class defined below."""
    return FaerieArchiveComAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class FaerieArchiveComAdapter(BaseSiteAdapter):
    """Adapter for the eFiction-based archive at faerie-archive.com
    (formerly efiction.esteliel.de)."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult = False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId', self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid=' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'fae')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'faerie-archive.com'

    @classmethod
    def getAcceptDomains(cls):
        # for backward compatibility
        return ['efiction.esteliel.de', cls.getSiteDomain()]

    @classmethod
    def getConfigSections(cls):
        "Only needs to be overriden if has additional ini sections."
        # for backward compatibility
        return ['efiction.esteliel.de', cls.getSiteDomain()]

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://" + cls.getSiteDomain() + "/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        # Accept story URLs on either the old or the new domain.
        siter = "(" + "|".join([re.escape(x) for x in self.getAcceptDomains()]) + ")"
        return re.escape("http://") + siter + re.escape("/viewstory.php?sid=") + r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Fetch the story index page and populate chapters and metadata.

        Raises AccessDenied when the archive reports the story as
        not yet validated by the site admins.
        """
        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url + '&index=1'
        logger.debug("URL: " + url)

        data = self.get_request(url)

        # NB: 'adminstrators' typo matches the site's actual output text.
        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() + " says: Access denied. This story has not been validated by the adminstrators of this site.")

        ## Title and author
        soup = self.make_soup(data)

        pagetitle = soup.find('div', {'id': 'pagetitle'})

        ## Title -- the link back to this story's own page.
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid=' + self.story.getMetadata('storyId') + "$"))
        self.story.setMetadata('title', stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId', a['href'].split('=')[1])
        self.story.setMetadata('authorUrl', 'http://' + self.host + '/' + a['href'])
        self.story.setMetadata('author', a.string)

        # Find the chapters:
        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid=' + self.story.getMetadata('storyId') + r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter, 'http://' + self.host + '/' + chapter['href'])

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # <span class="label">Rated:</span> NC-17<br /> etc
        # 'listbox' renamed from 'list' to avoid shadowing the builtin.
        listbox = soup.find('div', {'class': 'listbox'})
        labelspan = listbox.find('span', {'class': 'label'})

        # Genre links appear next to the first label span.
        genres = labelspan.parent.find_all('a', href=re.compile(r'browse.php\?type=class&type_id=1'))
        for genre in genres:
            self.story.addToList('genre', genre.string)

        labels = listbox.find_all('b')
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'Rating' not in unicode(value):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url, svalue)

            if 'Rating' in label:
                self.story.setMetadata('rating', value)

            if 'Words' in label:
                self.story.setMetadata('numWords', value)

            if 'Category' in label:
                cats = labelspan.parent.find_all('a', href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category', cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.find_all('a', href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters', char.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            if listbox.find('a', href=re.compile(r"series.php")) is not None:
                # BUGFIX: the original iterated over the undefined name
                # 'asoup', which raised NameError on every story in a
                # series; the bare except below swallowed it, so series
                # metadata was never set.  Search the story page soup.
                for series in soup.find_all('a', href=re.compile(r"series.php\?seriesid=\d+")):
                    # Find Series name from series URL.
                    series_url = 'http://' + self.host + '/' + series['href']
                    seriessoup = self.make_soup(self.get_request(series_url))
                    storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
                    i = 1
                    for a in storyas:
                        if a['href'] == ('viewstory.php?sid=' + self.story.getMetadata('storyId')):
                            name = seriessoup.find('div', {'id': 'pagetitle'})
                            name.find('a').extract()
                            self.setSeries(name.text.split(' by[')[0], i)
                            self.story.setMetadata('seriesUrl', series_url)
                            i = 0
                            break
                        i += 1
                    if i == 0:
                        break
        except Exception:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id': 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url, div)

View file

@ -53,6 +53,9 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
#Setting the 'Zone' for each "Site" #Setting the 'Zone' for each "Site"
self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','') self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')
# site change .nsns to -nsns
self.zone = self.zone.replace('.nsns','-nsns')
# normalized story URL. # normalized story URL.
self._setURL('https://{0}.{1}/{2}/'.format( self._setURL('https://{0}.{1}/{2}/'.format(
self.zone, self.getBaseDomain(), self.story.getMetadata('storyId'))) self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
@ -79,7 +82,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
@classmethod @classmethod
def getAcceptDomains(cls): def getAcceptDomains(cls):
# need both .nsns(old) and -nsns(new) because it's a domain
# change, not just URL change.
return ['aaran-st-vines.nsns.fanficauthors.net', return ['aaran-st-vines.nsns.fanficauthors.net',
'aaran-st-vines-nsns.fanficauthors.net',
'abraxan.fanficauthors.net', 'abraxan.fanficauthors.net',
'bobmin.fanficauthors.net', 'bobmin.fanficauthors.net',
'canoncansodoff.fanficauthors.net', 'canoncansodoff.fanficauthors.net',
@ -95,9 +101,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
'jeconais.fanficauthors.net', 'jeconais.fanficauthors.net',
'kinsfire.fanficauthors.net', 'kinsfire.fanficauthors.net',
'kokopelli.nsns.fanficauthors.net', 'kokopelli.nsns.fanficauthors.net',
'kokopelli-nsns.fanficauthors.net',
'ladya.nsns.fanficauthors.net', 'ladya.nsns.fanficauthors.net',
'ladya-nsns.fanficauthors.net',
'lorddwar.fanficauthors.net', 'lorddwar.fanficauthors.net',
'mrintel.nsns.fanficauthors.net', 'mrintel.nsns.fanficauthors.net',
'mrintel-nsns.fanficauthors.net',
'musings-of-apathy.fanficauthors.net', 'musings-of-apathy.fanficauthors.net',
'ruskbyte.fanficauthors.net', 'ruskbyte.fanficauthors.net',
'seelvor.fanficauthors.net', 'seelvor.fanficauthors.net',
@ -108,7 +117,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
################################################################################################ ################################################################################################
@classmethod @classmethod
def getSiteExampleURLs(self): def getSiteExampleURLs(self):
return ("https://aaran-st-vines.nsns.fanficauthors.net/A_Story_Name/ " return ("https://aaran-st-vines-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://abraxan.fanficauthors.net/A_Story_Name/ " + "https://abraxan.fanficauthors.net/A_Story_Name/ "
+ "https://bobmin.fanficauthors.net/A_Story_Name/ " + "https://bobmin.fanficauthors.net/A_Story_Name/ "
+ "https://canoncansodoff.fanficauthors.net/A_Story_Name/ " + "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "
@ -123,10 +132,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
+ "https://jbern.fanficauthors.net/A_Story_Name/ " + "https://jbern.fanficauthors.net/A_Story_Name/ "
+ "https://jeconais.fanficauthors.net/A_Story_Name/ " + "https://jeconais.fanficauthors.net/A_Story_Name/ "
+ "https://kinsfire.fanficauthors.net/A_Story_Name/ " + "https://kinsfire.fanficauthors.net/A_Story_Name/ "
+ "https://kokopelli.nsns.fanficauthors.net/A_Story_Name/ " + "https://kokopelli-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://ladya.nsns.fanficauthors.net/A_Story_Name/ " + "https://ladya-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://lorddwar.fanficauthors.net/A_Story_Name/ " + "https://lorddwar.fanficauthors.net/A_Story_Name/ "
+ "https://mrintel.nsns.fanficauthors.net/A_Story_Name/ " + "https://mrintel-nsns.fanficauthors.net/A_Story_Name/ "
+ "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ " + "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
+ "https://ruskbyte.fanficauthors.net/A_Story_Name/ " + "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
+ "https://seelvor.fanficauthors.net/A_Story_Name/ " + "https://seelvor.fanficauthors.net/A_Story_Name/ "
@ -136,8 +145,16 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
################################################################################################ ################################################################################################
def getSiteURLPattern(self): def getSiteURLPattern(self):
## .nsns kept here to match both . and -
return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/' return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
@classmethod
def get_section_url(cls,url):
## only changing .nsns to -nsns and only when part of the
## domain.
url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
return url
################################################################################################ ################################################################################################
def doExtractChapterUrlsAndMetadata(self, get_cover=True): def doExtractChapterUrlsAndMetadata(self, get_cover=True):
@ -163,7 +180,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
# The published and update dates are with the chapter links... # The published and update dates are with the chapter links...
# so we have to get them from there. # so we have to get them from there.
chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata( chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
'storyId')+'/([a-zA-Z0-9_]+)/')) 'storyId')+'/([a-zA-Z0-9_]+)/'))
# Here we are getting the published date. It is the date the first chapter was "updated" # Here we are getting the published date. It is the date the first chapter was "updated"
@ -202,7 +219,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
## Raising AdultCheckRequired after collecting chapters gives ## Raising AdultCheckRequired after collecting chapters gives
## a double chapter list. So does genre, but it de-dups ## a double chapter list. So does genre, but it de-dups
## automatically. ## automatically.
if( self.story.getMetadata('rating') == 'Mature' if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only']
and not (self.is_adult or self.getConfig("is_adult")) ): and not (self.is_adult or self.getConfig("is_adult")) ):
raise exceptions.AdultCheckRequired(self.url) raise exceptions.AdultCheckRequired(self.url)
@ -226,7 +243,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter. # grab the text for an individual chapter.
def getChapterText(self, url): def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url) logger.debug('Getting chapter text from: %s' % url)
if( self.story.getMetadata('rating') == 'Mature' and if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only'] and
(self.is_adult or self.getConfig("is_adult")) ): (self.is_adult or self.getConfig("is_adult")) ):
addurl = "?bypass=1" addurl = "?bypass=1"
else: else:
@ -241,8 +258,8 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
"Error downloading Chapter: '{0}'! Missing required element!".format(url)) "Error downloading Chapter: '{0}'! Missing required element!".format(url))
#Now, there are a lot of extranious tags within the story division.. so we will remove them. #Now, there are a lot of extranious tags within the story division.. so we will remove them.
for tag in story.findAll('ul',{'class':'pager'}) + story.findAll( for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}): 'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
tag.extract() tag.extract()
return self.utf8FromSoup(url,story) return self.utf8FromSoup(url,story)

View file

@ -1,187 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import re
# py2 vs py3 transition
from ..six import ensure_text
from ..six.moves.urllib import parse as urlparse
from .base_adapter import BaseSiteAdapter, makeDate
from .. import exceptions
_SOURCE_CODE_ENCODING = 'utf-8'
def getClass():
    """Module entry point: the downloader imports each adapter_*.py and
    calls getClass() to register the adapter class defined below."""
    return FanficHuAdapter
def _get_query_data(url):
components = urlparse.urlparse(url)
query_data = urlparse.parse_qs(components.query)
return dict((key, data[0]) for key, data in query_data.items())
class FanficHuAdapter(BaseSiteAdapter):
    """Adapter for the Hungarian eFiction archive at fanfic.hu/merengo."""

    SITE_ABBREVIATION = 'ffh'
    SITE_DOMAIN = 'fanfic.hu'
    SITE_LANGUAGE = 'Hungarian'
    BASE_URL = 'https://' + SITE_DOMAIN + '/merengo/'
    VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%s'
    # Site shows dates as month/day/year.
    DATE_FORMAT = '%m/%d/%Y'

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        # Pull sid out of the query string and normalize the story URL.
        query_data = urlparse.parse_qs(self.parsedUrl.query)
        story_id = query_data['sid'][0]

        self.story.setMetadata('storyId', story_id)
        self._setURL(self.VIEW_STORY_URL_TEMPLATE % story_id)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
        self.story.setMetadata('language', self.SITE_LANGUAGE)

    @staticmethod
    def getSiteDomain():
        return FanficHuAdapter.SITE_DOMAIN

    @classmethod
    def getSiteExampleURLs(cls):
        return cls.VIEW_STORY_URL_TEMPLATE % 1234

    def getSiteURLPattern(self):
        # [:-2] strips the '%s' placeholder; accept both http and https.
        return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'

    def extractChapterUrlsAndMetadata(self):
        """Collect chapter URLs from the story page, then scrape the
        rest of the metadata from the author's story-list page."""
        # '&i=1' forces the chapter index view.
        soup = self.make_soup(self.get_request(self.url + '&i=1'))

        # A bare 'írta' ('written by') page title means no such story.
        if ensure_text(soup.title.string).strip(u' :') == u'írta':
            raise exceptions.StoryDoesNotExist(self.url)

        chapter_options = soup.find('form', action='viewstory.php').select('option')
        # Remove redundant "Fejezetek" option
        chapter_options.pop(0)
        # If there is still more than one entry remove chapter overview entry
        if len(chapter_options) > 1:
            chapter_options.pop(0)

        for option in chapter_options:
            url = urlparse.urljoin(self.url, option['value'])
            self.add_chapter(option.string, url)

        # Story pages carry little metadata; fetch the author page and
        # locate this story's entry there instead.
        author_url = urlparse.urljoin(self.BASE_URL, soup.find('a', href=lambda href: href and href.startswith('viewuser.php?uid='))['href'])
        soup = self.make_soup(self.get_request(author_url))

        story_id = self.story.getMetadata('storyId')
        for table in soup('table', {'class': 'mainnav'}):
            title_anchor = table.find('span', {'class': 'storytitle'}).a
            href = title_anchor['href']
            # Some entries wrap the real URL in a javascript: popup call.
            if href.startswith('javascript:'):
                href = href.rsplit(' ', 1)[1].strip("'")
            query_data = _get_query_data(href)

            if query_data['sid'] == story_id:
                break
        else:
            # This should never happen, the story must be found on the author's
            # page.
            raise exceptions.FailedToDownload(self.url)

        self.story.setMetadata('title', title_anchor.string)

        rows = table('tr')

        # First row: title / author / review-count anchors.
        anchors = rows[0].div('a')
        author_anchor = anchors[1]
        query_data = _get_query_data(author_anchor['href'])
        self.story.setMetadata('author', author_anchor.string)
        self.story.setMetadata('authorId', query_data['uid'])
        self.story.setMetadata('authorUrl', urlparse.urljoin(self.BASE_URL, author_anchor['href']))
        self.story.setMetadata('reviews', anchors[3].string)

        # Second row: story summary.
        if self.getConfig('keep_summary_html'):
            self.story.setMetadata('description', self.utf8FromSoup(author_url, rows[1].td))
        else:
            self.story.setMetadata('description', ''.join(rows[1].td(text=True)))

        # Remaining rows hold label/value cell pairs; step two cells at
        # a time and dispatch on the (Hungarian) label text.
        for row in rows[3:]:
            index = 0
            cells = row('td')
            while index < len(cells):
                cell = cells[index]
                key = ensure_text(cell.b.string).strip(u':')
                try:
                    value = ensure_text(cells[index+1].string)
                except:
                    # Value cell may hold tags rather than a plain string.
                    value = None

                if key == u'Kategória':            # Category
                    for anchor in cells[index+1]('a'):
                        self.story.addToList('category', anchor.string)
                elif key == u'Szereplõk':          # Characters
                    if cells[index+1].string:
                        for name in cells[index+1].string.split(', '):
                            self.story.addToList('character', name)
                elif key == u'Korhatár':           # Age rating
                    if value != 'nem korhatáros':  # 'no age limit'
                        self.story.setMetadata('rating', value)
                elif key == u'Figyelmeztetések':   # Warnings
                    for b_tag in cells[index+1]('b'):
                        self.story.addToList('warnings', b_tag.string)
                elif key == u'Jellemzõk':          # Genres
                    for genre in cells[index+1].string.split(', '):
                        self.story.addToList('genre', genre)
                elif key == u'Fejezetek':          # Chapter count
                    self.story.setMetadata('numChapters', int(value))
                elif key == u'Megjelenés':         # Published date
                    self.story.setMetadata('datePublished', makeDate(value, self.DATE_FORMAT))
                elif key == u'Frissítés':          # Updated date
                    self.story.setMetadata('dateUpdated', makeDate(value, self.DATE_FORMAT))
                elif key == u'Szavak':             # Word count
                    self.story.setMetadata('numWords', value)
                elif key == u'Befejezett':         # Completed? ('Nem' == 'No')
                    self.story.setMetadata('status', 'Completed' if value == 'Nem' else 'In-Progress')

                index += 2

        # Adult ('18') rated stories require the is_adult confirmation.
        if self.story.getMetadata('rating') == '18':
            if not (self.is_adult or self.getConfig('is_adult')):
                raise exceptions.AdultCheckRequired(self.url)

    def getChapterText(self, url):
        """Return the cleaned chapter HTML for *url*."""
        soup = self.make_soup(self.get_request(url))
        # The chapter text lives in the grandparent cell of the chapter
        # selection form; strip the site's div chrome from it.
        story_cell = soup.find('form', action='viewstory.php').parent.parent
        for div in story_cell('div'):
            div.extract()
        return self.utf8FromSoup(url, story_cell)

View file

@ -134,7 +134,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
## restrict meta searches to header. ## restrict meta searches to header.
fichead = soup.find('div',class_='FicHead') fichead = soup.find('div',class_='FicHead')
def get_meta_content(title): def get_meta_content(title):
val_label = fichead.find('div',string=title+u':') val_label = fichead.find('div',string=re.compile(u'^'+title+u':'))
if val_label: if val_label:
return val_label.find_next('div') return val_label.find_next('div')
@ -150,7 +150,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг'))) self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))
## Need to login for any rating higher than General. ## Need to login for any rating higher than General.
if self.story.getMetadata('rating') != 'General' and self.needToLoginCheck(data): if self.story.getMetadataRaw('rating') != 'General' and self.needToLoginCheck(data):
self.performLogin(url) self.performLogin(url)
# reload after login. # reload after login.
data = self.get_request(url,usecache=False) data = self.get_request(url,usecache=False)
@ -168,7 +168,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(h)) self.story.setMetadata('title',stripHTML(h))
## author(s): ## author(s):
content = get_meta_content(u'Автор') content = get_meta_content(u'Авторы?')
if content: if content:
alist = content.find_all('a', class_='user') alist = content.find_all('a', class_='user')
for a in alist: for a in alist:
@ -181,12 +181,8 @@ class FanFicsMeAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl','https://'+self.host) self.story.setMetadata('authorUrl','https://'+self.host)
self.story.setMetadata('authorId','0') self.story.setMetadata('authorId','0')
# translator(s) # translator(s) in different strings
content = get_meta_content(u'Переводчик') content = get_meta_content(u'Переводчикк?и?')
if not content:
# Переводчик vs Переводчи is 'Translator' vs 'TranslatorS'
content = get_meta_content(u'Переводчи')
logger.debug(content)
if content: if content:
for a in content.find_all('a', class_='user'): for a in content.find_all('a', class_='user'):
self.story.addToList('translatorsId',a['href'].split('/user')[-1]) self.story.addToList('translatorsId',a['href'].split('/user')[-1])
@ -241,7 +237,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
## size block, only saving word count. ## size block, only saving word count.
content = get_meta_content(u'Размер') content = get_meta_content(u'Размер')
words = stripHTML(content.find_all('li')[1]) words = stripHTML(content.find('a'))
words = re.sub(r'[^0-9]','',words) # only keep numbers words = re.sub(r'[^0-9]','',words) # only keep numbers
self.story.setMetadata('numWords',words) self.story.setMetadata('numWords',words)
@ -301,6 +297,10 @@ class FanFicsMeAdapter(BaseSiteAdapter):
# grab the text for an individual chapter. # grab the text for an individual chapter.
def getChapterTextNum(self, url, index): def getChapterTextNum(self, url, index):
logger.debug('Getting chapter text for: %s index: %s' % (url,index)) logger.debug('Getting chapter text for: %s index: %s' % (url,index))
m = re.match(r'.*&chapter=(\d+).*',url)
if m:
index=m.group(1)
logger.debug("Using index(%s) from &chapter="%index)
chapter_div = None chapter_div = None
if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1: if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:

View file

@ -44,9 +44,8 @@ class FanfictalkComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234 # get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
# normalized story URL. # normalized story URL.
self._setURL('https://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId')) self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation. # Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','ahpfftc') self.story.setMetadata('siteabbrev','ahpfftc')
@ -57,24 +56,24 @@ class FanfictalkComAdapter(BaseSiteAdapter):
@classmethod @classmethod
def getAcceptDomains(cls): def getAcceptDomains(cls):
return [cls.getSiteDomain(),'archive.hpfanfictalk.com'] return [cls.getSiteDomain(),'archive.hpfanfictalk.com','fanfictalk.com']
@classmethod @classmethod
def getConfigSections(cls): def getConfigSections(cls):
"Only needs to be overriden if has additional ini sections." "Only needs to be overriden if has additional ini sections."
return [cls.getConfigSection(),'archive.hpfanfictalk.com'] return [cls.getConfigSection(),'archive.hpfanfictalk.com','fanfictalk.com']
@staticmethod # must be @staticmethod, don't remove it. @staticmethod # must be @stgetAcceptDomainsaticmethod, don't remove it.
def getSiteDomain(): def getSiteDomain():
# The site domain. Does have www here, if it uses it. # The site domain. Does have www here, if it uses it.
return 'fanfictalk.com' return 'archive.fanfictalk.com'
@classmethod @classmethod
def getSiteExampleURLs(cls): def getSiteExampleURLs(cls):
return "https://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234" return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
def getSiteURLPattern(self): def getSiteURLPattern(self):
return r"https?://(archive\.hp)?"+re.escape(self.getSiteDomain())+r"(/archive)?/viewstory\.php\?sid=\d+$" return r"https?://("+r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()])+r")(/archive)?/viewstory\.php\?sid=\d+$"
## Getting the chapter list and the meta data, plus 'is adult' checking. ## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self): def extractChapterUrlsAndMetadata(self):
@ -118,7 +117,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/archive/'+chapter['href']) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
# categories # categories
for a in soup.select("div#sort a"): for a in soup.select("div#sort a"):
@ -171,14 +170,14 @@ class FanfictalkComAdapter(BaseSiteAdapter):
# Site allows stories to be in several series at once. FFF # Site allows stories to be in several series at once. FFF
# isn't thrilled with that, we have series00, series01, etc. # isn't thrilled with that, we have series00, series01, etc.
# Example: # Example:
# https://fanfictalk.com/archive/viewstory.php?sid=483 # https://archive.fanfictalk.com/viewstory.php?sid=483
if self.getConfig("collect_series"): if self.getConfig("collect_series"):
seriesspan = soup.find('span',label='Series') seriesspan = soup.find('span',label='Series')
for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))): for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))):
# logger.debug(seriesa) # logger.debug(seriesa)
series_name = stripHTML(seriesa) series_name = stripHTML(seriesa)
series_url = 'https://'+self.host+'/archive/'+seriesa['href'] series_url = 'https://'+self.host+'/'+seriesa['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
@ -205,9 +204,17 @@ class FanfictalkComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter. # grab the text for an individual chapter.
def getChapterText(self, url): def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url) if self.is_adult or self.getConfig("is_adult"):
# Weirdly, different sites use different warning numbers.
# If the title search below fails, there's a good chance
# you need a different number. print data at that point
# and see what the 'click here to continue' url says.
addurl = "&ageconsent=ok&warning=3"
else:
addurl=""
soup = self.make_soup(self.get_request(url)) logger.debug('Getting chapter text from: %s' % (url+addurl))
soup = self.make_soup(self.get_request(url+addurl))
div = soup.find('div', {'id' : 'story'}) div = soup.find('div', {'id' : 'story'})

View file

@ -1,274 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
# By virtue of being recent and requiring both is_adult and user/pass,
# adapter_fanficcastletvnet.py is the best choice for learning to
# write adapters--especially for sites that use the eFiction system.
# Most sites that have ".../viewstory.php?sid=123" in the story URL
# are eFiction.
# For non-eFiction sites, it can be considerably more complex, but
# this is still a good starting point.
# In general an 'adapter' needs to do these five things:
# - 'Register' correctly with the downloader
# - Site Login (if needed)
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
# - Grab the chapter list
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
# - Grab the chapter texts
# Search for XXX comments--that's where things are most likely to need changing.
# This function is called by the downloader in all adapter_*.py files
# in this dir to register the adapter class. So it needs to be
# updated to reflect the class below it. That, plus getSiteDomain()
# take care of 'Registering'.
def getClass():
    """Registration hook: the downloader imports every adapter_*.py module
    and calls getClass() to obtain the adapter class it provides."""
    return FanfictionJunkiesDeAdapter  # XXX
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
    """eFiction adapter for fanfiction-junkies.de.

    Follows the standard eFiction adapter template: optional login,
    'are you adult?' check, chapter list + story metadata from the
    story index page, and chapter text from div#story.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ffjde') # XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d/%m/%y" # XXX

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'fanfiction-junkies.de' # XXX

    @classmethod
    def getSiteExampleURLs(cls):
        """Return an example story URL shown to users for this site."""
        return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        """Return the regexp that acceptable story URLs must match."""
        return re.escape("http://"+self.getSiteDomain()+"/efiction/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        # True when the fetched page shows one of the stock eFiction
        # "not logged in / bad credentials" messages.
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        """POST the eFiction login form; raise FailedToLogin on failure."""
        params = {}

        if self.password:
            # Credentials set directly on the adapter take precedence
            # over the configured username/password.
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/efiction/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self.post_request(loginUrl, params)

        # Success is detected by the member area appearing in the response.
        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False  # NOTE(review): unreachable after raise; kept from the eFiction template
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Fetch the story index page and populate chapters and metadata.

        Raises AdultCheckRequired / AccessDenied / FailedToLogin as needed.
        """
        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number.  print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&ageconsent=ok&warning=1" # XXX
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self.get_request(url)

        # The actual text that is used to announce you need to be an
        # adult varies from site to site.  Again, print data before
        # the title search to troubleshoot.
        if "For adults only " in data: # XXX
            raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)
        # print data

        pagetitle = soup.find('h4')

        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',a.string)

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/efiction/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Reviews
        reviewdata = soup.find('div', {'id' : 'sort'})
        a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
        self.story.setMetadata('reviews',stripHTML(a))

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/efiction/'+chapter['href']+addurl)

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            # Swallow any lookup error and return empty string.
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        # NOTE(review): 'list' shadows the builtin; kept as-is.
        list = soup.find('div', {'class':'listbox'})
        labels = list.findAll('b')
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            # Labels on this site are German (summary/rating/words/etc.).
            if 'Zusammenfassung' in label:
                self.setDescription(url,value)

            if 'Eingestuft' in label:
                self.story.setMetadata('rating', value)

            if u'Wörter' in label:
                self.story.setMetadata('numWords', value)

            if 'Kategorie' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Charaktere' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Abgeschlossen' in label:
                # NOTE(review): label is German but the value is tested
                # against English 'Yes' -- confirm against a live page.
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if u'Veröffentlicht' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Aktualisiert' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/efiction/'+a['href']

            # Series index is the story's 1-based position on the series page.
            seriessoup = self.make_soup(self.get_request(series_url))

            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1
        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return the cleaned contents of div#story."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

View file

@ -35,7 +35,7 @@ ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy",
"Mystery", "Parody", "Poetry", "Romance", "Sci-Fi", "Spiritual", "Mystery", "Parody", "Poetry", "Romance", "Sci-Fi", "Spiritual",
"Supernatural", "Suspense", "Tragedy", "Western"] "Supernatural", "Suspense", "Tragedy", "Western"]
ffnetpluscategories=["+Anima", "Rosario + Vampire", "Blood+", ffnetpluscategories=["+Anima", "Alex + Ada", "Rosario + Vampire", "Blood+",
"+C: Sword and Cornett", "Norn9 - ノルン+ノネット", "+C: Sword and Cornett", "Norn9 - ノルン+ノネット",
"Haré+Guu/ジャングルはいつもハレのちグゥ", "Lost+Brain", "Haré+Guu/ジャングルはいつもハレのちグゥ", "Lost+Brain",
"Wicked + The Divine", "Alex + Ada", "RE: Alistair++", "Wicked + The Divine", "Alex + Ada", "RE: Alistair++",
@ -93,15 +93,47 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# logger.debug("post-url:%s"%url) # logger.debug("post-url:%s"%url)
return url return url
@classmethod
def get_url_search(cls,url):
regexp = super(getClass(), cls).get_url_search(url)
regexp = re.sub(r"^(?P<keep>.*net/s/\d+/\d+/)(?P<urltitle>[^\$]*)?",
r"\g<keep>(.*)",regexp)
logger.debug(regexp)
return regexp
def getSiteURLPattern(self): def getSiteURLPattern(self):
return self._get_site_url_pattern() return self._get_site_url_pattern()
## not actually putting urltitle on multi-chapters below, but ## normalized chapter URLs DO contain the story title now, but
## one-shots will have it, so this is still useful. normalized ## normalized to current urltitle in case of title changes.
## chapter URLs do NOT contain the story title.
def normalize_chapterurl(self,url): def normalize_chapterurl(self,url):
return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*", return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
r"https://www.\g<keep>",url) r"https://www.\g<keep>",url)+self.urltitle
def get_request(self,url,usecache=True):
## use super version if not set or isn't a chapter URL with a
## title.
if( not self.getConfig("try_shortened_title_urls") or
not re.match(r"https?://www\.fanfiction\.net/s/\d+/\d+/(?P<title>[^/]+)$", url) ):
return super(getClass(), self).get_request(url,usecache)
## kludgey way to attempt more than one URL variant by
## removing title one letter at a time. Note that network and
## open_pages_in_browser retries still happen first.
titlelen = len(url.split('/')[-1])
maxcut = min([4,titlelen])
j = 0
while j < maxcut: # should actually leave loop either by
# return or exception raise.
try:
useurl = url
if j: # j==0, full URL, then remove letters.
useurl = url[:-j]
return super(getClass(), self).get_request(useurl,usecache)
except exceptions.HTTPErrorFFF as fffe:
if j >= maxcut or 'Page not found or expired' not in unicode(fffe):
raise
j = j+1
def doExtractChapterUrlsAndMetadata(self,get_cover=True): def doExtractChapterUrlsAndMetadata(self,get_cover=True):
@ -130,18 +162,18 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
canonicalurl = soup.select_one('link[rel=canonical]')['href'] canonicalurl = soup.select_one('link[rel=canonical]')['href']
self.set_story_idurl(canonicalurl) self.set_story_idurl(canonicalurl)
## ffnet used to have a tendency to send out update notices in
## email before all their servers were showing the update on
## the first chapter. It generates another server request and
## doesn't seem to be needed lately, so now default it to off.
try:
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
# get chapter part of url.
except:
chapcount = 1
have_later_meta = False
if self.getConfig('check_next_chapter'): if self.getConfig('check_next_chapter'):
try: try:
## ffnet used to have a tendency to send out update
## notices in email before all their servers were
## showing the update on the first chapter. It
## generates another server request and doesn't seem
## to be needed lately, so now default it to off.
try:
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
# get chapter part of url.
except:
chapcount = 1
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(), tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
self.story.getMetadata('storyId'), self.story.getMetadata('storyId'),
chapcount+1, chapcount+1,
@ -152,9 +184,20 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
and "This request takes too long to process, it is timed out by the server." not in newdata: and "This request takes too long to process, it is timed out by the server." not in newdata:
logger.debug('=======Found newer chapter: %s' % tryurl) logger.debug('=======Found newer chapter: %s' % tryurl)
soup = self.make_soup(newdata) soup = self.make_soup(newdata)
have_later_meta = True
except Exception as e: except Exception as e:
logger.warning("Caught exception in check_next_chapter URL: %s Exception %s."%(unicode(tryurl),unicode(e))) logger.warning("Caught exception in check_next_chapter URL: %s Exception %s."%(unicode(tryurl),unicode(e)))
if self.getConfig('meta_from_last_chapter') and not have_later_meta and chapcount > 1:
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
self.story.getMetadata('storyId'),
chapcount,
self.urltitle)
logger.debug('=Trying last chapter for meta_from_last_chapter: %s' % tryurl)
newdata = self.get_request(tryurl)
soup = self.make_soup(newdata)
have_later_meta = True
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"^/u/\d+")) a = soup.find('a', href=re.compile(r"^/u/\d+"))
self.story.setMetadata('authorId',a['href'].split('/')[2]) self.story.setMetadata('authorId',a['href'].split('/')[2])
@ -169,7 +212,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## For 1, use the second link. ## For 1, use the second link.
## For 2, fetch the crossover page and pull the two categories from there. ## For 2, fetch the crossover page and pull the two categories from there.
pre_links = soup.find('div',{'id':'pre_story_links'}) pre_links = soup.find('div',{'id':'pre_story_links'})
categories = pre_links.findAll('a',{'class':'xcontrast_txt'}) categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
#print("xcontrast_txt a:%s"%categories) #print("xcontrast_txt a:%s"%categories)
if len(categories) > 1: if len(categories) > 1:
# Strangely, the ones with *two* links are the # Strangely, the ones with *two* links are the
@ -208,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'}) grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
# for b in grayspan.findAll('button'): # for b in grayspan.find_all('button'):
# b.extract() # b.extract()
metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort') metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
#logger.debug("metatext:(%s)"%metatext) #logger.debug("metatext:(%s)"%metatext)
@ -247,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span> # Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
# Published: <span data-xutime='1384358726'>8m ago</span> # Published: <span data-xutime='1384358726'>8m ago</span>
dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')}) dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
if len(dates) > 1 : if len(dates) > 1 :
# updated get set to the same as published upstream if not found. # updated get set to the same as published upstream if not found.
self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime']))) self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
@ -298,15 +341,14 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
img = soup.select_one('img.lazy.cimage') img = soup.select_one('img.lazy.cimage')
cover_url=img['data-original'] cover_url=img['data-original']
except: except:
img = soup.select_one('img.cimage:not(.lazy)') ## Nov 2023 - src is always "/static/images/d_60_90.jpg" now
if img: ## Only take cover if there's data-original
cover_url=img['src'] ## Primary motivator is to prevent unneeded author page hits.
## Nov 19, 2020, ffnet lazy cover images returning 0 byte pass
## files. logger.debug("cover_url:%s"%cover_url)
# logger.debug("cover_url:%s"%cover_url)
authimg_url = "" authimg_url = ""
if cover_url and self.getConfig('skip_author_cover'): if cover_url and self.getConfig('skip_author_cover') and self.getConfig('include_images'):
try: try:
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl'))) authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
try: try:
@ -353,31 +395,37 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story. # no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
allOptions = select.findAll('option') allOptions = select.find_all('option')
for o in allOptions: for o in allOptions:
url = u'https://%s/s/%s/%s/' % ( self.getSiteDomain(), ## title URL will be put back on chapter URL during
self.story.getMetadata('storyId'), ## normalize_chapterurl() anyway, but also here for
o['value']) ## clarity
url = u'https://%s/s/%s/%s/%s' % ( self.getSiteDomain(),
self.story.getMetadata('storyId'),
o['value'],
self.urltitle)
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
title = u"%s" % o title = u"%s" % o
title = re.sub(r'<[^>]+>','',title) title = re.sub(r'<[^>]+>','',title)
self.add_chapter(title,url) self.add_chapter(title,url)
return return
def getChapterText(self, url): def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url) logger.debug('Getting chapter text from: %s' % (url))
## AND explicitly put title URL back on chapter URL for fetch ## title URL was put back on chapter URL during
## *only*--normalized chapter URL does NOT have urltitle ## normalize_chapterurl()
data = self.get_request(url+self.urltitle) data = self.get_request(url)
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data: if "Please email this error message in full to <a href='mailto:" in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url) raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
soup = self.make_soup(data) soup = self.make_soup(data)
## remove inline ads -- only seen with flaresolverr
for adtag in soup.select("div.google-auto-placed"):
adtag.decompose()
div = soup.find('div', {'id' : 'storytextp'}) div = soup.find('div', {'id' : 'storytextp'})
if None == div: if None == div:

View file

@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
# Copyright 2024 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import io
import logging
import re
import zipfile
from bs4 import BeautifulSoup
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
from fanficfare.htmlcleanup import stripHTML
from .. import exceptions as exceptions
logger = logging.getLogger(__name__)
def getClass():
    """Registration hook: the downloader imports every adapter_*.py module
    and calls getClass() to obtain the adapter class it provides."""
    return FanfictionsFrSiteAdapter
class FanfictionsFrSiteAdapter(BaseSiteAdapter):
    """Adapter for fanfictions.fr, a French-language fanfiction archive.

    Metadata and chapter list come from the story's chapters.html index
    page; chapter text comes from div#readarea, or from a zipped text
    download when the site redirects to its PDF/download endpoint.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'fanfictionsfr')
        self.story.setMetadata('langcode','fr')
        self.story.setMetadata('language','Français')

        # get storyId from url--url validation guarantees query correct
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        story_id = match.group('id')
        self.story.setMetadata('storyId', story_id)

        fandom_name = match.group('fandom')

        # Normalized story URL always points at the chapters.html index page.
        self._setURL('https://%s/fanfictions/%s/%s/chapters.html' % (self.getSiteDomain(), fandom_name, story_id))

    @staticmethod
    def getSiteDomain():
        return 'www.fanfictions.fr'

    @classmethod
    def getSiteExampleURLs(cls):
        """Return an example story URL shown to users for this site."""
        return 'https://%s/fanfictions/fandom/fanfiction-id/chapters.html' % cls.getSiteDomain()

    def getSiteURLPattern(self):
        # Accepts URLs with or without the trailing /chapters.html part.
        return r'https?://(?:www\.)?fanfictions\.fr/fanfictions/(?P<fandom>[^/]+)/(?P<id>[^/]+)(/chapters.html)?'

    def extractChapterUrlsAndMetadata(self):
        """Fetch the chapters index page and populate chapters and metadata."""
        logger.debug('URL: %s', self.url)
        data = self.get_request(self.url)
        soup = self.make_soup(data)

        # detect if the fanfiction is 'suspended' (chapters unavailable)
        alert_div = soup.find('div', id='alertInactiveFic')
        if alert_div:
            raise exceptions.FailedToDownload("Failed to download the fanfiction, most likely because it is suspended.")

        title_element = soup.find('h1', itemprop='name')
        self.story.setMetadata('title', stripHTML(title_element))

        # Author id is the last path segment of the author link, sans .html.
        author_div = soup.find('div', itemprop='author')
        author_name = stripHTML(author_div.a)
        author_id = author_div.a['href'].split('/')[-1].replace('.html', '')
        self.story.setMetadata('author', author_name)
        self.story.setMetadata('authorId', author_id)

        # NOTE(review): assumes the first span.date-distance on the page is
        # the publication date -- confirm against a live page.
        published_date_element = soup.find('span', class_='date-distance')
        published_date_text = published_date_element['data-date']
        published_date = makeDate(published_date_text, '%Y-%m-%d %H:%M:%S')
        if published_date:
            self.story.setMetadata('datePublished', published_date)

        # Map the site's French status badge to FFF's English status values;
        # unknown statuses are passed through unchanged.
        status_element = soup.find('p', title="Statut de la fanfiction").find('span', class_='badge')
        french_status = stripHTML(status_element)
        status_translation = {
            "En cours": "In-Progress",
            "Terminée": "Completed",
            "One-shot": "Completed",
        }
        self.story.setMetadata('status', status_translation.get(french_status, french_status))

        # First highlightable span is skipped (presumably the format,
        # not a genre -- verify against a live page).
        genre_elements = soup.find('div', title="Format et genres").find_all('span', class_="highlightable")
        self.story.extendList('genre', [ stripHTML(genre) for genre in genre_elements[1:] ])

        # Category is taken from the next-to-last breadcrumb item.
        category_elements = soup.find_all('li', class_="breadcrumb-item")
        self.story.extendList('category', [ stripHTML(category) for category in category_elements[-2].find_all('a') ])

        first_description = soup.find('p', itemprop='abstract')
        self.setDescription(self.url, first_description)

        chapter_cards = soup.find_all(class_=['card', 'chapter'])
        for chapter_card in chapter_cards:
            chapter_title_tag = chapter_card.find('h2')
            if chapter_title_tag:
                chapter_title = stripHTML(chapter_title_tag)
                chapter_link = 'https://'+self.getSiteDomain()+chapter_title_tag.find('a')['href']
                # Clean up the chapter title by replacing multiple spaces and newline characters with a single space
                chapter_title = re.sub(r'\s+', ' ', chapter_title)
                self.add_chapter(chapter_title, chapter_link)

        # dateUpdated is the date on the last chapter card.
        last_chapter_div = chapter_cards[-1]
        updated_date_element = last_chapter_div.find('span', class_='date-distance')
        last_chapter_update_date = updated_date_element['data-date']
        date = makeDate(last_chapter_update_date, '%Y-%m-%d %H:%M:%S')
        if date:
            self.story.setMetadata('dateUpdated', date)

    def getChapterText(self, url):
        """Return one chapter's HTML.

        Normally parses div#readarea from the chapter page; when the site
        redirects to its telecharger_pdf.html endpoint, the body is a zip
        archive containing a single windows-1252 text file instead.
        """
        logger.debug('Getting chapter text from: %s' % url)

        response, redirection_url = self.get_request_redirected(url)

        if "telecharger_pdf.html" in redirection_url:
            # response is text here; encode('latin1') presumably recovers the
            # raw bytes of the zip download -- confirm against fetcher behavior.
            with zipfile.ZipFile(io.BytesIO(response.encode('latin1'))) as z:
                # Assuming there's only one text file inside the zip
                file_list = z.namelist()
                if len(file_list) != 1:
                    raise exceptions.FailedToDownload("Error downloading Chapter: %s! Zip file should contain exactly one text file!" % url)

                text_filename = file_list[0]
                with z.open(text_filename) as text_file:
                    # Decode the text file with windows-1252 encoding
                    text = text_file.read().decode('windows-1252')
                    return text.replace("\r\n", "<br>\r\n")
        else:
            soup = self.make_soup(response)
            div_content = soup.find('div', id='readarea')
            if div_content is None:
                raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
            return self.utf8FromSoup(url, div_content)

View file

@ -119,7 +119,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.") raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.")
soup = self.make_soup(data) soup = self.make_soup(data)
# print data # logger.debug(data)
## Title ## Title
@ -134,7 +134,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
self.story.setMetadata('author',stripHTML(a)) self.story.setMetadata('author',stripHTML(a))
# Find the chapters: # Find the chapters:
for chapter in soup.find('select').findAll('option'): for chapter in soup.find('select').find_all('option'):
self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value']) self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
## title="Wörter" failed with max_zalgo:1 ## title="Wörter" failed with max_zalgo:1
@ -163,29 +163,31 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
except e: except e:
logger.debug("Failed to find native status:%s"%e) logger.debug("Failed to find native status:%s"%e)
if head.find('span',title='Fertiggestellt'): if head.find('span',title='fertiggestellt'):
self.story.setMetadata('status', 'Completed') self.story.setMetadata('status', 'Completed')
elif head.find('span',title='Pausiert'): elif head.find('span',title='pausiert'):
self.story.setMetadata('status', 'Paused') self.story.setMetadata('status', 'Paused')
elif head.find('span',title='Abgebrochen'): elif head.find('span',title='abgebrochen'):
self.story.setMetadata('status', 'Cancelled') self.story.setMetadata('status', 'Cancelled')
else: else:
self.story.setMetadata('status', 'In-Progress') self.story.setMetadata('status', 'In-Progress')
## Get description from own URL: ## Get description
## /?a=v&storyid=46ccbef30000616306614050&s=1 descdiv = soup.select_one('div#story-summary-inline div')
descsoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"/?a=v&storyid="+self.story.getMetadata('storyId')+"&s=1")) if descdiv:
self.setDescription(url,stripHTML(descsoup)) if 'center' in descdiv['class']:
del descdiv['class']
self.setDescription(url,descdiv)
# #find metadata on the author's page # #find metadata on the author's page
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId'))) # asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
# tr=asoup.findAll('tr') # tr=asoup.find_all('tr')
# for i in range(1,len(tr)): # for i in range(1,len(tr)):
# a = tr[i].find('a') # a = tr[i].find('a')
# if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']: # if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
# break # break
# td = tr[i].findAll('td') # td = tr[i].find_all('td')
# self.story.addToList('category',stripHTML(td[2])) # self.story.addToList('category',stripHTML(td[2]))
# self.story.setMetadata('rating', stripHTML(td[5])) # self.story.setMetadata('rating', stripHTML(td[5]))
# self.story.setMetadata('numWords', stripHTML(td[6])) # self.story.setMetadata('numWords', stripHTML(td[6]))
@ -202,7 +204,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'storytext'}) div = soup.find('div', {'id' : 'storytext'})
for a in div.findAll('script'): for a in div.find_all('script'):
a.extract() a.extract()
if None == div: if None == div:

View file

@ -1,134 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
####################################################################################################
### Adapted by Rikkit on November 7. 2017
###=================================================================================================
### Tested with Calibre
####################################################################################################
from __future__ import absolute_import
import logging
import re
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
logger = logging.getLogger(__name__)
def getClass():
    """Registration hook: the downloader imports every adapter_*.py module
    and calls getClass() to obtain the adapter class it provides."""
    return FastNovelNetAdapter
class FastNovelNetAdapter(BaseSiteAdapter):
    ''' Adapter for FASTNOVEL.net '''

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.story.setMetadata('siteabbrev', 'fstnvl')
        # Site shows 'Last updated' dates as day/month/year.
        self.dateformat = '%d/%m/%Y'

        # get storyId from url--url validation guarantees query correct
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        story_id = match.group('id')
        self.story.setMetadata('storyId', story_id)
        self._setURL('https://%s/%s/' % (self.getSiteDomain(), story_id))

    @staticmethod
    def getSiteDomain():
        return 'fastnovel.net'

    @classmethod
    def getSiteExampleURLs(cls):
        """Return an example story URL shown to users for this site."""
        return "https://fastnovel.net/a-story-name-id"

    def getSiteURLPattern(self):
        # https://fastnovel.net/ultimate-scheming-system-158/
        return r"https?://fastnovel\.net/(?P<id>[^/]+)"

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story page and populate chapters and metadata."""
        logger.debug('URL: %s', self.url)
        data = self.get_request(self.url)
        soup = self.make_soup(data)

        self.story.setMetadata('title', soup.find('h1').string)

        # Metadata is laid out as labeled <li> entries in .meta-data.
        for li in soup.select('.meta-data li'):
            label = li.select_one('label')
            if not label:
                continue
            if label.string == "Author:":
                # NOTE(review): with several author links, each setMetadata
                # overwrites the previous one; only the last author is kept.
                for a in li.select('a'):
                    self.story.setMetadata('authorId', a["href"].split('/')[2])
                    self.story.setMetadata('authorUrl','https://'+self.host+a["href"])
                    self.story.setMetadata('author', a["title"])
            if label.string == "Genre:":
                for a in li.select('a'):
                    self.story.addToList('genre',a["title"])
            if label.string == "Status:":
                if li.select_one('strong').string.strip() == "Completed":
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')
            if label.string == "Last updated:":
                dateUpd = label.next_sibling.strip()
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(dateUpd), self.dateformat))

        # Skip the site's placeholder cover image.
        coverurl = soup.select_one('div.book-cover')["data-original"]
        if coverurl != "https://fastnovel.net/images/novel/default.jpg":
            self.setCoverImage(self.url, coverurl)

        tags = soup.select_one('.tags')
        if tags:
            for a in tags.select("li.tag-item a"):
                self.story.addToList('tags', a["title"])
            # extract tags, because it inside description
            tags.extract()

        # remove title from description
        soup.select_one('.film-content h3').extract()
        desc = soup.select_one('.film-content').extract()
        self.setDescription(self.url, desc)

        # Chapters are grouped by volume; chapter titles get the
        # volume name prefixed.
        for book in soup.select("#list-chapters .book"):
            volume = book.select_one('.title a').string
            for a in book.select(".list-chapters a.chapter"):
                title = volume + " " + stripHTML(a)
                self.add_chapter(title, 'https://' + self.host + a["href"])

    def getChapterText(self, url):
        """Fetch one chapter page and return the cleaned contents of #chapter-body."""
        data = self.get_request(url)
        soup = self.make_soup(data)

        story = soup.select_one('#chapter-body')

        if not story:
            raise exceptions.FailedToDownload(
                "Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url, story)

View file

@ -15,16 +15,16 @@
# limitations under the License. # limitations under the License.
# #
from __future__ import absolute_import from __future__ import absolute_import,unicode_literals
import datetime # import datetime
import logging import logging
logger = logging.getLogger(__name__) import json
import re import re
from .. import translit # from .. import translit
from ..htmlcleanup import stripHTML from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions from .. import exceptions# as exceptions
# py2 vs py3 transition # py2 vs py3 transition
@ -58,7 +58,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
# The date format will vary from site to site. # The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%d %m %Y" self.dateformat = u"%d %m %Y г., %H:%M"
@staticmethod # must be @staticmethod, don't remove it. @staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain(): def getSiteDomain():
@ -67,17 +67,33 @@ class FicBookNetAdapter(BaseSiteAdapter):
@classmethod @classmethod
def getSiteExampleURLs(cls): def getSiteExampleURLs(cls):
return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content" return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82 https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82/94793742#part_content"
def getSiteURLPattern(self): def getSiteURLPattern(self):
return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"\d+" return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"[\d\-a-zA-Z]+"
def performLogin(self,url,data):
params = {}
if self.password:
params['login'] = self.username
params['password'] = self.password
else:
params['login'] = self.getConfig("username")
params['password'] = self.getConfig("password")
logger.debug("Try to login in as (%s)" % params['login'])
d = self.post_request('https://' + self.getSiteDomain() + '/login_check_static',params,usecache=False)
if 'Войти используя аккаунт на сайте' in d:
raise exceptions.FailedToLogin(url,params['login'])
return True
## Getting the chapter list and the meta data, plus 'is adult' checking. ## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self): def extractChapterUrlsAndMetadata(self,get_cover=True):
url=self.url url=self.url
logger.debug("URL: "+url) logger.debug("URL: "+url)
data = self.get_request(url) data = self.get_request(url)
soup = self.make_soup(data) soup = self.make_soup(data)
adult_div = soup.find('div',id='adultCoverWarning') adult_div = soup.find('div',id='adultCoverWarning')
@ -87,9 +103,11 @@ class FicBookNetAdapter(BaseSiteAdapter):
else: else:
raise exceptions.AdultCheckRequired(self.url) raise exceptions.AdultCheckRequired(self.url)
## Title ## Title
a = soup.find('section',{'class':'chapter-info'}).find('h1') try:
a = soup.find('section',{'class':'chapter-info'}).find('h1')
except AttributeError:
raise exceptions.FailedToDownload("Error collecting meta: %s! Missing required element!" % url)
# kill '+' marks if present. # kill '+' marks if present.
sup = a.find('sup') sup = a.find('sup')
if sup: if sup:
@ -99,40 +117,12 @@ class FicBookNetAdapter(BaseSiteAdapter):
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
# assume first avatar-nickname -- there can be a second marked 'beta'. # assume first avatar-nickname -- there can be a second marked 'beta'.
a = soup.find('a',{'class':'creator-nickname'}) a = soup.find('a',{'class':'creator-username'})
self.story.setMetadata('authorId',a.text) # Author's name is unique self.story.setMetadata('authorId',a.text) # Author's name is unique
self.story.setMetadata('authorUrl','https://'+self.host+a['href']) self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
self.story.setMetadata('author',a.text) self.story.setMetadata('author',a.text)
logger.debug("Author: (%s)"%self.story.getMetadata('author')) logger.debug("Author: (%s)"%self.story.getMetadata('author'))
# Find the chapters:
pubdate = None
chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
if chapters != None:
for chapdiv in chapters.findAll('li', {'class':'part'}):
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
churl='https://'+self.host+chapter['href']
self.add_chapter(chapter,churl)
datespan = chapdiv.find('span')
if pubdate == None and datespan:
pubdate = translit.translit(stripHTML(datespan))
update = translit.translit(stripHTML(datespan))
else:
self.add_chapter(self.story.getMetadata('title'),url)
self.story.setMetadata('numChapters',1)
pubdate=translit.translit(stripHTML(soup.find('div',{'class':'title-area'}).find('span')))
update=pubdate
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
if not ',' in pubdate:
pubdate=datetime.date.today().strftime(self.dateformat)
if not ',' in update:
update=datetime.date.today().strftime(self.dateformat)
pubdate=pubdate.split(',')[0]
update=update.split(',')[0]
fullmon = {"yanvarya":"01", u"января":"01", fullmon = {"yanvarya":"01", u"января":"01",
"fievralya":"02", u"февраля":"02", "fievralya":"02", u"февраля":"02",
"marta":"03", u"марта":"03", "marta":"03", u"марта":"03",
@ -146,31 +136,50 @@ class FicBookNetAdapter(BaseSiteAdapter):
"noyabrya":"11", u"ноября":"11", "noyabrya":"11", u"ноября":"11",
"diekabrya":"12", u"декабря":"12" } "diekabrya":"12", u"декабря":"12" }
for (name,num) in fullmon.items(): # Find the chapters:
if name in pubdate: pubdate = None
pubdate = pubdate.replace(name,num) chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
if name in update: if chapters is not None:
update = update.replace(name,num) for chapdiv in chapters.find_all('li', {'class':'part'}):
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
churl='https://'+self.host+chapter['href']
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat)) # Find the chapter dates.
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat)) date_str = chapdiv.find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
for month_name, month_num in fullmon.items():
date_str = date_str.replace(month_name, month_num)
chapterdate = makeDate(date_str,self.dateformat)
self.add_chapter(chapter,churl,
{'date':chapterdate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format",self.dateformat)))})
if pubdate is None and chapterdate:
pubdate = chapterdate
update = chapterdate
else:
self.add_chapter(self.story.getMetadata('title'),url)
date_str = soup.find('div', {'class' : 'part-date'}).find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
for month_name, month_num in fullmon.items():
date_str = date_str.replace(month_name, month_num)
pubdate = update = makeDate(date_str,self.dateformat)
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
self.story.setMetadata('dateUpdated', update)
self.story.setMetadata('datePublished', pubdate)
self.story.setMetadata('language','Russian') self.story.setMetadata('language','Russian')
## after site change, I don't see word count anywhere. dlinfo = soup.select_one('header.d-flex.flex-column.gap-12.word-break')
# pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
# pr='https://'+self.host+pr['href']
# pr = self.make_soup(self.get_request(pr))
# pr=pr.findAll('div', {'class' : 'part_text'})
# i=0
# for part in pr:
# i=i+len(stripHTML(part).split(' '))
# self.story.setMetadata('numWords', unicode(i))
series_label = dlinfo.select_one('div.description.word-break').find('strong', string='Серия:')
dlinfo = soup.find('div',{'class':'fanfic-main-info'}) logger.debug('Series: %s'%str(series_label))
if series_label:
series_div = series_label.find_next_sibling("div")
# No accurate series number as for that, additional request needs to be made
self.setSeries(stripHTML(series_div.a), 1)
self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))
i=0 i=0
fandoms = dlinfo.find('div').findAll('a', href=re.compile(r'/fanfiction/\w+')) fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
for fandom in fandoms: for fandom in fandoms:
self.story.addToList('category',fandom.string) self.story.addToList('category',fandom.string)
i=i+1 i=i+1
@ -179,13 +188,16 @@ class FicBookNetAdapter(BaseSiteAdapter):
tags = soup.find('div',{'class':'tags'}) tags = soup.find('div',{'class':'tags'})
if tags: if tags:
for genre in tags.findAll('a',href=re.compile(r'/tags/')): for genre in tags.find_all('a',href=re.compile(r'/tags/')):
self.story.addToList('genre',stripHTML(genre)) self.story.addToList('genre',stripHTML(genre))
ratingdt = dlinfo.find('strong',{'class':re.compile(r'badge-rating-.*')}) logger.debug("category: (%s)"%self.story.getMetadata('category'))
self.story.setMetadata('rating', stripHTML(ratingdt.find_next('span'))) logger.debug("genre: (%s)"%self.story.getMetadata('genre'))
# meta=table.findAll('a', href=re.compile(r'/ratings/')) ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))
# meta=table.find_all('a', href=re.compile(r'/ratings/'))
# i=0 # i=0
# for m in meta: # for m in meta:
# if i == 0: # if i == 0:
@ -198,12 +210,17 @@ class FicBookNetAdapter(BaseSiteAdapter):
# elif i == 2: # elif i == 2:
# self.story.addToList('warnings', m.find('b').text) # self.story.addToList('warnings', m.find('b').text)
if dlinfo.find('span', {'class':'badge-status-finished'}): if dlinfo.find('div', {'class':'badge-status-finished'}):
self.story.setMetadata('status', 'Completed') self.story.setMetadata('status', 'Completed')
else: else:
self.story.setMetadata('status', 'In-Progress') self.story.setMetadata('status', 'In-Progress')
paircharsdt = soup.find('strong',text='Пэйринг и персонажи:') try:
self.story.setMetadata('universe', stripHTML(dlinfo.find('a', href=re.compile('/fandom_universe/'))))
except AttributeError:
pass
paircharsdt = soup.find('strong',string='Пэйринг и персонажи:')
# site keeps both ships and indiv chars in /pairings/ links. # site keeps both ships and indiv chars in /pairings/ links.
if paircharsdt: if paircharsdt:
for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')): for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):
@ -215,9 +232,99 @@ class FicBookNetAdapter(BaseSiteAdapter):
else: else:
self.story.addToList('characters',stripHTML(paira)) self.story.addToList('characters',stripHTML(paira))
summary=soup.find('div', {'class' : 'urlize'}) summary=soup.find('div', itemprop='description')
self.setDescription(url,summary) if summary:
#self.story.setMetadata('description', summary.text) # Fix for the text not displaying properly
summary['class'].append('part_text')
self.setDescription(url,summary)
#self.story.setMetadata('description', summary.text)
stats = soup.find('div', {'class':'hat-actions-container'})
targetdata = stats.find_all('span', {'class' : 'main-info'})
for data in targetdata:
svg_class = data.find('svg')['class'][1] if data.find('svg') else None
value = int(stripHTML(data)) if stripHTML(data).isdigit() else 0
if svg_class == 'ic_thumbs-up' and value > 0:
self.story.setMetadata('likes', value)
#logger.debug("likes: (%s)"%self.story.getMetadata('likes'))
elif svg_class == 'ic_bubble-dark' and value > 0:
self.story.setMetadata('reviews', value)
#logger.debug("reviews: (%s)"%self.story.getMetadata('reviews'))
elif svg_class == 'ic_bookmark' and value > 0:
self.story.setMetadata('numCollections', value)
logger.debug("numCollections: (%s)"%self.story.getMetadata('numCollections'))
# Grab the amount of pages and words
targetpages = soup.find('strong',string='Размер:').find_next('div')
if targetpages:
targetpages_text = re.sub(r"(?<!\,)\s| ", "", targetpages.text, flags=re.UNICODE | re.MULTILINE)
pages_raw = re.search(r'(\d+)(?:страницы|страниц)', targetpages_text, re.UNICODE)
pages = int(pages_raw.group(1))
if pages > 0:
self.story.setMetadata('pages', pages)
logger.debug("pages: (%s)"%self.story.getMetadata('pages'))
numWords_raw = re.search(r"(\d+)(?:слова|слов)", targetpages_text, re.UNICODE)
numWords = int(numWords_raw.group(1))
if numWords > 0:
self.story.setMetadata('numWords', numWords)
logger.debug("numWords: (%s)"%self.story.getMetadata('numWords'))
# Grab FBN Category
class_tag = soup.select_one('div[class^="badge-with-icon direction"]').find('span', {'class' : 'badge-text'}).text
if class_tag:
self.story.setMetadata('classification',class_tag)
#logger.debug("classification: (%s)"%self.story.getMetadata('classification'))
# Find dedication.
ded = soup.find('div', {'class' : 'js-public-beta-dedication'})
if ded:
ded['class'].append('part_text')
self.story.setMetadata('dedication',ded)
# Find author comment
comm = soup.find('div', {'class' : 'js-public-beta-author-comment'})
if comm:
comm['class'].append('part_text')
self.story.setMetadata('authorcomment',comm)
follows = stats.find('fanfic-follow-button')[':follow-count']
if int(follows) > 0:
self.story.setMetadata('follows', int(follows))
logger.debug("follows: (%s)"%self.story.getMetadata('follows'))
# Grab the amount of awards
numAwards = 0
try:
awards = soup.find('fanfic-reward-list')[':initial-fic-rewards-list']
award_list = json.loads(awards)
numAwards = int(len(award_list))
# Grab the awards, but if multiple awards have the same name, only one will be kept; only an issue with hundreds of them.
self.story.extendList('awards', [str(award['user_text']) for award in award_list])
#logger.debug("awards (%s)"%self.story.getMetadata('awards'))
except (TypeError, KeyError):
logger.debug("Could not grab the awards")
if numAwards > 0:
self.story.setMetadata('numAwards', numAwards)
logger.debug("Num Awards (%s)"%self.story.getMetadata('numAwards'))
if get_cover:
cover = soup.find('fanfic-cover', {'class':"jsVueComponent"})
if cover is not None:
self.setCoverImage(url,cover['src-original'])
def replace_formatting(self,tag):
tname = tag.name
## operating on plain text because BS4 is hard to work on
## text with.
## stripHTML() discards whitespace around other tags, like <i>
txt = tag.get_text()
txt = txt.replace("\n","<br/>")
soup = self.make_soup("<"+tname+">"+txt+"</"+tname+">")
return soup.find(tname)
# grab the text for an individual chapter. # grab the text for an individual chapter.
def getChapterText(self, url): def getChapterText(self, url):
@ -227,10 +334,60 @@ class FicBookNetAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
chapter = soup.find('div', {'id' : 'content'}) chapter = soup.find('div', {'id' : 'content'})
if chapter == None: ## still needed? if chapter is None: ## still needed?
chapter = soup.find('div', {'class' : 'public_beta_disabled'}) chapter = soup.find('div', {'class' : 'public_beta_disabled'})
if None == chapter: if chapter is None:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
## ficbook uses weird CSS white-space: pre-wrap; for
## paragraphing. Doesn't work with txt output
if 'part_text' in chapter['class'] and self.getConfig('replace_text_formatting'):
## copy classes, except part_text
divclasses = chapter['class']
divclasses.remove('part_text')
chapter = self.replace_formatting(chapter)
chapter['class'] = divclasses
exclude_notes=self.getConfigList('exclude_notes')
if 'headnotes' not in exclude_notes:
# Find the headnote
head_note = soup.select_one("div.part-comment-top div.js-public-beta-comment-before")
if head_note:
# Create the structure for the headnote
head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
head_b_tag = soup.new_tag('b')
head_b_tag.string = 'Примечания:'
if 'text-preline' in head_note['class'] and self.getConfig('replace_text_formatting'):
head_blockquote_tag = self.replace_formatting(head_note)
head_blockquote_tag.name = 'blockquote'
else:
head_blockquote_tag = soup.new_tag('blockquote')
head_blockquote_tag.string = stripHTML(head_note)
head_notes_div_tag.append(head_b_tag)
head_notes_div_tag.append(head_blockquote_tag)
# Prepend the headnotes to the chapter, <hr> to mimic the site
chapter.insert(0, head_notes_div_tag)
chapter.insert(1, soup.new_tag('hr'))
if 'footnotes' not in exclude_notes:
# Find the endnote
end_note = soup.select_one("div.part-comment-bottom div.js-public-beta-comment-after")
if end_note:
# Create the structure for the footnote
end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
end_b_tag = soup.new_tag('b')
end_b_tag.string = 'Примечания:'
if 'text-preline' in end_note['class'] and self.getConfig('replace_text_formatting'):
end_blockquote_tag = self.replace_formatting(end_note)
end_blockquote_tag.name = 'blockquote'
else:
end_blockquote_tag = soup.new_tag('blockquote')
end_blockquote_tag.string = stripHTML(end_note)
end_notes_div_tag.append(end_b_tag)
end_notes_div_tag.append(end_blockquote_tag)
# Append the endnotes to the chapter, <hr> to mimic the site
chapter.append(soup.new_tag('hr'))
chapter.append(end_notes_div_tag)
return self.utf8FromSoup(url,chapter) return self.utf8FromSoup(url,chapter)

View file

@ -0,0 +1,225 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2021 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from .base_adapter import BaseSiteAdapter, makeDate
class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
    """Adapter for www.fictionalley-archive.org, the archived mirror of
    FictionAlley.  Also accepts URLs from the original
    www.fictionalley.org domain and normalizes them to the archive."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.story.setMetadata('siteabbrev','fa')
        self.is_adult=False

        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            # normalized story URL -- always point at the archive domain.
            url = "https://"+self.getSiteDomain()+"/authors/"+m.group('auth')+"/"+m.group('id')+".html"
            self._setURL(url)
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%Y"

    def _setURL(self,url):
        # Keep authorId/storyId in sync whenever the URL changes (it can
        # be replaced after a redirect or chapter->index normalization).
        # logger.debug("set URL:%s"%url)
        super(FictionAlleyArchiveOrgSiteAdapter, self)._setURL(url)
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('authorId',m.group('auth'))
            self.story.setMetadata('storyId',m.group('id'))

    @staticmethod
    def getSiteDomain():
        return 'www.fictionalley-archive.org'

    @classmethod
    def getAcceptDomains(cls):
        return ['www.fictionalley-archive.org',
                'www.fictionalley.org']

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/authors/drt/DA.html https://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"

    @classmethod
    def getURLDomain(cls):
        return 'https://' + cls.getSiteDomain()

    def getSiteURLPattern(self):
        # http://www.fictionalley-archive.org/authors/drt/DA.html
        # http://www.fictionalley-archive.org/authors/drt/JOTP01a.html
        # Dots are escaped (previously they matched any character) and
        # the optional -archive alternative is non-capturing so only
        # the named groups are exposed.
        return r"https?://www\.fictionalley(?:-archive)?\.org/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"

    def extractChapterUrlsAndMetadata(self):
        """Collect story metadata and the chapter list.

        self.url may be either a chapter-list page or a one-shot
        chapter text page; multi-chapter chapter URLs are normalized
        to the index page before scraping.
        """
        logger.debug("URL: "+self.url)
        (data,rurl) = self.get_request_redirected(self.url)
        if rurl != self.url:
            self._setURL(rurl)
            logger.debug("set to redirected url:%s"%self.url)
        soup = self.make_soup(data)

        # If chapter list page, get the first chapter to look for adult check
        chapterlinklist = soup.select('h5.mb-1 > a')
        # logger.debug(chapterlinklist)
        if not chapterlinklist:
            # no chapter list, it's either a chapter URL or a single chapter story
            # <nav aria-label="Chapter Navigation">
            # <a class="page-link" href="/authors/mz_xxo/HPATOTFI.html">Index</a>
            storya = soup.select_one('nav[aria-label="Chapter Navigation"] a')
            # logger.debug(storya)
            if storya:
                ## multi chapter story -- hop to the index page and reparse.
                self._setURL(self.getURLDomain()+storya['href'])
                logger.debug("Normalizing to URL: "+self.url)
                data = self.get_request(self.url)
                soup = self.make_soup(data)
                chapterlinklist = soup.select('h5.mb-1 > a')
                # logger.debug(chapterlinklist)
            else:
                ## single chapter story.
                # logger.debug("Single chapter story")
                pass

        self.story.setMetadata('title',stripHTML(soup.select_one('h1')))

        ## authorid already set by _setURL.
        authora=soup.select_one('h1 + h3 > a')
        self.story.setMetadata('author',stripHTML(authora))
        self.story.setMetadata('authorUrl',self.getURLDomain()+authora['href'])

        if chapterlinklist:
            # Find the chapters:
            for chapter in chapterlinklist:
                # climb from the <a> up to the list item that carries
                # the per-chapter date/words/hits/summary details.
                listitem = chapter.parent.parent.parent
                date = stripHTML(listitem.select_one('small.text-nowrap'))
                chapterDate = makeDate(date,self.dateformat)
                wordshits = listitem.select('span.font-weight-normal')
                chap_data = {
                    'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d"))),
                    'words':stripHTML(wordshits[0]),
                    'hits':stripHTML(wordshits[1]),
                    'summary':stripHTML(listitem.select_one('p.my-2')),
                    }
                self.add_chapter(chapter,self.getURLDomain()+chapter['href'], chap_data)
        else:
            self.add_chapter(self.story.getMetadata('title'),self.url)

        cardbody = soup.select_one('div.card-body')
        # Map the site's tag-search links onto FFF metadata fields.
        searchs_to_meta = (
            # sitetype, ffftype, islist
            ('Rating', 'rating', False),
            ('House', 'house', True),
            ('Character', 'characters', True),
            ('Genre', 'genre', True),
            ('Era', 'era', True),
            ('Spoiler', 'spoilers', True),
            ('Ship', 'ships', True),
            )
        for (sitetype,ffftype, islist) in searchs_to_meta:
            tags = cardbody.select('a[href^="/stories?Include.%s"]'%sitetype)
            if tags:
                if islist:
                    self.story.extendList(ffftype, [ stripHTML(a) for a in tags ])
                else:
                    self.story.setMetadata(ffftype, stripHTML(tags[0]))

        # Published: 09/26/2003 Updated: 04/13/2004 Words: 14,268 Chapters: 5 Hits: 743
        badgeinfos = cardbody.select('div.badge-info')
        for badge in badgeinfos:
            txt = stripHTML(badge)
            # split only on the first ':' in case the value contains one
            (key,val)=txt.split(':',1)
            val = val.strip()
            if key in ( 'Published', 'Updated'):
                date = makeDate(val,self.dateformat)
                self.story.setMetadata('date'+key,date)
            elif key == 'Hits':
                # was `key in ('Hits')`, which is a substring test on a
                # plain string, not tuple membership.
                self.story.setMetadata(key.lower(),val)
            elif key == 'Words':
                self.story.setMetadata('numWords',val)

        summary = soup.find('dt',string='Story Summary:')
        if summary:
            summary = summary.find_next_sibling('dd')
            summary.name='div'
            self.setDescription(self.url,summary)
        return

    def getChapterText(self, url):
        """Fetch one chapter page and return its cleaned text div."""
        logger.debug('Getting chapter text from: %s' % url)
        data = self.get_request(url)
        soup = self.make_soup(data)

        # this may be a brittle way to get the chapter text.
        # Site doesn't give a lot of hints.
        chaptext = soup.select_one('main#content div:not([class])')
        if chaptext is None:
            # Fail with a clear message instead of an AttributeError
            # below if the page layout changed.
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        # not sure how, but we can get html, etc tags still in some
        # stories. That breaks later updates because it confuses
        # epubutils.py
        # Yes, this still applies to fictionalley-archive.
        for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
            tag.extract()
        for tag in chaptext.find_all('body') + chaptext.find_all('html'):
            tag.name = 'div'

        if self.getConfig('include_author_notes'):
            row = chaptext.find_previous_sibling('div',class_='row')
            logger.debug(row)
            andt = row.find('dt',string="Author's Note:")
            logger.debug(andt)
            if andt:
                chaptext.insert(0,andt.parent.extract())
            # post notes aren't as structured(?)
            for div in chaptext.find_next_siblings('div',class_='row'):
                chaptext.append(div.extract())

        # logger.debug(chaptext)
        return self.utf8FromSoup(url,chaptext)
def getClass():
    """Module entry point: return the adapter class for this site."""
    return FictionAlleyArchiveOrgSiteAdapter

View file

@ -1,228 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.story.setMetadata('siteabbrev','fa')
self.is_adult=False
# get storyId from url--url validation guarantees query correct
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('authorId',m.group('auth'))
self.story.setMetadata('storyId',m.group('id'))
# normalized story URL.
self._setURL(url)
else:
raise exceptions.InvalidStoryURL(url,
self.getSiteDomain(),
self.getSiteExampleURLs())
@staticmethod
def getSiteDomain():
return 'www.fictionalley.org'
@classmethod
def getSiteExampleURLs(cls):
return "http://"+cls.getSiteDomain()+"/authors/drt/DA.html http://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"
def getSiteURLPattern(self):
# http://www.fictionalley.org/authors/drt/DA.html
# http://www.fictionalley.org/authors/drt/JOTP01a.html
return re.escape("http://"+self.getSiteDomain())+r"/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"
def _postFetchWithIAmOld(self,url):
if self.is_adult or self.getConfig("is_adult"):
params={'iamold':'Yes',
'action':'ageanswer'}
logger.info("Attempting to get cookie for %s" % url)
## posting on list doesn't work, but doesn't hurt, either.
data = self.post_request(url,params)
else:
data = self.get_request(url)
return data
def extractChapterUrlsAndMetadata(self):
## could be either chapter list page or one-shot text page.
url = self.url
logger.debug("URL: "+url)
data = self._postFetchWithIAmOld(url)
soup = self.make_soup(data)
chapterdata = data
# If chapter list page, get the first chapter to look for adult check
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
if chapterlinklist:
chapterdata = self._postFetchWithIAmOld(chapterlinklist[0]['href'])
if "Are you over seventeen years old" in chapterdata:
raise exceptions.AdultCheckRequired(self.url)
if not chapterlinklist:
# no chapter list, chapter URL: change to list link.
# second a tag inside div breadcrumbs
storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
self._setURL(storya['href'])
url=self.url
logger.debug("Normalizing to URL: "+url)
## title's right there...
self.story.setMetadata('title',stripHTML(storya))
data = self.get_request(url)
soup = self.make_soup(data)
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
else:
## still need title from somewhere. If chapterlinklist,
## then chapterdata contains a chapter, find title the
## same way.
chapsoup = self.make_soup(chapterdata)
storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
self.story.setMetadata('title',stripHTML(storya))
del chapsoup
del chapterdata
## authorid already set.
## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1>
authora=soup.find('h1',{'class':'title'}).find('a')
self.story.setMetadata('author',authora.string)
self.story.setMetadata('authorUrl',authora['href'])
if len(chapterlinklist) == 1:
self.add_chapter(self.story.getMetadata('title'),chapterlinklist[0]['href'])
else:
# Find the chapters:
for chapter in chapterlinklist:
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,chapter['href'])
## Go scrape the rest of the metadata from the author's page.
data = self.get_request(self.story.getMetadata('authorUrl'))
soup = self.make_soup(data)
# <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
# [Rid] The Magical Hottiez</a> by <a class = "pen_name" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/">Aafro Man Ziegod</a> </small></dt>
# <dd><small class = "storyinfo"><a href = "http://www.fictionalley.org/ratings.html" target = "_new">Rating:</a> PG-13 - Spoilers: PS/SS, CoS, PoA, GoF, QTTA, FB - 4264 hits - 5060 words<br />
# Genre: Humor, Romance - Main character(s): None - Ships: None - Era: Multiple Eras<br /></small>
# Chaos ensues after Witch Weekly, seeking to increase readers, decides to create a boyband out of five seemingly talentless wizards: Harry Potter, Draco Malfoy, Ron Weasley, Neville Longbottom, and Oliver "Toss Your Knickers Here" Wood.<br />
# <small class = "storyinfo">Published: June 3, 2002 (between Goblet of Fire and Order of Phoenix) - Updated: June 3, 2002</small>
# </dd></dl>
storya = soup.find('a',{'href':self.story.getMetadata('storyUrl')})
storydd = storya.findNext('dd')
# Rating: PG - Spoilers: None - 2525 hits - 736 words
# Genre: Humor - Main character(s): H, R - Ships: None - Era: Multiple Eras
# Harry and Ron are back at it again! They reeeeeeally don't want to be back, because they know what's awaiting them. "VH1 Goes Inside..." is back! Why? 'Cos there are soooo many more couples left to pick on.
# Published: September 25, 2004 (between Order of Phoenix and Half-Blood Prince) - Updated: September 25, 2004
## change to text and regexp find.
metastr = stripHTML(storydd).replace('\n',' ').replace('\t',' ')
m = re.match(r".*?Rating: (.+?) -.*?",metastr)
if m:
self.story.setMetadata('rating', m.group(1))
m = re.match(r".*?Genre: (.+?) -.*?",metastr)
if m:
for g in m.group(1).split(','):
self.story.addToList('genre',g)
m = re.match(r".*?Published: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
if m:
self.story.setMetadata('datePublished',makeDate(m.group(1), "%B %d, %Y"))
m = re.match(r".*?Updated: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
if m:
self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%B %d, %Y"))
m = re.match(r".*? (\d+) words Genre.*?",metastr)
if m:
self.story.setMetadata('numWords', m.group(1))
for small in storydd.findAll('small'):
small.extract() ## removes the <small> tags, leaving only the summary.
storydd.name = 'div' ## change tag name else Calibre treats it oddly.
self.setDescription(url,storydd)
#self.story.setMetadata('description',stripHTML(storydd))
return
def getChapterText(self, url):
    """
    Fetch one chapter page and return its sanitized HTML text.

    The site marks the story text only with HTML comments, so the
    comments are rewritten into a matching tag pair before parsing.

    :param url: chapter page URL
    :returns: cleaned chapter HTML (via utf8FromSoup)
    :raises exceptions.FailedToDownload: when the story-text element
        cannot be found in the page.
    """
    logger.debug('Getting chapter text from: %s' % url)
    data = self.get_request(url)
    # find <!-- headerend --> & <!-- footerstart --> and
    # replace with a matching tag pair for easier parsing.
    # Yes, it's an evil kludge, but what can ya do?  Using
    # something other than div prevents soup from pairing
    # our tag with poor html inside the story text.
    crazy = "crazytagstringnobodywouldstumbleonaccidently"
    data = data.replace('<!-- headerend -->','<'+crazy+' id="storytext">').replace('<!-- footerstart -->','</'+crazy+'>')
    # problems with some stories confusing Soup.  This is a nasty
    # hack, but it works.  NOTE(review): str.index raises ValueError
    # if the marker comments are absent -- presumably that never
    # happens on this site; confirm before hardening.
    data = data[data.index('<'+crazy):]
    # ditto with extra crap at the end.
    data = data[:data.index('</'+crazy+'>')+len('</'+crazy+'>')]
    soup = self.make_soup(data)
    body = soup.findAll('body') ## some stories use a nested body and body
                                ## tag, in which case we don't need the
                                ## crazy tag and use the second one instead.
    if len(body) > 1:
        text = body[1]
    else:
        text = soup.find(crazy, {'id' : 'storytext'})
    # BUGFIX: check for a missing element *before* touching text.name.
    # Previously 'text.name=...' ran first, so a missing story-text
    # element raised AttributeError on None instead of the intended
    # FailedToDownload, leaving this guard unreachable.
    if not text:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
    text.name = 'div' # force to be a div to avoid multiple body tags;
                      # Calibre also treats non-div tags oddly.
    # not sure how, but we can get html, etc tags still in some
    # stories.  That breaks later updates because it confuses
    # epubutils.py
    for tag in text.findAll('head'):
        tag.extract()
    for tag in text.findAll('body') + text.findAll('html'):
        tag.name = 'div'
    return self.utf8FromSoup(url,text)
def getClass():
    """Module entry point: return the adapter class for fictionalley.org."""
    return FictionAlleyOrgSiteAdapter

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019 FanFicFare team # Copyright 2022 FanFicFare team
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -26,6 +26,93 @@ from ..htmlcleanup import stripHTML
from .base_adapter import BaseSiteAdapter, makeDate from .base_adapter import BaseSiteAdapter, makeDate
ampfandoms = ["A Falcone & Driscoll Investigation",
"Alias Smith & Jones",
"Atelier Escha & Logy",
"Austin & Ally",
"Baby & Me/赤ちゃんと僕",
"Barney & Friends",
"Between Love & Goodbye",
"Beyond Good & Evil",
"Bill & Ted's Excellent Adventure/Bogus Journey",
"BLACK & WHITE",
"Bonnie & Clyde",
"Brandy & Mr. Whiskers",
"Brothers & Sisters",
"Bucket & Skinner's Epic Adventures",
"Calvin & Hobbes",
"Cats & Dogs",
"Command & Conquer",
"Devil & Devil",
"Dharma & Greg",
"Dicky & Dawn",
"Drake & Josh",
"Edgar & Ellen",
"Franklin & Bash",
"Gabby Duran & The Unsittables",
"Girls und Panzer/ガールズ&パンツァー",
"Gnomeo & Juliet",
"Grim Adventures of Billy & Mandy",
"Half & Half/ハーフ・アンド・ハーフ",
"Hansel & Gretel",
"Hatfields & McCoys",
"High & Low - The Story of S.W.O.R.D.",
"Home & Away",
"Hudson & Rex",
"Huntik: Secrets & Seekers",
"Imagine Me & You",
"Jekyll & Hyde",
"Jonathan Strange & Mr. Norrell",
"Knight's & Magic/ナイツ&マジック",
"Law & Order: Los Angeles",
"Law & Order: Organized Crime",
"Lilo & Stitch",
"Locke & Key",
"Lockwood & Co.",
"Lost & Found Music Studios",
"Lu & Og",
"Me & My Brothers",
"Melissa & Joey",
"Mickey Mouse & Friends",
"Mike & Molly",
"Mike, Lu & Og",
"Miraculous: Tales of Ladybug & Cat Noir",
"Mork & Mindy",
"Mount&Blade",
"Mr. & Mrs. Smith",
"Mr. Peabody & Sherman",
"Muhyo & Roji",
"Nicky, Ricky, Dicky & Dawn",
"Oliver & Company",
"Ozzy & Drix",
"Panty & Stocking with Garterbelt/パンティストッキングwithガーターベルト",
"Penryn & the End of Days",
"Prep & Landing",
"Prince & Hero/王子とヒーロー",
"Prince & Me",
"Puzzle & Dragons",
"Ren & Stimpy Show",
"Rizzoli & Isles",
"Romeo & Juliet",
"Rosemary & Thyme",
"Sam & Cat",
"Sam & Max",
"Sapphire & Steel",
"Scott & Bailey",
"Shakespeare & Hathaway: Private Investigators",
"Soul Nomad & the World Eaters",
"Superman & Lois",
"Tiger & Bunny/タイガー&バニー",
"Trains & Automobiles",
"Upin & Ipin",
"Wallace & Gromit",
"Witch & Wizard",
"Wolverine & the X-Men",
"Yotsuba&!/よつばと!",
"Young & Hungry",
]
class FictionHuntComSiteAdapter(BaseSiteAdapter): class FictionHuntComSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url): def __init__(self, config, url):
@ -57,7 +144,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
# The date format will vary from site to site. # The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%d %b %Y" self.dateformat = "%Y-%m-%d %H:%M:%S"
@staticmethod @staticmethod
def getSiteDomain(): def getSiteDomain():
@ -123,9 +210,11 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
url = self.url url = self.url
data = self.get_request(url) data = self.get_request(url)
if self.needToLoginCheck(data): ## As per #784, site isn't requiring login anymore.
self.performLogin(url) ## Login check commented since we've seen it toggle before.
data = self.get_request(url,usecache=False) # if self.needToLoginCheck(data):
# self.performLogin(url)
# data = self.get_request(url,usecache=False)
soup = self.make_soup(data) soup = self.make_soup(data)
## detect old storyUrl, switch to new storyUrl: ## detect old storyUrl, switch to new storyUrl:
@ -143,9 +232,10 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
self._setURL(soup.select_one("div.Story__details a")['href']) self._setURL(soup.select_one("div.Story__details a")['href'])
url = self.url url = self.url
# logger.debug(data)
self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'}))) self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))
summhead = soup.find('h5',text='Summary') summhead = soup.find('h5',string='Summary')
self.setDescription(url,summhead.find_next('div')) self.setDescription(url,summhead.find_next('div'))
## author: ## author:
@ -154,42 +244,43 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl',autha['href']) self.story.setMetadata('authorUrl',autha['href'])
self.story.setMetadata('author',autha.string) self.story.setMetadata('author',autha.string)
updlab = soup.find('label',string='Last Updated:')
if updlab:
update = updlab.find_next('time')['datetime']
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
publab = soup.find('label',string='Published:')
if publab:
pubdate = publab.find_next('time')['datetime']
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
## need author page for some metadata. ## need author page for some metadata.
authsoup = None authsoup = None
authpagea = autha authpagea = autha
authstorya = None authstorya = None
## Rating and exact word count doesn't appear on the summary
## page, try to get from author page.
## find story url, might need to spin through author's pages. ## find story url, might need to spin through author's pages.
while authpagea and not authstorya: while authpagea and not authstorya:
logger.debug(authpagea)
authsoup = self.make_soup(self.get_request(authpagea['href'])) authsoup = self.make_soup(self.get_request(authpagea['href']))
authpagea = authsoup.find('a',{'class':'page-link','rel':'next'}) authpagea = authsoup.find('a',{'rel':'next'})
# CSS selectors don't allow : or / unquoted, which # CSS selectors don't allow : or / unquoted, which
# BS4(and dependencies) didn't used to enforce. # BS4(and dependencies) didn't used to enforce.
authstorya = authsoup.select('h4.Story__item-title a[href="%s"]'%self.url) authstorya = authsoup.select_one('h4.Story__item-title a[href="%s"]'%self.url)
if not authstorya: if not authstorya:
raise exceptions.FailedToDownload("Error finding %s on author page(s)" % self.url) raise exceptions.FailedToDownload("Error finding %s on author page(s)" % self.url)
meta = authstorya[0].parent.parent.select("div.Story__meta-info")[0] meta = authstorya.find_parent('li').find('div',class_='Story__meta-info')
## remove delimiters
for span in authstorya[0].parent.parent.select("div.Story__meta-info span.delimiter"):
span.extract()
meta.find('span').extract() # discard author link
update = stripHTML(meta.find('span').extract()).split(':')[1].strip()
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
pubdate = stripHTML(meta.find('span').extract()).split(':')[1].strip()
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
meta=meta.text.split() meta=meta.text.split()
self.story.setMetadata('numWords',meta[meta.index('words')-1]) self.story.setMetadata('numWords',meta[meta.index('words')-1])
self.story.setMetadata('rating',meta[meta.index('Rating:')+1]) self.story.setMetadata('rating',meta[meta.index('Rating:')+1])
# logger.debug(meta) # logger.debug(meta)
# Find original ffnet URL # Find original ffnet URL
a = soup.find('a', text="Source") a = soup.find('a', string="Source")
self.story.setMetadata('origin',stripHTML(a)) self.story.setMetadata('origin',stripHTML(a))
self.story.setMetadata('originUrl',a['href']) self.story.setMetadata('originUrl',a['href'])
@ -208,8 +299,30 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
for a in soup.select('a[href*="pairings="]'): for a in soup.select('a[href*="pairings="]'):
self.story.addToList('ships',stripHTML(a).replace("+","/")) self.story.addToList('ships',stripHTML(a).replace("+","/"))
for chapa in soup.select('ul.StoryContents__chapters a'): for a in soup.select('div.Story__type a[href*="fandoms="]'):
self.add_chapter(stripHTML(chapa.find('span',{'class':'chapter-title'})),chapa['href']) # logger.debug(a)
fandomstr=stripHTML(a).replace(' Fanfiction','').strip()
# logger.debug("'%s'"%fandomstr)
## haven't thought of a better way to detect and *not*
## split on fandoms with a '&' in them.
for ampfandom in ampfandoms:
if ampfandom in fandomstr:
self.story.addToList('category',ampfandom)
fandomstr = fandomstr.replace(ampfandom,'')
for fandom in fandomstr.split('&'):
if fandom:
self.story.addToList('category',fandom)
## Currently no 'Original' stories on the site, but does list
## it as a search type. Set extratags: and uncomment this if
## and when.
# if self.story.getList('category'):
# self.story.addToList('category', 'FanFiction')
# else:
# self.story.addToList('category', 'Original')
for chapli in soup.select('ul.StoryContents__chapters li'):
self.add_chapter(stripHTML(chapli.select_one('span.chapter-title')),chapli.select_one('a')['href'])
if self.num_chapters() == 0: if self.num_chapters() == 0:
raise exceptions.FailedToDownload("Story at %s has no chapters." % self.url) raise exceptions.FailedToDownload("Story at %s has no chapters." % self.url)

View file

@ -27,6 +27,7 @@
# per-user achivement tracking with fancy achievement-get animations # per-user achivement tracking with fancy achievement-get animations
# story scripting (shows script tags visible in the text, not computed values or input fields) # story scripting (shows script tags visible in the text, not computed values or input fields)
import re
import json import json
from datetime import datetime from datetime import datetime
@ -35,6 +36,8 @@ import itertools
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# __package__ = 'fanficfare.adapters' # fixes dev issues with unknown package base
from .base_adapter import BaseSiteAdapter from .base_adapter import BaseSiteAdapter
from ..htmlcleanup import stripHTML from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions from .. import exceptions as exceptions
@ -52,6 +55,8 @@ class FictionLiveAdapter(BaseSiteAdapter):
self.story_id = self.parsedUrl.path.split('/')[3] self.story_id = self.parsedUrl.path.split('/')[3]
self.story.setMetadata('storyId', self.story_id) self.story.setMetadata('storyId', self.story_id)
self.chapter_id_to_api = {}
# normalize URL. omits title in the url # normalize URL. omits title in the url
self._setURL("https://fiction.live/stories//{s_id}".format(s_id = self.story_id)); self._setURL("https://fiction.live/stories//{s_id}".format(s_id = self.story_id));
@ -65,7 +70,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
def getSiteURLPattern(self): def getSiteURLPattern(self):
# I'd like to thank regex101.com for helping me screw this up less # I'd like to thank regex101.com for helping me screw this up less
return r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/([a-zA-Z0-9\-]+)(/(home)?)?" return r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/([a-zA-Z0-9\-]+)(/(home)?)?$"
@classmethod @classmethod
def getSiteExampleURLs(cls): def getSiteExampleURLs(cls):
@ -74,11 +79,29 @@ class FictionLiveAdapter(BaseSiteAdapter):
+"https://fiction.live/Sci-fi/Example-Story-With-URL-Genre/17CharacterIDhere/ " +"https://fiction.live/Sci-fi/Example-Story-With-URL-Genre/17CharacterIDhere/ "
+"https://fiction.live/stories/Example-Story-With-UUID/00000000-0000-4000-0000-000000000000/") +"https://fiction.live/stories/Example-Story-With-UUID/00000000-0000-4000-0000-000000000000/")
@classmethod
def get_section_url(cls,url):
## minimal URL used for section names in INI and reject list
## for comparison
# logger.debug("pre--url:%s"%url)
url = re.sub(r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/(?P<id>[a-zA-Z0-9\-]+)(/(home)?)?$",r'https://fiction.live/stories//\g<id>',url)
# logger.debug("post-url:%s"%url)
return url
def parse_timestamp(self, timestamp): def parse_timestamp(self, timestamp):
# fiction.live date format is unix-epoch milliseconds. not a good fit for fanficfare's makeDate. # fiction.live date format is unix-epoch milliseconds. not a good fit for fanficfare's makeDate.
# doesn't use a timezone object and returns tz-naive datetimes. I *think* I can leave the rest to fanficfare # doesn't use a timezone object and returns tz-naive datetimes. I *think* I can leave the rest to fanficfare
return datetime.fromtimestamp(timestamp / 1000.0, None) return datetime.fromtimestamp(timestamp / 1000.0, None)
def img_url_trans(self,imgurl):
"Apparently site changed cdn URLs for images more than once."
# logger.debug("pre--imgurl:%s"%imgurl)
imgurl = re.sub(r'(\w+)\.cloudfront\.net',r'cdn6.fiction.live/file/fictionlive',imgurl)
imgurl = re.sub(r'www\.filepicker\.io/api/file/(\w+)',r'cdn4.fiction.live/fp/\1',imgurl)
imgurl = re.sub(r'cdn[34].fiction.live/(.+)',r'cdn6.fiction.live/file/fictionlive/\1',imgurl)
# logger.debug("post-imgurl:%s"%imgurl)
return imgurl
def doExtractChapterUrlsAndMetadata(self, get_cover=True): def doExtractChapterUrlsAndMetadata(self, get_cover=True):
metadata_url = "https://fiction.live/api/node/{s_id}/" metadata_url = "https://fiction.live/api/node/{s_id}/"
@ -150,7 +173,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
tags = data['ta'] if 'ta' in data else [] tags = data['ta'] if 'ta' in data else []
if (self.story.getMetadata('rating') in {"nsfw", "adult"} or 'smut' in tags) and \ if (self.story.getMetadataRaw('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
not (self.is_adult or self.getConfig("is_adult")): not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url) raise exceptions.AdultCheckRequired(self.url)
@ -186,7 +209,6 @@ class FictionLiveAdapter(BaseSiteAdapter):
if show_nsfw_cover_images or not nsfw_cover: if show_nsfw_cover_images or not nsfw_cover:
coverUrl = data['i'][0] coverUrl = data['i'][0]
self.setCoverImage(self.url, coverUrl) self.setCoverImage(self.url, coverUrl)
self.story.setMetadata('cover_image', "<a href=\"" + coverUrl + "\" />") # TODO: is this needed?
# gonna need these later for adding details to achievement-granting links in the text # gonna need these later for adding details to achievement-granting links in the text
try: try:
@ -219,6 +241,17 @@ class FictionLiveAdapter(BaseSiteAdapter):
a, b = itertools.tee(iterable, 2) a, b = itertools.tee(iterable, 2)
next(b, None) next(b, None)
return list(zip(a, b)) return list(zip(a, b))
def map_chap_ids_to_api(chapter_ids, route_ids, times):
for index, bounds in enumerate(times):
start, end = bounds
end -= 1
chapter_url = chunkrange_url.format(s_id = data['_id'], start = start, end = end)
self.chapter_id_to_api[chapter_ids[index]] = chapter_url
for route_id in route_ids:
chapter_url = route_chunkrange_url.format(c_id = route_id)
self.chapter_id_to_api[route_id] = chapter_url
## first thing to do is seperate out the appendices ## first thing to do is seperate out the appendices
appendices, maintext, routes = [], [], [] appendices, maintext, routes = [], [], []
@ -240,22 +273,25 @@ class FictionLiveAdapter(BaseSiteAdapter):
## main-text chapter extraction processing. *should* now handle all the edge cases. ## main-text chapter extraction processing. *should* now handle all the edge cases.
## relies on fanficfare ignoring empty chapters! ## relies on fanficfare ignoring empty chapters!
titles = [c['title'] for c in maintext] titles = ["Home"] + [c['title'] for c in maintext]
titles = ["Home"] + titles chapter_ids = ['home'] + [c['id'] for c in maintext]
times = [data['ct']] + [c['ct'] for c in maintext] + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1
times = pair(times)
times = [c['ct'] for c in maintext] if self.getConfig('include_appendices', True): # Add appendices after main text if desired
times = [data['ct']] + times + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1 titles = titles + ["Appendix: " + a['title'][9:] for a in appendices]
chapter_ids = chapter_ids + [a['id'] for a in appendices]
times = times + [(a['ct'], a['ct'] + 2) for a in appendices]
route_ids = [r['id'] for r in routes]
map_chap_ids_to_api(chapter_ids, route_ids, times) # Map chapter ids to API URLs for use when comparing the two
# doesn't actually run without the call to list. # doesn't actually run without the call to list.
list(map(add_chapter_url, titles, pair(times))) list(map(add_chapter_url, titles, times))
for a in appendices: # add appendices afterwards
chapter_start = a['ct']
chapter_title = "Appendix: " + a['title'][9:] # 'Appendix: ' rather than '#special' at beginning of name
add_chapter_url(chapter_title, (chapter_start, chapter_start + 2)) # 1 msec range = this one chunk only
for r in routes: # add route at the end, after appendices for r in routes: # add route at the end, after appendices
route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp
chapter_title = "Route: " + r['title'] # 'Route: ' at beginning of name, since it's a multiroute chapter chapter_title = "Route: " + r['title'] # 'Route: ' at beginning of name, since it's a multiroute chapter
add_route_chapter_url(chapter_title, route_id) add_route_chapter_url(chapter_title, route_id)
@ -285,7 +321,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
text += "<div>" # chapter chunks aren't always well-delimited in their contents text += "<div>" # chapter chunks aren't always well-delimited in their contents
# appendix chunks are mixed in with other things # appendix chunks are mixed in with other things
if not getting_appendix and 't' in chunk and chunk['t'].startswith("#special"): # t = title = bookmark if not getting_appendix and 't' in chunk and chunk['t'].startswith("#special"): # t = title = bookmark
continue continue
@ -300,7 +336,8 @@ class FictionLiveAdapter(BaseSiteAdapter):
text += "</div><br />\n" text += "</div><br />\n"
return text ## soup to repair the most egregious HTML errors.
return self.utf8FromSoup(url,self.make_soup(text))
### everything from here out is chunk data handling. ### everything from here out is chunk data handling.
@ -317,8 +354,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
if self.achievements: if self.achievements:
soup = self.append_achievments(soup) soup = self.append_achievments(soup)
# utf8FromSoup does important processing e.g. sanitization and imageurl extraction return str(soup)
return self.utf8FromSoup(self.url, soup)
def add_spoiler_legends(self, soup): def add_spoiler_legends(self, soup):
# find spoiler links and change link-anchor block to legend block # find spoiler links and change link-anchor block to legend block
@ -398,7 +434,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
# so let's just ignore non-int values here # so let's just ignore non-int values here
if not isinstance(v, int): if not isinstance(v, int):
continue continue
if 0 <= v <= len(choices): if 0 <= v < len(choices):
output[v] += 1 output[v] += 1
return output return output
@ -482,8 +518,10 @@ class FictionLiveAdapter(BaseSiteAdapter):
# now matches the site and does *not* include dicerolls as posts! # now matches the site and does *not* include dicerolls as posts!
num_votes = str(len(posts)) + " posts" if len(posts) != 0 else "be the first to post." num_votes = str(len(posts)) + " posts" if len(posts) != 0 else "be the first to post."
posts_title = chunk['b'] if 'b' in chunk else "Reader Posts"
output = "" output = ""
output += u"<h4><span>Reader Posts — <small> Posting " + closed output += u"<h4><span>" + posts_title + " — <small> Posting " + closed
output += u"" + num_votes + "</small></span></h4>\n" output += u"" + num_votes + "</small></span></h4>\n"
## so. a voter can roll with their post. these rolls are in a seperate dict, but have the **same uid**. ## so. a voter can roll with their post. these rolls are in a seperate dict, but have the **same uid**.
@ -509,6 +547,35 @@ class FictionLiveAdapter(BaseSiteAdapter):
return output return output
def normalize_chapterurl(self, url):
if url.startswith(r'https://fiction.live/api/anonkun/chapters'):
return url
pattern = None
if url.startswith(r'https://fiction.live/api/anonkun/route'):
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/routes/([a-zA-Z0-9]+)"
elif url.startswith(r'https://fiction.live/'):
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/[^/]*(/[a-zA-Z0-9]+|home)"
# regex101 rocks
if not pattern:
return url
match = re.match(pattern, url)
if not match:
return url
chapter_id = match.group(1)
if chapter_id.startswith('/'):
chapter_id = chapter_id[1:]
if chapter_id and chapter_id in self.chapter_id_to_api:
return self.chapter_id_to_api[chapter_id]
return url
def format_unknown(self, chunk): def format_unknown(self, chunk):
raise NotImplementedError("Unknown chunk type ({}) in fiction.live story.".format(chunk)) raise NotImplementedError("Unknown chunk type ({}) in fiction.live story.".format(chunk))
@ -523,5 +590,5 @@ class FictionLiveAdapter(BaseSiteAdapter):
# TODO: support chapter urls for single-chapter / chapter-range downloads # TODO: support chapter urls for single-chapter / chapter-range downloads
# complicated -- urls for getChapterText are API urls generated by add_chapters, not the public/website ones # complicated -- urls for getChapterText are API urls generated by add_chapters, not the public/website ones
# in particular, may need more API reversing to figure out how to get the *end* of the chunk range # in particular, may need more API reversing to figure out how to get the *end* of the chunk range
# find in 'bm' in the metadata? # find in 'bm' in the metadata?

View file

@ -40,10 +40,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id) self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION) self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
# Always single chapters, probably should use the Anthology feature to
# merge chapters of a story
self.story.setMetadata('numChapters', 1)
@staticmethod @staticmethod
def getSiteDomain(): def getSiteDomain():
return FictionManiaTVAdapter.SITE_DOMAIN return FictionManiaTVAdapter.SITE_DOMAIN
@ -110,7 +106,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', value) self.story.setMetadata('rating', value)
elif key == 'Complete': elif key == 'Complete':
self.story.setMetadata('status', 'Completed' if value == 'Complete' else 'In-Progress') self.story.setMetadata('status', 'Completed' if value == 'yes' else 'In-Progress')
elif key == 'Categories': elif key == 'Categories':
for element in cells[1]('a'): for element in cells[1]('a'):
@ -167,14 +163,30 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
# <div style="margin-left:10ex;margin-right:10ex"> # <div style="margin-left:10ex;margin-right:10ex">
## fetching SWI version now instead of text. ## fetching SWI version now instead of text.
htmlurl = url.replace('readtextstory','readhtmlstory') htmlurl = url.replace('readtextstory','readhtmlstory')
soup = self.make_soup(self.get_request(htmlurl)) ## Used to find by style, but it's inconsistent now. we've seen:
div = soup.find('div',style="margin-left:10ex;margin-right:10ex") ## margin-left:10ex;margin-right:10ex
if div: ## margin-right: 5%; margin-left: 5%
return self.utf8FromSoup(htmlurl,div) ## margin-left:5%; margin-right:5%
else: ## margin-left:5%; margin-right:5%; background: white
## And there's some without a <div> tag (or an unclosed div)
## Only the comments appear to be consistent.
beginmarker='<!--Read or display the file-->'
endmarker='''<hr size=1 noshade>
<!--review add read, top and bottom-->
'''
data = self.get_request(htmlurl)
try:
## if both markers are found, assume whatever is in between
## is the chapter text.
soup = self.make_soup(data[data.index(beginmarker):data.index(endmarker)])
return self.utf8FromSoup(htmlurl,soup)
except Exception as e:
# logger.debug(e)
# logger.debug(soup)
logger.debug("Story With Images(SWI) not found, falling back to HTML.") logger.debug("Story With Images(SWI) not found, falling back to HTML.")
## fetching html version now instead of text. ## fetching html version now instead of text.
## Note that html and SWI pages are *not* formatted the same.
soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory'))) soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
# logger.debug(soup) # logger.debug(soup)

View file

@ -18,6 +18,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
import re
# py2 vs py3 transition # py2 vs py3 transition
@ -46,6 +47,12 @@ class FictionPressComSiteAdapter(FanFictionNetSiteAdapter):
def _get_site_url_pattern(cls): def _get_site_url_pattern(cls):
return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$" return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
## normalized chapter URLs DO contain the story title now, but
## normalized to current urltitle in case of title changes.
def normalize_chapterurl(self,url):
return re.sub(r"https?://(www|m)\.(?P<keep>fictionpress\.com/s/\d+/\d+/).*",
r"https://www.\g<keep>",url)+self.urltitle
def getClass(): def getClass():
return FictionPressComSiteAdapter return FictionPressComSiteAdapter

View file

@ -66,7 +66,8 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
params['username'])) params['username']))
d = self.post_request(loginUrl,params,usecache=False) d = self.post_request(loginUrl,params,usecache=False)
if "Login attempt failed..." in d: if "Login attempt failed..." in d or \
'<div id="error">Please enter your username and password.</div>' in d:
logger.info("Failed to login to URL %s as %s" % (loginUrl, logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['username'])) params['username']))
raise exceptions.FailedToLogin(url,params['username']) raise exceptions.FailedToLogin(url,params['username'])
@ -114,7 +115,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
titleh4 = soup.find('div',{'class':'storylist'}).find('h4') titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
self.story.setMetadata('title', stripHTML(titleh4.a)) self.story.setMetadata('title', stripHTML(titleh4.a))
if 'Deleted story' in self.story.getMetadata('title'): if 'Deleted story' in self.story.getMetadataRaw('title'):
raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url) raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
@ -129,14 +130,14 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string) #self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
# most of the meta data is here: # most of the meta data is here:
metap = storydiv.find("p",{"class":"meta"}) metap = storydiv.find("div",{"class":"meta"})
self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string) self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)
# warnings # warnings
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span> # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
spanreq = metap.find("span",{"class":"story-warnings"}) spanreq = metap.find("span",{"class":"story-warnings"})
if spanreq: # can be no warnings. if spanreq: # can be no warnings.
for a in spanreq.findAll("a"): for a in spanreq.find_all("a"):
self.story.addToList('warnings',a['title']) self.story.addToList('warnings',a['title'])
## perhaps not the most efficient way to parse this, using ## perhaps not the most efficient way to parse this, using
@ -186,7 +187,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# no list found, so it's a one-chapter story. # no list found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
chapterlistlis = storylistul.findAll('li') chapterlistlis = storylistul.find_all('li')
for chapterli in chapterlistlis: for chapterli in chapterlistlis:
if "blocked" in chapterli['class']: if "blocked" in chapterli['class']:
# paranoia check. We should already be logged in by now. # paranoia check. We should already be logged in by now.

View file

@ -99,6 +99,17 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
params['username'])) params['username']))
raise exceptions.FailedToLogin(url,params['username']) raise exceptions.FailedToLogin(url,params['username'])
def make_soup(self,data):
soup = super(FimFictionNetSiteAdapter, self).make_soup(data)
for img in soup.select('img.lazy-img, img.user_image'):
## FimF has started a 'camo' mechanism for images that
## gets block by CF. attr data-source is original source.
if img.has_attr('data-source'):
img['src'] = img['data-source']
elif img.has_attr('data-src'):
img['src'] = img['data-src']
return soup
def doExtractChapterUrlsAndMetadata(self,get_cover=True): def doExtractChapterUrlsAndMetadata(self,get_cover=True):
if self.is_adult or self.getConfig("is_adult"): if self.is_adult or self.getConfig("is_adult"):
@ -106,7 +117,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
## Only needed with password protected stories, which you have ## Only needed with password protected stories, which you have
## to have logged into in the website using this account. ## to have logged into in the website using this account.
self.performLogin(self.url) if self.getConfig("always_login"):
self.performLogin(self.url)
##--------------------------------------------------------------------------------------------------- ##---------------------------------------------------------------------------------------------------
## Get the story's title page. Check if it exists. ## Get the story's title page. Check if it exists.
@ -139,7 +151,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("authorId", author['href'].split('/')[2]) self.story.setMetadata("authorId", author['href'].split('/')[2])
self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(), self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
self.story.getMetadata('authorId'), self.story.getMetadata('authorId'),
self.story.getMetadata('author'))) # meta entry author can be changed by the user.
stripHTML(author)))
#Rating text is replaced with full words for historical compatibility after the site changed #Rating text is replaced with full words for historical compatibility after the site changed
#on 2014-10-27 #on 2014-10-27
@ -167,12 +180,13 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
# Cover image # Cover image
if get_cover: if get_cover:
storyImage = storyContentBox.find('img', {'class':'lazy-img'}) storyImage = soup.select_one('div.story_container__story_image img')
if storyImage: if storyImage:
coverurl = storyImage['data-fullsize'] coverurl = storyImage['data-fullsize']
# try setting from data-fullsize, if fails, try using data-src # try setting from data-fullsize, if fails, try using data-src
if self.setCoverImage(self.url,coverurl)[0] == "failedtoload": cover_set = self.setCoverImage(self.url,coverurl)[0]
coverurl = storyImage['data-src'] if not cover_set or cover_set.startswith("failedtoload"):
coverurl = storyImage['src']
self.setCoverImage(self.url,coverurl) self.setCoverImage(self.url,coverurl)
coverSource = storyImage.parent.find('a', {'class':'source'}) coverSource = storyImage.parent.find('a', {'class':'source'})
@ -284,16 +298,26 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
descriptionMeta = soup.find('meta', {'property':'og:description'}) descriptionMeta = soup.find('meta', {'property':'og:description'})
self.story.setMetadata("short_description", stripHTML(descriptionMeta['content'])) self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))
#groups # groups.
# If there are more than X groups, there's a 'Show all' button
# that calls for a JSON containing HTML with the full list.
# But it doesn't work reliably with FlareSolverr.
groupList = None
groupButton = soup.find('button', {'data-click':'showAll'}) groupButton = soup.find('button', {'data-click':'showAll'})
if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}): if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId"))) try:
groupData = json.loads(groupResponse) groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
groupList = self.make_soup(groupData["content"]) groupData = json.loads(groupResponse)
else: groupList = self.make_soup(groupData["content"])
except Exception as e:
logger.warning("Collecting 'groups' (AKA 'Featured In') from JSON failed:%s"%e)
logger.warning("Only 'groups' initially shown on the page will be collected.")
logger.warning("This is a known issue with JSON and FlareSolverr. See #1122")
if not groupList:
groupList = soup.find('ul', {'id':'story-groups-list'}) groupList = soup.find('ul', {'id':'story-groups-list'})
if not (groupList == None): if groupList:
for groupContent in groupList.find_all('a'): for groupContent in groupList.find_all('a'):
self.story.addToList("groupsUrl", 'https://'+self.host+groupContent["href"]) self.story.addToList("groupsUrl", 'https://'+self.host+groupContent["href"])
groupName = groupContent.find('span', {"class":"group-name"}) groupName = groupContent.find('span', {"class":"group-name"})
@ -304,7 +328,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
#sequels #sequels
for header in soup.find_all('h1', {'class':'header-stories'}): for header in soup.find_all('h1', {'class':'header-stories'}):
# I don't know why using text=re.compile with find() wouldn't work, but it didn't. # I don't know why using string=re.compile with find() wouldn't work, but it didn't.
if header.text.startswith('Sequels'): if header.text.startswith('Sequels'):
sequelContainer = header.parent sequelContainer = header.parent
for sequel in sequelContainer.find_all('a', {'class':'story_link'}): for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
@ -384,3 +408,33 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
# data = self.get_request(url) # data = self.get_request(url)
if self.getConfig("is_adult"): if self.getConfig("is_adult"):
self.set_adult_cookie() self.set_adult_cookie()
def get_urls_from_page(self,url,normalize):
    """Collect story URLs from a FimFiction bookshelf page.

    Non-bookshelf URLs (and scrape_bookshelf == 'legacy') fall through
    to the base implementation.  Otherwise the bookshelf pages are
    scraped directly, following the paginator across pages only when
    scrape_bookshelf is truthy.  Returns {'urllist': [...]}.
    """
    iterate = self.getConfig('scrape_bookshelf', default=False)
    if not re.search(r'fimfiction\.net/bookshelf/(?P<listid>.+?)/',url) or iterate == 'legacy':
        return super().get_urls_from_page(url,normalize)
    self.before_get_urls_from_page(url,normalize)
    final_urls = list()
    while True:
        data = self.get_request(url,usecache=True)
        soup = self.make_soup(data)
        paginator = soup.select_one('div.paginator-container > div.page_list > ul').find_all('li')
        logger.debug("Paginator: " + str(len(paginator)))
        stories_container = soup.select_one('div.content > div.two-columns > div.left').find_all('article', recursive=False)
        x = 0
        logger.debug("Container "+str(len(stories_container)))
        for story_raw in stories_container:
            x += 1
            story_url = story_raw.select_one('div.story_content_box > header.title > div > a.story_name').get('href')
            # site hrefs are relative; make them absolute.
            url_story = ('https://' + self.getSiteDomain() + story_url)
            #logger.debug(url_story)
            final_urls.append(url_story)
        logger.debug("Discovered %s new stories."%str(x))
        # NOTE(review): assumes the last <li> holds the 'next' arrow as
        # an <a> with no text when more pages exist; a non-empty text
        # (a page number) apparently marks the last page -- confirm
        # against live site markup.
        next_button = paginator[-1].select_one('a')
        logger.debug("Next button: " + next_button.get_text())
        if next_button.get_text() or not iterate:
            return {'urllist': final_urls}
        url = ('https://' + self.getSiteDomain() + next_button.get('href'))

View file

@ -93,6 +93,9 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
a = soup.find('a', href=re.compile(r"profileshow.aspx\?u=")) a = soup.find('a', href=re.compile(r"profileshow.aspx\?u="))
self.story.setMetadata('authorId', a['href'].split('=')[1]) self.story.setMetadata('authorId', a['href'].split('=')[1])
if not self.story.getMetadata('authorId'):
logger.warning("Site authorUrl missing authorId, using SiteMissingAuthorId")
self.story.setMetadata('authorId', 'SiteMissingAuthorId')
self.story.setMetadata('authorUrl', 'http://' + self.story.setMetadata('authorUrl', 'http://' +
self.host + '/' + a['href']) self.host + '/' + a['href'])
self.story.setMetadata('author', a.string) self.story.setMetadata('author', a.string)
@ -102,7 +105,6 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
# to download them one at a time yourself. I'm also setting the status to # to download them one at a time yourself. I'm also setting the status to
# complete # complete
self.add_chapter(self.story.getMetadata('title'), self.url) self.add_chapter(self.story.getMetadata('title'), self.url)
self.story.setMetadata('numChapters', 1)
self.story.setMetadata('status', 'Completed') self.story.setMetadata('status', 'Completed')
## some stories do not have a summary listed, so I'm setting it here. ## some stories do not have a summary listed, so I'm setting it here.

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018 FanFicFare team # Copyright 2024 FanFicFare team
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -18,15 +18,15 @@
from __future__ import absolute_import from __future__ import absolute_import
import re import re
from .base_xenforoforum_adapter import BaseXenForoForumAdapter from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
def getClass(): def getClass():
return QuestionablequestingComAdapter return QuestionablequestingComAdapter
class QuestionablequestingComAdapter(BaseXenForoForumAdapter): class QuestionablequestingComAdapter(BaseXenForo2ForumAdapter):
def __init__(self, config, url): def __init__(self, config, url):
BaseXenForoForumAdapter.__init__(self, config, url) BaseXenForo2ForumAdapter.__init__(self, config, url)
# Each adapter needs to have a unique site abbreviation. # Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','qq') self.story.setMetadata('siteabbrev','qq')

View file

@ -1,169 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from .base_adapter import BaseSiteAdapter, makeDate
class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
    """Adapter for harrypotterfanfiction.com stories (viewstory.php?psid=N)."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','hp')
        # becomes True once the user has passed the adult-content check.
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only psid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d %H:%M%p"

        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId'))

    @staticmethod
    def getSiteDomain():
        # Domain used for URL normalization and adapter dispatch.
        return 'harrypotterfanfiction.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://harrypotterfanfiction.com/viewstory.php?psid=1234"

    def getSiteURLPattern(self):
        return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$"

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story page (and the author page, for the summary)
        and populate story metadata plus the chapter list.

        Raises AdultCheckRequired when the site warns about restricted
        chapters and neither is_adult flag is set.
        """
        url = self.url
        if self.is_adult or self.getConfig("is_adult"):
            # '&showRestricted' reveals chapters hidden behind the adult gate.
            url = url+'&showRestricted'
        logger.debug("URL: "+url)
        data = self.get_request(url)

        if "This story may contain chapters not appropriate for a general audience." in data and not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

        ## Don't know if these still apply
        # if "Access denied. This story has not been validated by the adminstrators of this site." in data:
        #     raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        # elif "ERROR locating story meta for psid" in data:
        #     raise exceptions.StoryDoesNotExist(self.url)

        soup = self.make_soup(data)

        ## Title
        h2 = soup.find('h2')
        h2.find('i').extract() # remove author
        self.story.setMetadata('title',stripHTML(h2))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string[3:]) # remove 'by '

        ## hpcom doesn't always give us total words--but it does give
        ## us words/chapter. I'd rather add than fetch and parse
        ## another page.
        chapter_words=0
        for tr in soup.find('table',{'class':'table-chapters'}).find('tbody').findAll('tr'):
            tdstr = tr.findAll('td')[2].string
            chapter = tr.find('a')
            # keep only the '?chapterid=NNN' part of the chapter href.
            chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href'])
            added = self.add_chapter(chapter,'https://'+self.host+'/viewstory.php'+chpt)
            # only count words for chapters actually added.
            if added and tdstr and tdstr.isdigit():
                chapter_words+=int(tdstr)
        ## used below if total words from site not found

        # fetch author page to get story description.
        authorsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
        for story in authorsoup.find_all('article',class_='story-summary'):
            storya = story.find('h3').find('a',href=re.compile(r"^/viewstory.php\?psid="+self.story.getMetadata('storyId')))
            if storya:
                storydiv = storya.find_parent('div')
                break
        # NOTE(review): assumes the author page always lists this story;
        # storydiv would be unbound otherwise -- confirm against site.
        desc = storydiv.find('div',class_='story-summary__summary')
        self.setDescription(url,desc)

        # Metadata appears as key/value div pairs:
        # <div class='entry'>
        # <div class='entry__key'>Rating</div>
        # <div class='entry__value'>Mature</div>
        # </div>
        meta_key_map = {
            'Rating':'rating',
            'Words':'numWords',
            'Characters':'characters',
            'Primary Relationship':'ships',
            'Secondary Relationship(s)':'ships',
            'Genre(s)':'genre',
            'Era':'era',
            'Advisory':'warnings',
            'Story Reviews':'reviews',
            # 'Status':'', # Status is treated special
            'First Published':'datePublished',
            'Last Updated':'dateUpdated',
            }
        for key in soup.find_all('div',{'class':'entry__key'}):
            value = stripHTML(key.find_next('div',{'class':'entry__value'}))
            key = stripHTML(key)
            meta = meta_key_map.get(key,None)
            if meta:
                if meta.startswith('date'):
                    value = makeDate(value,self.dateformat)
                if meta in ('characters','genre','ships'):
                    # comma-separated list values.
                    self.story.extendList(meta,value.split(','))
                else:
                    self.story.setMetadata(meta,value)
            if key == 'Status':
                # map site status terms to FanFicFare's conventional values.
                if value == 'WIP':
                    value = 'In-Progress'
                elif value == 'COMPLETED':
                    value = 'Completed'
                # 'Abandoned' and other possible values used as-is
                self.story.setMetadata('status',value)

        # older stories don't present total words, use sum from chapters.
        if not self.story.getMetadata('numWords'):
            self.story.setMetadata('numWords',chapter_words)

    def getChapterText(self, url):
        """Download one chapter and return its cleaned HTML text."""
        logger.debug('Getting chapter text from: %s' % url)
        data = self.get_request(url)
        soup = self.make_soup(data)

        div = soup.find('div', {'class' : 'storytext-container'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
def getClass():
    """Module entry point: return the adapter class for this site."""
    return HarryPotterFanFictionComSiteAdapter

View file

@ -1,216 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Module entry point: return the adapter class for this site."""
    return HLFictionNetAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class HLFictionNetAdapter(BaseSiteAdapter):
    """Adapter for hlfiction.net, an eFiction-based archive."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','hlf')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'hlfiction.net'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Populate story metadata and the chapter list from the story
        page plus the author's listing page (eFiction layout)."""
        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)
        # print data

        ## Title and author
        a = soup.find('div', {'id' : 'pagetitle'})

        aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',aut['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+aut['href'])
        self.story.setMetadata('author',aut.string)
        aut.extract()
        # NOTE(review): the slice trims trailing text left after removing
        # the author link (presumably ' by'); verify against live markup.
        self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])

        # Find the chapters:
        chapters=soup.find('select')
        if chapters != None:
            for chapter in chapters.findAll('option'):
                # just in case there's tags, like <i> in chapter titles.
                self.add_chapter(chapter,'https://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])
        else:
            # no chapter <select> means a single-chapter story.
            self.add_chapter(self.story.getMetadata('title'),url)

        asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

        # find this story's listbox on the author page; 'list' stays
        # bound to it after the break for the metadata parsing below.
        for list in asoup.findAll('div', {'class' : re.compile('listbox')}):
            a = list.find('a')
            if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
                break

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            # like d[k], but returns "" for missing keys / non-tags.
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = list.findAll('span', {'class' : 'classification'})
        for labelspan in labels:
            label = labelspan.string
            value = labelspan.nextSibling

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'classification' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                # drop trailing separator characters from the rating text.
                self.story.setMetadata('rating', value[:len(value)-2])

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                for char in value.string.split(', '):
                    if not 'None' in char:
                        self.story.addToList('characters',char)

            if 'Genre' in label:
                for genre in value.string.split(', '):
                    if not 'None' in genre:
                        self.story.addToList('genre',genre)

            if 'Warnings' in label:
                for warning in value.string.split(', '):
                    if not 'None' in warning:
                        self.story.addToList('warnings',warning)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1
        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Download one chapter and return its cleaned HTML text."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

View file

@ -1,215 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from bs4.element import Comment
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Module entry point: return the adapter class for this site."""
    return HPFanficArchiveComAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class HPFanficArchiveComAdapter(BaseSiteAdapter):
    """Adapter for hpfanficarchive.com, an eFiction-based archive."""

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL( self.getProtocol() + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','hpffa')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'hpfanficarchive.com'

    @classmethod
    def getProtocol(cls):
        # has changed from http to https to http again.
        return "http://"

    @classmethod
    def getSiteExampleURLs(cls):
        return cls.getProtocol()+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?:"+re.escape("//"+self.getSiteDomain()+"/stories/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Populate story metadata and the chapter list from the story
        page (eFiction layout)."""
        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        elif "That story either does not exist on this archive or has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: That story either does not exist on this archive or has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)
        # print data

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('div', id="mainpage").find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl',self.getProtocol()+self.host+'/stories/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,self.getProtocol()+self.host+'/stories/'+chapter['href'])

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            # like d[k], but returns "" for missing keys / non-tags.
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            # collect everything between this label span and the next,
            # skipping HTML comments.
            val = labelspan.nextSibling
            value = unicode('')
            while val and not 'label' in defaultGetattr(val,'class'):
                # print("val:%s"%val)
                if not isinstance(val,Comment):
                    value += unicode(val)
                val = val.nextSibling
            label = labelspan.string
            # print("label:%s\nvalue:%s"%(label,value))

            if 'Summary' in label:
                self.setDescription(url,value)

            if 'Rated' in label:
                self.story.setMetadata('rating', stripHTML(value))

            if 'Word count' in label:
                self.story.setMetadata('numWords', stripHTML(value))

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Pairing' in label:
                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                for ship in ships:
                    self.story.addToList('ships',ship.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in stripHTML(value):
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = self.getProtocol()+self.host+'/stories/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1
        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Download one chapter and return its cleaned HTML text."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

View file

@ -1,262 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Module entry point: return the adapter class for this site."""
    return IkEternalNetAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class IkEternalNetAdapter(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','ike')
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%B %d, %Y"
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'www.ik-eternal.net'
@classmethod
def getSiteExampleURLs(cls):
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
## Login seems to be reasonably standard across eFiction sites.
def needToLoginCheck(self, data):
if 'Registered Users Only' in data \
or 'There is no such account on our website' in data \
or "That password doesn't match the one in our database" in data:
return True
else:
return False
def performLogin(self, url):
params = {}
if self.password:
params['penname'] = self.username
params['password'] = self.password
else:
params['penname'] = self.getConfig("username")
params['password'] = self.getConfig("password")
params['cookiecheck'] = '1'
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self.post_request(loginUrl, params)
if "Member Account" not in d : #Member Account
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
else:
return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
    """Fetch the story index page and populate chapter list and metadata.

    Handles the site's adult-content warning interstitial (retrying with
    the warning-bypass query parameter), logs in when required, then
    scrapes the eFiction 'listbox' block for title/author/metadata and
    collects per-chapter URLs.

    Raises:
        exceptions.AdultCheckRequired: adult warning hit and is_adult not set.
        exceptions.AccessDenied: story not validated by site admins.
        exceptions.FailedToLogin: via performLogin, if login is needed and fails.
    """
    if self.is_adult or self.getConfig("is_adult"):
        # Weirdly, different sites use different warning numbers.
        # If the title search below fails, there's a good chance
        # you need a different number. print data at that point
        # and see what the 'click here to continue' url says.
        addurl = "&warning=1"
    else:
        addurl=""

    # index=1 makes sure we see the story chapter index. Some
    # sites skip that for one-chapter stories.
    url = self.url+'&index=1'+addurl
    logger.debug("URL: "+url)

    data = self.get_request(url)

    if self.needToLoginCheck(data):
        # need to log in for this one.
        self.performLogin(url)
        # re-fetch now that we have a session.
        data = self.get_request(url)

    # The actual text that is used to announce you need to be an
    # adult varies from site to site. Again, print data before
    # the title search to troubleshoot.
    # Since the warning text can change by warning level, let's
    # look for the warning pass url. ksarchive uses
    # &amp;warning= -- actually, so do other sites. Must be an
    # eFiction book.
    # viewstory.php?sid=1882&amp;warning=4
    # viewstory.php?sid=1654&amp;ageconsent=ok&amp;warning=5
    #print data
    #m = re.search(r"'viewstory.php\?sid=1882(&amp;warning=4)'",data)
    m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
    if m != None:
        if self.is_adult or self.getConfig("is_adult"):
            # We tried the default and still got a warning, so
            # let's pull the warning number from the 'continue'
            # link and reload data.
            addurl = m.group(1)
            # correct stupid &amp; error in url.
            addurl = addurl.replace("&amp;","&")
            url = self.url+'&index=1'+addurl
            logger.debug("URL 2nd try: "+url)

            data = self.get_request(url)
        else:
            raise exceptions.AdultCheckRequired(self.url)

    # NOTE: 'adminstrators' typo matches the site's own page text; don't "fix" it.
    if "Access denied. This story has not been validated by the adminstrators of this site." in data:
        raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

    soup = self.make_soup(data)
    # print data

    ## Title -- the link back to the story itself (sid with no &chapter=).
    a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
    self.story.setMetadata('title',stripHTML(a))

    # Find authorid and URL from... author url.
    a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
    self.story.setMetadata('authorId',a['href'].split('=')[1])
    self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
    self.story.setMetadata('author',a.string)

    # Find the chapters: links of the form viewstory.php?sid=<id>&chapter=<n>.
    for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
        # just in case there's tags, like <i> in chapter titles.
        # addurl carries the warning-bypass param through to chapter fetches.
        self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

    # eFiction sites don't help us out a lot with their meta data
    # formating, so it's a little ugly.

    # utility method: dict/tag attribute lookup that returns "" on any failure.
    def defaultGetattr(d,k):
        try:
            return d[k]
        except:
            return ""

    # <span class="label">Rated:</span> NC-17<br /> etc
    asoup = soup.find('div', {'class': 'listbox'})
    # flatten <p> to <br> so label/value pairs sit as siblings.
    for a in asoup.findAll('p'):
        a.name='br'

    labels = asoup.findAll('span',{'class':'label'})
    for labelspan in labels:
        # value is the text/tag immediately following the label span.
        value = labelspan.nextSibling
        label = labelspan.string

        if 'Summary' in label:
            ## Everything until the next span class='label'
            svalue = ""
            while 'label' not in defaultGetattr(value,'class'):
                svalue += unicode(value)
                value = value.nextSibling
            self.setDescription(url,svalue)
            #self.story.setMetadata('description',stripHTML(svalue))

        if 'Rated' in label:
            self.story.setMetadata('rating', value)

        if 'Word count' in label:
            self.story.setMetadata('numWords', value)

        if 'Categories' in label:
            cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
            for cat in cats:
                self.story.addToList('category',cat.string)

        if 'Characters' in label:
            chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
            for char in chars:
                self.story.addToList('characters',char.string)

        # NOTE(review): type_id numbers are site-specific; 1=genre, 2=warnings here.
        if 'Genre' in label:
            genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
            for genre in genres:
                self.story.addToList('genre',genre.string)

        if 'Warnings' in label:
            warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
            for warning in warnings:
                self.story.addToList('warnings',warning.string)

        if 'Completed' in label:
            if 'Yes' in value:
                self.story.setMetadata('status', 'Completed')
            else:
                self.story.setMetadata('status', 'In-Progress')

        if 'Published' in label:
            self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

        if 'Updated' in label:
            # there's a stray [ at the end.
            #value = value[0:-1]
            self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
# grab the text for an individual chapter.
def getChapterText(self, url):
    """Fetch one chapter page and return its story text as cleaned HTML."""
    logger.debug('Getting chapter text from: %s' % url)
    page = self.make_soup(self.get_request(url))
    story_div = page.find('div', {'id' : 'story'})
    if story_div is None:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
    return self.utf8FromSoup(url, story_div)

View file

@ -161,7 +161,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -178,7 +178,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -199,22 +199,22 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -238,7 +238,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

28
fanficfare/adapters/adapter_inkbunnynet.py Normal file → Executable file
View file

@ -125,7 +125,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url,usecache=False)) soup = self.make_soup(self.get_request(url,usecache=False))
# removing all of the scripts # removing all of the scripts
for tag in soup.findAll('script'): for tag in soup.find_all('script'):
tag.extract() tag.extract()
@ -134,7 +134,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('title', stripHTML(title)) self.story.setMetadata('title', stripHTML(title))
# Get Author # Get Author
authortag = soup.find('table',{'class':'pooltable'}).find('a',href=re.compile(r'/gallery/')) authortag = soup.find('table',{'class':'pooltable'}).find('a',href=re.compile(r'/gallery/|/scraps/'))
author = authortag['href'].split('/')[-1] # no separate ID author = authortag['href'].split('/')[-1] # no separate ID
self.story.setMetadata('author', author) self.story.setMetadata('author', author)
self.story.setMetadata('authorId', author) self.story.setMetadata('authorId', author)
@ -149,7 +149,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
if not self.getConfig('keep_summary_html'): if not self.getConfig('keep_summary_html'):
synopsis = stripHTML(synopsis) synopsis = stripHTML(synopsis)
self.setDescription(url, stripHTML(synopsis)) self.setDescription(url, synopsis)
#Getting Keywords/Genres #Getting Keywords/Genres
keywords = bookdetails.find('div', {'id':'kw_scroll'}).find_next_siblings('div')[0].div.div.find_all('a') keywords = bookdetails.find('div', {'id':'kw_scroll'}).find_next_siblings('div')[0].div.div.find_all('a')
@ -157,10 +157,11 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
self.story.addToList('genre', stripHTML(kword)) self.story.addToList('genre', stripHTML(kword))
# Getting the Category # Getting the Category
category = bookdetails.findChildren('div', recursive=False)[2].find('span', string='Type:').parent
category.find('span').decompose()
self.story.setMetadata('category', stripHTML(category))
for div in bookdetails.find_all('div'): for div in bookdetails.find_all('div'):
if 'Details' == stripHTML(div).strip(): if 'Rating:' == stripHTML(div)[:7]:
self.story.setMetadata('category', div.find_next_siblings('div')[0].span.next_sibling.strip())
elif 'Rating:' == stripHTML(div).strip()[:7]:
rating = div.span.next_sibling.strip() rating = div.span.next_sibling.strip()
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
break break
@ -178,7 +179,14 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
if get_cover: if get_cover:
cover_img = soup.find('img', {'id':'magicbox'}) cover_img = soup.find('img', {'id':'magicbox'})
if cover_img: if cover_img:
# image content is treated like a normal image submission
self.setCoverImage(url, cover_img['src']) self.setCoverImage(url, cover_img['src'])
else:
# image content is present, but secondary to text file
cover_div = soup.find('div', {'class': 'content magicboxParent'})
cover_img = cover_div.find('img', {'class':'shadowedimage'}) if cover_div else None
if cover_img:
self.setCoverImage(url, cover_img['src'])
## Save for use below ## Save for use below
self.soup = soup self.soup = soup
@ -192,3 +200,11 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: %s No text block found -- non-story URL?" % url) raise exceptions.FailedToDownload("Error downloading Chapter: %s No text block found -- non-story URL?" % url)
return self.utf8FromSoup(url, story) return self.utf8FromSoup(url, story)
def before_get_urls_from_page(self,url,normalize):
# To display the links to stories that are not available to guests.
if self.getConfig("username") and self.getConfig("always_login"):
# performLogin extracts token from the soup
soup = self.make_soup(self.get_request(url))
self.performLogin(url, soup)

View file

@ -1,47 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import re
from .base_efiction_adapter import BaseEfictionAdapter
class ItCouldHappenNetSiteAdapter(BaseEfictionAdapter):
    """eFiction adapter for it-could-happen.net."""

    @staticmethod
    def getSiteDomain():
        return 'it-could-happen.net'

    @classmethod
    def getSiteAbbrev(cls):
        # Fixed: first parameter was typo'd as 'seluuf'; classmethods take 'cls'.
        return 'ich'

    @classmethod
    def getDateFormat(cls):
        # Fixed: classmethod first parameter renamed from 'self' to 'cls'.
        return "%B %d, %Y"

    def handleMetadataPair(self, key, value):
        # This site is all one 'category' as it's usually defined and
        # uses Category for what is usually genre.
        if key == 'Categories':
            for val in re.split(r"\s*,\s*", value):
                self.story.addToList('genre', val)
        else:
            super(ItCouldHappenNetSiteAdapter, self).handleMetadataPair(key, value)
def getClass():
    # Module entry point used by the adapter loader to obtain this adapter class.
    return ItCouldHappenNetSiteAdapter

View file

@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging, time
logger = logging.getLogger(__name__)
import re, json
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six.moves import http_cookiejar as cl
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    # Module entry point used by the adapter loader to obtain this adapter class.
    return KakuyomuJpAdapter
# Maps kakuyomu.jp's internal genre enum values (as found in the page's
# Apollo state JSON) to their Japanese display names, used for the
# 'genre' story metadata.
genres = {
    'FANTASY': '異世界ファンタジー',
    'ACTION': '現代ファンタジー',
    'SF': 'SF',
    'LOVE_STORY': '恋愛',
    'ROMANCE': 'ラブコメ',
    'DRAMA': '現代ドラマ',
    'HORROR': 'ホラー',
    'MYSTERY': 'ミステリー',
    'NONFICTION': 'エッセイ・ノンフィクション',
    'HISTORY': '歴史・時代・伝奇',
    'CRITICISM': '創作論・評論',
    'OTHERS': '詩・童話・その他',
    'FAN_FICTION': '二次創作',
}
class KakuyomuJpAdapter(BaseSiteAdapter):
    """Adapter for kakuyomu.jp.

    Metadata and the table of contents are read from the JSON embedded in
    the page's ``__NEXT_DATA__`` script tag (a Next.js/Apollo state blob),
    rather than scraped from rendered HTML.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'kakuyomu')
        self.story.setMetadata('language', 'Japanese')
        # storyId is the last path segment of /works/<id>.
        self.storyId = self.path.split('/')[-1]
        self.story.setMetadata('storyId', self.storyId)

    @staticmethod
    def getSiteDomain():
        return 'kakuyomu.jp'

    @classmethod
    def getSiteExampleURLs(cls):
        return ("https://kakuyomu.jp/works/12341234123412341234")

    def getSiteURLPattern(self):
        return r"^https?://kakuyomu\.jp/works/[0-9]+$"

    def extractChapterUrlsAndMetadata(self):
        """Fetch the work page, parse its embedded Apollo state JSON, and
        populate story metadata plus the chapter (episode) list.

        Raises:
            exceptions.StoryDoesNotExist: on the site's page-not-found text.
        """
        data = self.get_request(self.url)

        # Page could not be found
        if 'お探しのページは見つかりませんでした' in data:
            raise exceptions.StoryDoesNotExist(self.url)

        soup = self.make_soup(data)
        # Apollo cache: a flat dict keyed by '<Type>:<id>'; cross-references
        # appear as {'__ref': '<Type>:<id>'}.
        info = json.loads(soup.find(id='__NEXT_DATA__').contents[0])['props']['pageProps']['__APOLLO_STATE__']
        workKey = 'Work:%s' % self.storyId

        # Title
        self.story.setMetadata('title', info[workKey]['title'])

        # Author
        authorKey = info[workKey]['author']['__ref']
        self.story.setMetadata('authorId', authorKey.split(':')[1])
        # NOTE(review): authorUrl is built from the author's 'name' field,
        # not the id — presumably 'name' is the login/slug; confirm.
        self.story.setMetadata('authorUrl', 'https://kakuyomu.jp/users/%s' % info[authorKey]['name'])
        self.story.setMetadata('author', info[authorKey]['activityName'])

        # Description
        self.setDescription(self.url, info[workKey]['introduction'])
        self.story.setMetadata('catchphrase', info[workKey]['catchphrase'])

        # Date Published and Updated
        # 2024-01-01T03:00:12Z
        self.story.setMetadata('datePublished',
                               makeDate(info[workKey]['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'))
        self.story.setMetadata('dateUpdated',
                               makeDate(info[workKey]['editedAt'], '%Y-%m-%dT%H:%M:%SZ'))

        # Character count (site counts characters, stored as numWords)
        self.story.setMetadata('numWords', info[workKey]['totalCharacterCount'])

        # Status
        completed = info[workKey]['serialStatus'] == 'COMPLETED'
        self.story.setMetadata('status', 'Completed' if completed else 'In-Progress')

        # Warnings -- any content flag bumps the rating from G to R15.
        rating = 'G'
        if info[workKey]['isCruel']:
            rating = 'R15'
            self.story.addToList('warnings', '残酷描写有り')
        if info[workKey]['isViolent']:
            rating = 'R15'
            self.story.addToList('warnings', '暴力描写有り')
        if info[workKey]['isSexual']:
            rating = 'R15'
            self.story.addToList('warnings', '性描写有り')

        # Tags -- an R15-ish tag (e.g. 'R15', 'r-15') sets the rating
        # instead of being kept as a freeform tag.
        for tag in info[workKey]['tagLabels']:
            if re.match(r'[Rr].?[1][5]', tag) is None:
                self.story.addToList('freeformtags', tag)
            else:
                rating = 'R15'

        # Rating
        self.story.setMetadata('rating', rating)

        # Genre
        self.story.setMetadata('genre', genres[info[workKey]['genre']])
        if info[workKey]['genre'] == 'FAN_FICTION':
            fandomKey = info[workKey]['fanFictionSource']['__ref']
            self.story.addToList('fandoms', info[fandomKey]['title'])

        # Ratings, Comments, Etc.
        self.story.setMetadata('reviews', info[workKey]['reviewCount'])
        self.story.setMetadata('points', info[workKey]['totalReviewPoint'])
        self.story.setMetadata('comments', info[workKey]['totalPublicEpisodeCommentCount'])
        self.story.setMetadata('views', info[workKey]['totalReadCount'])
        self.story.setMetadata('follows', info[workKey]['totalFollowers'])
        self.story.setMetadata('collections', len(info[workKey]['publicWorkCollections']))
        self.story.setMetadata('events', info[workKey]['totalWorkContestCount'] + info[workKey]['totalUserEventCount'])
        self.story.setMetadata('published', info[workKey]['hasPublication'])
        # visitorWorkFollowing
        # workReviewByVisitor

        # Chapters, Episodes
        # TOC nodes are in a list
        # each have a list of named episodes
        # each can have a named chapter
        # named chapters can be at depth 1 or 2
        # episodes might be empty (premium subscription)
        prependSectionTitles = self.getConfig('prepend_section_titles', 'firstepisode')
        numEpisodes = 0  # NOTE(review): counted but not used afterwards
        titles = []         # stack of enclosing section titles
        nestingLevel = 0
        newSection = False
        for tocNodeRef in info[workKey]['tableOfContentsV2']:
            tocNode = info[tocNodeRef['__ref']]
            if tocNode['chapter'] is not None:
                chapter = info[tocNode['chapter']['__ref']]
                # pop section titles until we're back at this chapter's depth.
                while chapter['level'] <= nestingLevel:
                    titles.pop()
                    nestingLevel -= 1
                titles.append(chapter['title'])
                nestingLevel = chapter['level']
                newSection = True
            else:
                titles = []
                nestingLevel = 0
                newSection = False
            for episodeRef in tocNode['episodeUnions']:
                # EmptyEpisode entries are paywalled/unavailable -- skip.
                if not episodeRef['__ref'].startswith('EmptyEpisode'):
                    numEpisodes += 1
                    episode = info[episodeRef['__ref']]
                    epUrl = 'https://kakuyomu.jp/works/' + self.storyId + '/episodes/' + episode['id']
                    epTitle = episode['title']
                    # prepend section titles either on every episode ('true')
                    # or only the first episode of a new section ('firstepisode').
                    if ((len(titles) > 0) and
                        ((newSection and prependSectionTitles == 'firstepisode') or
                         prependSectionTitles == 'true')):
                        titles.append(epTitle)
                        # bracket with ZWSP to mark presence of section titles
                        epTitle = u'\u200b' + u'\u3000\u200b'.join(titles)
                        titles.pop()
                    self.add_chapter(epTitle, epUrl)
                    newSection = False

        logger.debug("Story: <%s>", self.story)
        return

    def getChapterText(self, url):
        """Fetch one episode page and return its body text as cleaned HTML."""
        logger.debug('Getting chapter text from <%s>' % url)
        soup = self.make_soup(self.get_request(url))
        soup = soup.find('div', {'class':'widget-episodeBody js-episode-body'})
        if soup is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
        # replace the site-specific classes with a neutral one for output.
        soup.attrs = {'class':'episode-body'}
        return self.utf8FromSoup(url, soup)

View file

@ -144,13 +144,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# Find authorid and URL from... author urls. # Find authorid and URL from... author urls.
pagetitle = soup.find('div',id='pagetitle') pagetitle = soup.find('div',id='pagetitle')
for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")): for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
self.story.addToList('authorId',a['href'].split('=')[1]) self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href']) self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
self.story.addToList('author',stripHTML(a)) self.story.addToList('author',stripHTML(a))
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -166,7 +166,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = stripHTML(labelspan) label = stripHTML(labelspan)
@ -193,7 +193,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [stripHTML(cat) for cat in cats] catstext = [stripHTML(cat) for cat in cats]
for cat in catstext: for cat in catstext:
# ran across one story with an empty <a href="browse.php?type=categories&amp;catid=1"></a> # ran across one story with an empty <a href="browse.php?type=categories&amp;catid=1"></a>
@ -204,7 +204,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
if 'Characters' in label: if 'Characters' in label:
self.story.addToList('characters','Kirk') self.story.addToList('characters','Kirk')
self.story.addToList('characters','Spock') self.story.addToList('characters','Spock')
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [stripHTML(char) for char in chars] charstext = [stripHTML(char) for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',stripHTML(char)) self.story.addToList('characters',stripHTML(char))
@ -213,7 +213,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [stripHTML(genre) for genre in genres] genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -223,7 +223,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## has 'Story Type', which is much more what most sites ## has 'Story Type', which is much more what most sites
## call genre. ## call genre.
if 'Story Type' in label: if 'Story Type' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
genrestext = [stripHTML(genre) for genre in genres] genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -233,21 +233,21 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [stripHTML(warning) for warning in warnings] warningstext = [stripHTML(warning) for warning in warnings]
self.warning = ', '.join(warningstext) self.warning = ', '.join(warningstext)
for warning in warningstext: for warning in warningstext:
self.story.addToList('warnings',stripHTML(warning)) self.story.addToList('warnings',stripHTML(warning))
if 'Universe' in label: if 'Universe' in label:
universes = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX universes = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
universestext = [stripHTML(universe) for universe in universes] universestext = [stripHTML(universe) for universe in universes]
self.universe = ', '.join(universestext) self.universe = ', '.join(universestext)
for universe in universestext: for universe in universestext:
self.story.addToList('universe',stripHTML(universe)) self.story.addToList('universe',stripHTML(universe))
if 'Crossover Fandom' in label: if 'Crossover Fandom' in label:
crossoverfandoms = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX crossoverfandoms = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms] crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms]
self.crossoverfandom = ', '.join(crossoverfandomstext) self.crossoverfandom = ', '.join(crossoverfandomstext)
for crossoverfandom in crossoverfandomstext: for crossoverfandom in crossoverfandomstext:
@ -274,7 +274,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href'] series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -17,6 +17,7 @@
# Software: eFiction # Software: eFiction
from __future__ import absolute_import from __future__ import absolute_import
from ..htmlcleanup import stripHTML
from .base_efiction_adapter import BaseEfictionAdapter from .base_efiction_adapter import BaseEfictionAdapter
class LibraryOfMoriaComAdapter(BaseEfictionAdapter): class LibraryOfMoriaComAdapter(BaseEfictionAdapter):
@ -37,5 +38,19 @@ class LibraryOfMoriaComAdapter(BaseEfictionAdapter):
def getDateFormat(self): def getDateFormat(self):
return "%B %d, %Y" return "%B %d, %Y"
def getRatingFromTOC(self):
# In many eFiction sites, the Rating is not included in
# print page, but is on the TOC page. At least one site's rating
# (libraryofmoriacom) differs enough to be problematic.
toc = self.url + "&index=1"
soup = self.make_soup(self.get_request(toc))
for label in soup.select('div.listbox b'):
if 'Rated:' in label or 'Rating:' in stripHTML(label):
rating = stripHTML(label.next_sibling)
if rating.endswith(' ['):
rating = rating[:-2]
self.story.setMetadata('rating',rating)
break
def getClass(): def getClass():
return LibraryOfMoriaComAdapter return LibraryOfMoriaComAdapter

View file

@ -19,6 +19,7 @@ from __future__ import absolute_import
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
import re import re
import json
from bs4.element import Comment from bs4.element import Comment
from ..htmlcleanup import stripHTML from ..htmlcleanup import stripHTML
@ -37,7 +38,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url): def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url) BaseSiteAdapter.__init__(self, config, url)
logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url) #logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)
# Each adapter needs to have a unique site abbreviation. # Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','litero') self.story.setMetadata('siteabbrev','litero')
@ -47,16 +48,15 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
# where first chapter doesn't have '-ch-'. # where first chapter doesn't have '-ch-'.
# Now just rely on extractChapterUrlsAndMetadata to reset # Now just rely on extractChapterUrlsAndMetadata to reset
# storyId to first chapter link. # storyId to first chapter link.
storyId = self.parsedUrl.path.split('/',)[2]
## DON'T normalize to www.literotica.com--keep for language, ## DON'T normalize to www.literotica.com--keep for language,
## which will be set in _setURL(url). Also, multi-chapter ## which will be set in _setURL(url). Also, multi-chapter
## have been keeping the language when 'normalizing' to first ## have been keeping the language when 'normalizing' to first
## chapter. ## chapter.
url = re.sub(r"^(https?://)"+LANG_RE+r"(\.i)?", url = re.sub(r"^(https?://)"+LANG_RE+r"(\.i)?",
r"\1\2", r"https://\2",
url) url)
url = url.replace('/beta/s/','/s/') # to allow beta site URLs. url = url.replace('/beta/','/') # to allow beta site URLs.
## strip ?page=... ## strip ?page=...
url = re.sub(r"\?page=.*$", "", url) url = re.sub(r"\?page=.*$", "", url)
@ -66,7 +66,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
# The date format will vary from site to site. # The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%m/%d/%y" self.dateformat = "%m/%d/%Y"
@staticmethod @staticmethod
def getSiteDomain(): def getSiteDomain():
@ -78,10 +78,12 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
@classmethod @classmethod
def getSiteExampleURLs(cls): def getSiteExampleURLs(cls):
return "http://www.literotica.com/s/story-title https://www.literotica.com/s/story-title http://portuguese.literotica.com/s/story-title http://german.literotica.com/s/story-title" return "https://www.literotica.com/s/story-title https://www.literotica.com/series/se/9999999 https://www.literotica.com/s/story-title https://www.literotica.com/i/image-or-comic-title https://www.literotica.com/p/poem-title https://portuguese.literotica.com/s/story-title https://german.literotica.com/s/story-title"
def getSiteURLPattern(self): def getSiteURLPattern(self):
return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/(beta/)?s/([a-zA-Z0-9_-]+)" # also https://www.literotica.com/series/se/80075773
# /s/ for story, /i/ for image/comic, /p/ for poem
return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/((beta/)?[sip]/([a-zA-Z0-9_-]+)|series/se/(?P<storyseriesid>[a-zA-Z0-9_-]+))"
def _setURL(self,url): def _setURL(self,url):
# logger.debug("set URL:%s"%url) # logger.debug("set URL:%s"%url)
@ -90,260 +92,337 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
lang = m.group('lang') lang = m.group('lang')
if lang not in ('www','other'): if lang not in ('www','other'):
self.story.setMetadata('language',lang.capitalize()) self.story.setMetadata('language',lang.capitalize())
# reset storyId
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[-1])
# logger.debug("language:%s"%self.story.getMetadata('language')) # logger.debug("language:%s"%self.story.getMetadata('language'))
def getCategories(self, soup): ## apply clean_chapter_titles
if self.getConfig("use_meta_keywords"): def add_chapter(self,chapter_title,url,othermeta={}):
categories = soup.find("meta", {"name":"keywords"})['content'].split(',') if self.getConfig("clean_chapter_titles"):
categories = [c for c in categories if not self.story.getMetadata('title') in c] storytitle = self.story.getMetadataRaw('title').lower()
if self.story.getMetadata('author') in categories: chapter_name_type = None
categories.remove(self.story.getMetadata('author')) # strip trailing ch or pt before doing the chapter clean.
# logger.debug("Meta = %s" % categories) # doesn't remove from story title metadata
for category in categories: storytitle = re.sub(r'^(.*?)( (ch|pt))?$',r'\1',storytitle)
# logger.debug("\tCategory=%s" % category) if chapter_title.lower().startswith(storytitle):
# self.story.addToList('category', category.title()) chapter = chapter_title[len(storytitle):].strip()
self.story.addToList('eroticatags', category.title()) # logger.debug('\tChapter: "%s"' % chapter)
if chapter == '':
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
# Sometimes the first chapter does not have type of chapter
if self.num_chapters() == 0:
# logger.debug('\tChapter: first chapter without chapter type')
chapter_name_type = None
else:
separater_char = chapter[0]
# logger.debug('\tseparater_char: "%s"' % separater_char)
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
# logger.debug('\tChapter: "%s"' % chapter)
if chapter.lower().startswith('ch.'):
chapter = chapter[len('ch.'):].strip()
try:
chapter_title = 'Chapter %d' % int(chapter)
except:
chapter_title = 'Chapter %s' % chapter
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif chapter.lower().startswith('pt.'):
chapter = chapter[len('pt.'):].strip()
try:
chapter_title = 'Part %d' % int(chapter)
except:
chapter_title = 'Part %s' % chapter
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif separater_char in [":", "-"]:
chapter_title = chapter
# logger.debug('\tChapter: taking chapter text as whole')
super(LiteroticaSiteAdapter, self).add_chapter(chapter_title,url,othermeta)
def extractChapterUrlsAndMetadata(self): def extractChapterUrlsAndMetadata(self):
""" """
NOTE: Some stories can have versions, In April 2024, site introduced significant changes, including
e.g. /my-story-ch-05-version-10 adding a 'Story Series' page and link to it in each chapter.
NOTE: If two stories share the same title, a running index is added, But not all stories, one-shots don't have 'Story Series'.
e.g.: /my-story-ch-02-1
Strategy: literotica has 'Story Series' & 'Story'. FFF calls them 'Story' & 'Chapters'
* Go to author's page, search for the current story link, See https://github.com/JimmXinu/FanFicFare/issues/1058#issuecomment-2078490037
* If it's in a tr.root-story => One-part story
* , get metadata and be done So /series/se/ will be the story URL for multi chapters but
* If it's in a tr.sl => Chapter in series keep individual 'chapter' URL for one-shots.
* Search up from there until we find a tr.ser-ttl (this is the
story)
* Gather metadata
* Search down from there for all tr.sl until the next
tr.ser-ttl, foreach
* Chapter link is there
""" """
logger.debug("Chapter/Story URL: <%s> " % self.url)
if not (self.is_adult or self.getConfig("is_adult")): if not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url) raise exceptions.AdultCheckRequired(self.url)
# logger.debug("Chapter/Story URL: <%s> " % self.url) (data,rurl) = self.get_request_redirected(self.url)
# logger.debug(data)
(data1,rurl) = self.get_request_redirected(self.url)
## for language domains ## for language domains
self._setURL(rurl) self._setURL(rurl)
logger.debug("set opened url:%s"%self.url) logger.debug("set opened url:%s"%self.url)
soup1 = self.make_soup(data1) soup = self.make_soup(data)
#strip comments from soup
[comment.extract() for comment in soup1.findAll(text=lambda text:isinstance(text, Comment))]
if "This submission is awaiting moderator's approval" in data1: if "This submission is awaiting moderator's approval" in data:
raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url) raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url)
## 2025Feb - domains other than www now use different HTML.
## Need to look for two different versions of basically
## everything.
## not series URL, assumed to be a chapter. Look for Story
## Info block of post-beta page. I don't think it should happen?
if '/series/se' not in self.url:
#logger.debug(data)
## looking for /series/se URL to indicate this is a
## chapter.
if not soup.select_one('div.page__aside') and not soup.select_one('div.sidebar') and not soup.select_one('div[class^="_sidebar_"]'):
raise exceptions.FailedToDownload("Missing Story Info block, Beta turned off?")
storyseriestag = soup.select_one('a.bn_av')
if not storyseriestag:
storyseriestag = soup.select_one('a[class^="_files__link_"]')
# logger.debug("Story Series Tag:%s"%storyseriestag)
if storyseriestag:
self._setURL(storyseriestag['href'])
data = self.get_request(storyseriestag['href'])
# logger.debug(data)
soup = self.make_soup(data)
# logger.debug(soup)
else:
logger.debug("One-shot")
isSingleStory = '/series/se' not in self.url
if not isSingleStory:
# Normilize the url?
state = re.findall(r"prefix\=\"/series/\",state='(.+?)'</script>", data)
json_state = json.loads(state[0].replace("\\'","'").replace("\\\\","\\"))
url_series_id = unicode(re.match(self.getSiteURLPattern(),self.url).group('storyseriesid'))
json_series_id = unicode(json_state['series']['data']['id'])
if json_series_id != url_series_id:
res = re.sub(url_series_id, json_series_id, unicode(self.url))
logger.debug("Normalized url: %s"%res)
self._setURL(res)
## common between one-shots and multi-chapters
# title
self.story.setMetadata('title', stripHTML(soup.select_one('h1')))
# logger.debug(self.story.getMetadata('title'))
# author # author
authora = soup1.find("a", class_="y_eU") ## XXX This is still the author URL like:
## https://www.literotica.com/stories/memberpage.php?uid=999999&page=submissions
## because that's what's on the page. It redirects to the /authors/ page.
## Only way I know right now to get the /authors/ is to make
## the req and look at the redirect.
## Should change to /authors/ if/when it starts appearing.
## Assuming it's in the same place.
authora = soup.find("a", class_="y_eU")
if not authora:
authora = soup.select_one('a[class^="_author__title"]')
authorurl = authora['href'] authorurl = authora['href']
# logger.debug(authora)
# logger.debug(authorurl)
self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0])
if authorurl.startswith('//'): if authorurl.startswith('//'):
authorurl = self.parsedUrl.scheme+':'+authorurl authorurl = self.parsedUrl.scheme+':'+authorurl
# logger.debug(authora)
# logger.debug(authorurl)
self.story.setMetadata('author', stripHTML(authora))
self.story.setMetadata('authorUrl', authorurl) self.story.setMetadata('authorUrl', authorurl)
self.story.setMetadata('author', authora.text) if '?' in authorurl:
self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0])
elif '/authors/' in authorurl:
self.story.setMetadata('authorId', authorurl.split('/')[-1])
else: # if all else fails
self.story.setMetadata('authorId', stripHTML(authora))
# get the author page if soup.select('div#tabpanel-tags'):
dataAuth = self.get_request(authorurl) # logger.debug("tags1")
soupAuth = self.make_soup(dataAuth) self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div#tabpanel-tags a.av_as') ])
#strip comments from soup if soup.select('div[class^="_widget__tags_"]'):
[comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, Comment))] # logger.debug("tags2")
# logger.debug(soupAuth) self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div[class^="_widget__tags_"] a[class^="_tag_item_"]') ])
# logger.debug(self.story.getList('eroticatags'))
## Find link to url in author's page ## look first for 'Series Introduction', then Info panel short desc
## site has started using //domain.name/asdf urls remove https?: from front ## series can have either, so put in common code.
## site has started putting https back on again. desc = []
## site is now using language specific german.lit... etc on author pages. introtag = soup.select_one('div.bp_rh')
## site is now back to using www.lit... etc on author pages. descdiv = soup.select_one('div#tabpanel-info div.bn_B') or \
search_url_re = r"https?://"+LANG_RE+r"(\.i)?\." + re.escape(self.getSiteDomain()) + self.url[self.url.index('/s/'):] soup.select_one('div[class^="_tab__pane_"] div[class^="_widget__info_"]')
logger.debug(search_url_re) if introtag and stripHTML(introtag):
storyLink = soupAuth.find('a', href=re.compile(search_url_re)) # make sure there's something in the tag.
# storyLink = soupAuth.find('a', href=re.compile(r'.*literotica.com/s/'+re.escape(self.story.getMetadata('storyId')) )) # logger.debug("intro %s"%introtag)
# storyLink = soupAuth.find('a', href=re.compile(r'(https?:)?'+re.escape(self.url[self.url.index(':')+1:]).replace(r'www',r'[^\.]+') )) desc.append(unicode(introtag))
# storyLink = soupAuth.find('a', href=self.url)#[self.url.index(':')+1:]) elif descdiv and stripHTML(descdiv):
# make sure there's something in the tag.
# logger.debug("desc %s"%descdiv)
desc.append(unicode(descdiv))
if not desc or self.getConfig("include_chapter_descriptions_in_summary"):
## Only for backward compatibility with 'stories' that
## don't have an intro or short desc.
descriptions = []
for i, chapterdesctag in enumerate(soup.select('p.br_rk')):
# remove category link, but only temporarily
a = chapterdesctag.a.extract()
descriptions.append("%d. %s" % (i + 1, stripHTML(chapterdesctag)))
# now put it back--it's used below
chapterdesctag.append(a)
desc.append(unicode("<p>"+"</p>\n<p>".join(descriptions)+"</p>"))
if storyLink is not None: self.setDescription(self.url,u''.join(desc))
# pull the published date from the author page
# default values from single link. Updated below if multiple chapter.
# logger.debug("Found story on the author page.")
date = storyLink.parent.parent.findAll('td')[-1].text
self.story.setMetadata('datePublished', makeDate(date, self.dateformat))
self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat))
if storyLink is not None:
urlTr = storyLink.parent.parent
if "sl" in urlTr['class']:
isSingleStory = False
else:
isSingleStory = True
else:
raise exceptions.FailedToDownload("Couldn't find story <%s> on author's page <%s>" % (self.url, authorurl))
if isSingleStory: if isSingleStory:
self.story.setMetadata('title', storyLink.text.strip('/')) ## one-shots don't *display* date info, but they have it
# logger.debug('Title: "%s"' % storyLink.text.strip('/')) ## hidden in <script>
self.setDescription(authorurl, urlTr.findAll("td")[1].text) ## shows _date_approve "date_approve":"01/31/2024"
self.story.addToList('category', urlTr.findAll("td")[2].text)
# self.story.addToList('eroticatags', urlTr.findAll("td")[2].text) ## multichap also have "date_approve", but they have
date = urlTr.findAll('td')[-1].text ## several and they're more than just the story chapters.
self.story.setMetadata('datePublished', makeDate(date, self.dateformat)) date = re.search(r'"date_approve":"(\d\d/\d\d/\d\d\d\d)"',data)
self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat)) if not date:
self.add_chapter(storyLink.text, self.url) date = re.search(r'date_approve:"(\d\d/\d\d/\d\d\d\d)"',data)
averrating = stripHTML(storyLink.parent) if date:
## title (0.00) dateval = makeDate(date.group(1), self.dateformat)
averrating = averrating[averrating.rfind('(')+1:averrating.rfind(')')] self.story.setMetadata('datePublished', dateval)
try: self.story.setMetadata('dateUpdated', dateval)
self.story.setMetadata('averrating', float(averrating))
except: ## one-shots don't have same json data to get aver_rating
pass ## from below. This kludge matches the data_approve
# self.story.setMetadata('averrating',averrating) rateall = re.search(r'rate_all:([\d\.]+)',data)
# parse out the list of chapters if rateall:
self.story.setMetadata('averrating', '%4.2f' % float(rateall.group(1)))
## one-shots assumed completed.
self.story.setMetadata('status','Completed')
# Add the category from the breadcumb.
breadcrumbs = soup.find('div', id='BreadCrumbComponent')
if not breadcrumbs:
breadcrumbs = soup.select_one('ul[class^="_breadcrumbs_list_"]')
if not breadcrumbs:
# _breadcrumbs_18u7l_1
breadcrumbs = soup.select_one('nav[class^="_breadcrumbs_"]')
self.story.addToList('category', breadcrumbs.find_all('a')[1].string)
## one-shot chapter
self.add_chapter(self.story.getMetadata('title'), self.url)
else: else:
seriesTr = urlTr.previousSibling ## Multi-chapter stories. AKA multi-part 'Story Series'.
while 'ser-ttl' not in seriesTr['class']: bn_antags = soup.select('div#tabpanel-info p.bn_an')
seriesTr = seriesTr.previousSibling # logger.debug(bn_antags)
m = re.match(r"^(?P<title>.*?):\s(?P<numChapters>\d+)\sPart\sSeries$", seriesTr.find("strong").text) if bn_antags and not self.getConfig("dates_from_chapters"):
self.story.setMetadata('title', m.group('title')) ## Use dates from series metadata unless dates_from_chapters is enabled
seriesTitle = m.group('title') dates = []
for datetag in bn_antags[:2]:
datetxt = stripHTML(datetag)
# remove 'Started:' 'Updated:'
# Assume can't use 'Started:' 'Updated:' (vs [0] or [1]) because of lang localization
datetxt = datetxt[datetxt.index(':')+1:]
dates.append(datetxt)
# logger.debug(dates)
self.story.setMetadata('datePublished', makeDate(dates[0], self.dateformat))
self.story.setMetadata('dateUpdated', makeDate(dates[1], self.dateformat))
## Walk the chapters ## bn_antags[2] contains "The author has completed this series." or "The author is still actively writing this series."
chapterTr = seriesTr.nextSibling ## I won't be surprised if this breaks later because of lang localization
dates = [] if "completed" in stripHTML(bn_antags[-1]):
descriptions = [] self.story.setMetadata('status','Completed')
ratings = [] else:
chapters = [] self.story.setMetadata('status','In-Progress')
chapter_name_type = None
while chapterTr is not None and 'sl' in chapterTr['class']:
description = "%d. %s" % (len(descriptions)+1,stripHTML(chapterTr.findAll("td")[1]))
description = stripHTML(chapterTr.findAll("td")[1])
chapterLink = chapterTr.find("td", "fc").find("a")
if self.getConfig('chapter_categories_use_all'):
self.story.addToList('category', chapterTr.findAll("td")[2].text)
# self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)
dates.append(pub_date)
chapterTr = chapterTr.nextSibling
chapter_title = chapterLink.text ## category from chapter list
if self.getConfig("clean_chapter_titles"): self.story.extendList('category',[ stripHTML(t) for t in soup.select('a.br_rl') ])
# logger.debug('\tChapter Name: "%s"' % chapterLink.text)
if chapterLink.text.lower().startswith(seriesTitle.lower()):
chapter = chapterLink.text[len(seriesTitle):].strip()
# logger.debug('\tChapter: "%s"' % chapter)
if chapter == '':
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
# Sometimes the first chapter does not have type of chapter
if self.num_chapters() == 0:
logger.debug('\tChapter: first chapter without chapter type')
chapter_name_type = None
else:
separater_char = chapter[0]
# logger.debug('\tseparater_char: "%s"' % separater_char)
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
# logger.debug('\tChapter: "%s"' % chapter)
if chapter.lower().startswith('ch.'):
chapter = chapter[len('ch.'):].strip()
try:
chapter_title = 'Chapter %d' % int(chapter)
except:
chapter_title = 'Chapter %s' % chapter
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif chapter.lower().startswith('pt.'):
chapter = chapter[len('pt.'):]
try:
chapter_title = 'Part %d' % int(chapter)
except:
chapter_title = 'Part %s' % chapter
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif separater_char in [":", "-"]:
chapter_title = chapter
logger.debug('\tChapter: taking chapter text as whole')
# pages include full URLs. for chapteratag in soup.select('a.br_rj'):
chapurl = chapterLink['href'] chapter_title = stripHTML(chapteratag)
if chapurl.startswith('//'): # logger.debug('\tChapter: "%s"' % chapteratag)
chapurl = self.parsedUrl.scheme + ':' + chapurl # /series/se does include full URLs current.
chapurl = chapteratag['href']
# logger.debug("Chapter URL: " + chapurl) # logger.debug("Chapter URL: " + chapurl)
# logger.debug("Chapter Title: " + chapter_title) self.add_chapter(chapter_title, chapurl)
# logger.debug("Chapter description: " + description)
chapters.append((chapter_title, chapurl, description, pub_date))
# self.add_chapter(chapter_title, chapurl)
numrating = stripHTML(chapterLink.parent)
## title (0.00)
numrating = numrating[numrating.rfind('(')+1:numrating.rfind(')')]
try:
ratings.append(float(numrating))
except:
pass
if self.getConfig("clean_chapter_titles") \ # <img src="https://uploads.literotica.com/series/cover/813-1695143444-desktop-x1.jpg" alt="Series cover">
and chapter_name_type is not None \ coverimg = soup.select_one('img[alt="Series cover"]')
and not chapters[0][0].startswith(chapter_name_type): if coverimg:
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type) self.setCoverImage(self.url,coverimg['src'])
logger.debug('\tChapter: first chapter="%s"' % chapters[0][0])
logger.debug('\tChapter: first chapter number="%s"' % chapters[0][0][len('Chapter'):])
chapters[0] = ("%s %s" % (chapter_name_type, chapters[0][0][len('Chapter'):].strip()),
chapters[0][1],
chapters[0][2],
chapters[0][3]
)
chapters = sorted(chapters, key=lambda chapter: chapter[3]) #### Attempting averrating from JS metadata.
for i, chapter in enumerate(chapters): #### also alternate chapters from json
self.add_chapter(chapter[0], chapter[1]) try:
descriptions.append("%d. %s" % (i + 1, chapter[2])) state_start="state='"
## Set the oldest date as publication date, the newest as update date state_end="'</script>"
dates.sort() i = data.index(state_start)
self.story.setMetadata('datePublished', dates[0]) if i:
self.story.setMetadata('dateUpdated', dates[-1]) state = data[i+len(state_start):data.index(state_end,i)].replace("\\'","'").replace("\\\\","\\")
self.story.setMetadata('datePublished', chapters[0][3]) if state:
self.story.setMetadata('dateUpdated', chapters[-1][3]) # logger.debug(state)
## Set description to joint chapter descriptions json_state = json.loads(state)
self.setDescription(authorurl,"<p>"+"</p>\n<p>".join(descriptions)+"</p>") # logger.debug(json.dumps(json_state, sort_keys=True,indent=2, separators=(',', ':')))
all_rates = []
if 'series' in json_state:
all_rates = [ float(x['rate_all']) for x in json_state['series']['works'] ]
if len(ratings) > 0: ## Extract dates from chapter approval dates if dates_from_chapters is enabled
self.story.setMetadata('averrating','%4.2f' % (sum(ratings) / float(len(ratings)))) if self.getConfig("dates_from_chapters"):
date_approvals = []
for work in json_state['series']['works']:
if 'date_approve' in work:
try:
date_approvals.append(makeDate(work['date_approve'], self.dateformat))
except:
pass
if date_approvals:
# Oldest date is published, newest is updated
date_approvals.sort()
self.story.setMetadata('datePublished', date_approvals[0])
self.story.setMetadata('dateUpdated', date_approvals[-1])
if all_rates:
self.story.setMetadata('averrating', '%4.2f' % (sum(all_rates) / float(len(all_rates))))
# normalize on first chapter URL. ## alternate chapters from JSON
self._setURL(self.get_chapter(0,'url')) if self.num_chapters() < 1:
logger.debug("Getting Chapters from series JSON")
seriesid = json_state.get('series',{}).get('data',{}).get('id',None)
if seriesid:
logger.info("Fetching chapter data from JSON")
logger.debug(seriesid)
series_json = json.loads(self.get_request('https://literotica.com/api/3/series/%s/works'%seriesid))
# logger.debug(json.dumps(series_json, sort_keys=True,indent=2, separators=(',', ':')))
for chap in series_json:
self.add_chapter(chap['title'], 'https://www.literotica.com/s/'+chap['url'])
# reset storyId to first chapter. ## Collect tags from series/story page if tags_from_chapters is enabled
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) if self.getConfig("tags_from_chapters"):
self.story.extendList('eroticatags', [ unicode(t['tag']).title() for t in chap['tags'] ])
# Add the category from the breadcumb. This might duplicate a category already added. except Exception as e:
self.story.addToList('category', soup1.find('div', id='BreadCrumbComponent').findAll('a')[1].string) logger.warning("Processing JSON failed. (%s)"%e)
self.getCategories(soup1)
## Features removed because not supportable by new site form:
## averrating metadata entry
## order_chapters_by_date option
## use_meta_keywords option
return return
def getPageText(self, raw_page, url): def getPageText(self, raw_page, url):
# logger.debug('Getting page text') logger.debug('Getting page text')
# logger.debug(soup)
raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>') raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
# logger.debug("\tChapter text: %s" % raw_page) # logger.debug("\tChapter text: %s" % raw_page)
page_soup = self.make_soup(raw_page) page_soup = self.make_soup(raw_page)
[comment.extract() for comment in page_soup.findAll(text=lambda text:isinstance(text, Comment))] [comment.extract() for comment in page_soup.find_all(string=lambda text:isinstance(text, Comment))]
story2 = page_soup.find('div', 'aa_ht').div fullhtml = ""
# logger.debug('getPageText - story2: %s' % story2) for aa_ht_div in page_soup.find_all('div', 'aa_ht') + page_soup.select('div[class^="_article__content_"]'):
if aa_ht_div.div:
fullhtml = unicode(story2) html = unicode(aa_ht_div.div)
# logger.debug(fullhtml) # Strip some starting and ending tags,
# Strip some starting and ending tags, html = re.sub(r'^<div.*?>', r'', html)
fullhtml = re.sub(r'^<div.*?>', r'', fullhtml) html = re.sub(r'</div>$', r'', html)
fullhtml = re.sub(r'</div>$', r'', fullhtml) html = re.sub(r'<p></p>$', r'', html)
fullhtml = re.sub(r'<p></p>$', r'', fullhtml) fullhtml = fullhtml + html
# logger.debug('getPageText - fullhtml: %s' % fullhtml) # logger.debug('getPageText - fullhtml: %s' % fullhtml)
return fullhtml return fullhtml
def getChapterText(self, url): def getChapterText(self, url):
@ -353,9 +432,15 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
raw_page = self.get_request(url) raw_page = self.get_request(url)
page_soup = self.make_soup(raw_page) page_soup = self.make_soup(raw_page)
pages = page_soup.find('div',class_='l_bH') pages = page_soup.find('div',class_='l_bH')
if not pages:
pages = page_soup.select_one('div._pagination_h0sum_1')
if not pages:
pages = page_soup.select_one('div.clearfix.panel._pagination_1400x_1')
if not pages:
pages = page_soup.select_one('div[class^="panel clearfix _pagination_"]')
# logger.debug(pages)
fullhtml = "" fullhtml = ""
self.getCategories(page_soup)
chapter_description = '' chapter_description = ''
if self.getConfig("description_in_chapter"): if self.getConfig("description_in_chapter"):
chapter_description = page_soup.find("meta", {"name" : "description"})['content'] chapter_description = page_soup.find("meta", {"name" : "description"})['content']
@ -366,7 +451,10 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## look for highest numbered page, they're not all listed ## look for highest numbered page, they're not all listed
## when there are many. ## when there are many.
last_page_link = pages.find_all('a', class_='l_bJ')[-1] last_page_links = pages.find_all('a', class_='l_bJ')
if not last_page_links:
last_page_links = pages.select('a[class^="_pagination__item_"]')
last_page_link = last_page_links[-1]
last_page_no = int(urlparse.parse_qs(last_page_link['href'].split('?')[1])['page'][0]) last_page_no = int(urlparse.parse_qs(last_page_link['href'].split('?')[1])['page'][0])
# logger.debug(last_page_no) # logger.debug(last_page_no)
for page_no in range(2, last_page_no+1): for page_no in range(2, last_page_no+1):
@ -375,7 +463,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
raw_page = self.get_request(page_url) raw_page = self.get_request(page_url)
fullhtml += self.getPageText(raw_page, url) fullhtml += self.getPageText(raw_page, url)
# logger.debug(fullhtml) #logger.debug(fullhtml)
page_soup = self.make_soup(fullhtml) page_soup = self.make_soup(fullhtml)
fullhtml = self.utf8FromSoup(url, self.make_soup(fullhtml)) fullhtml = self.utf8FromSoup(url, self.make_soup(fullhtml))
fullhtml = chapter_description + fullhtml fullhtml = chapter_description + fullhtml
@ -383,6 +471,123 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
return fullhtml return fullhtml
def get_urls_from_page(self,url,normalize):
from ..geturls import get_urls_from_html
## hook for logins, etc.
self.before_get_urls_from_page(url,normalize)
# this way it uses User-Agent or other special settings.
data = self.get_request(url,usecache=False)
soup = self.make_soup(data)
page_urls = get_urls_from_html(soup, url, configuration=self.configuration, normalize=normalize)
if not self.getConfig("fetch_stories_from_api",True):
logger.debug('fetch_stories_from_api Not enabled')
return {'urllist': page_urls}
user_story_list = re.search(r'literotica\.com/authors/.+?/lists\?listid=(?P<list_id>\d+)', url)
fav_authors = re.search(r'literotica\.com/authors/.+?/favorites', url)
written = re.search(r'literotica.com/authors/.+?/works/', url)
logger.debug((bool(user_story_list), bool(fav_authors), bool(written)))
# If the url is not supported
if not user_story_list and not fav_authors and not written:
logger.debug('No supported link. %s', url)
return {'urllist':page_urls}
# Grabbing the main list where chapters are contained.
if user_story_list:
js_story_list = re.search(r';\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]\);\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]=\{success:!\d,current_page:(?P<current_page>\d+?),last_page:(?P<last_page>\d+?),total:\d+?,per_page:\d+,(has_series:!\d)?data:\$R\[\d+?\]=\[\$R\[\d+?\]=(?P<data>.+)\}\]\}\);', data) # }] } } }); \$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]\);\$R\[\d+?]\(\$R\[\d+?\],\$R\[\d+?\]=\{sliders:
logger.debug('user_story_list ID [%s]'%user_story_list.group('list_id'))
else:
js_story_list = re.search(r'\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]={current_page:(?P<current_page>\d+?),last_page:(?P<last_page>\d+?),total:\d+?,per_page:\d+,(has_series:!\d,)?data:\$R\[\d+\]=\[\$R\[\d+\]=\{(?!aim)(?P<data>.+)\}\);_\$HY\.r\[', data)
# In case the regex becomes outdated
if not js_story_list:
logger.debug('Failed to grab data from the js.')
return {'urllist':page_urls}
user = None
script_tags = soup.find_all('script')
for script in script_tags:
if not script.string:
continue
# Getting author from the js.
user = re.search(r'_\$HY\.r\[\"AuthorQuery\[\\\"(?P<author>.+?)\\\"\]\"\]', script.string)
if user != None:
logger.debug("User: [%s]"%user.group('author'))
break
else:
logger.debug('Failed to get a username')
return {'urllist': page_urls}
# Extract the current (should be 1) and last page numbers from the js.
logger.debug("Pages %s/%s"%(js_story_list.group('current_page'), js_story_list.group('last_page')))
urls = []
# Necessary to format a proper link as there were no visible data specifying what kind of link that should be.
cat_to_link = {'adult-comics': 'i', 'erotic-art': 'i', 'illustrated-poetry': 'p', 'erotic-audio-poetry': 'p', 'erotic-poetry': 'p', 'non-erotic-poetry': 'p'}
stories_found = re.findall(r"category_info:\$R\[.*?type:\".+?\",pageUrl:\"(.+?)\"}.+?,type:\"(.+?)\",url:\"(.+?)\",", js_story_list.group('data'))
for story in stories_found:
story_category, story_type, story_url = story
urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(story_category, 's'), story_url))
# Removes the duplicates
seen = set()
urls = [x for x in (page_urls + urls) if not (x in seen or seen.add(x))]
logger.debug("Found [%s] stories so far."%len(urls))
# Sometimes the rest of the stories are burried in the js so no fetching in necessery.
if js_story_list.group('last_page') == js_story_list.group('current_page'):
return {'urllist': urls}
user = urlparse.quote(user.group(1))
logger.debug("Escaped user: [%s]"%user)
if written:
category = re.search(r"_\$HY\.r\[\"AuthorSeriesAndWorksQuery\[\\\".+?\\\",\\\"\D+?\\\",\\\"(?P<type>\D+?)\\\"\]\"\]=\$R\[\d+?\]=\$R\[\d+?\]\(\$R\[\d+?\]=\{", data)
elif fav_authors:
category = re.search(r"_\$HY\.r\[\"AuthorFavoriteWorksQuery\[\\\".+?\\\",\\\"(?P<type>\D+?)\\\",\d\]\"\]=\$R\[\d+?\]=\$R\[\d+?\]\(\$R\[\d+?\]={", data)
if not user_story_list and not category:
logger.debug("Type of works not found")
return {'urllist': urls}
last_page = int(js_story_list.group('last_page'))
current_page = int(js_story_list.group('current_page')) + 1
# Fetching the remaining urls from api. Can't trust the number given about the pages left from a website. Sometimes even the api returns outdated number of pages.
while current_page <= last_page:
i = len(urls)
logger.debug("Pages %s/%s"%(current_page, int(last_page)))
if fav_authors:
jsn = self.get_request('https://literotica.com/api/3/users/{}/favorite/works?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22type%22%3A%22{}%22%2C%22withSeriesDetails%22%3Atrue%7D'.format(user, current_page, category.group('type')))
elif user_story_list:
jsn = self.get_request('https://literotica.com/api/3/users/{}/list/{}?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22withSeriesDetails%22%3Atrue%7D'.format(user, user_story_list.group('list_id'), current_page))
else:
jsn = self.get_request('https://literotica.com/api/3/users/{}/series_and_works?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22sort%22%3A%22date%22%2C%22type%22%3A%22{}%22%2C%22listType%22%3A%22expanded%22%7D'.format(user, current_page, category.group('type')))
urls_data = json.loads(jsn)
last_page = urls_data["last_page"]
current_page = int(urls_data["current_page"]) + 1
for story in urls_data['data']:
#logger.debug('parts' in story)
if story['url'] and story.get('work_count') == None:
urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(story["category_info"]["pageUrl"], 's'), str(story['url'])))
continue
# Most of the time series has no url specified and contains all of the story links belonging to the series
urls.append('https://www.literotica.com/series/se/%s'%str(story['id']))
for series_story in story['parts']:
urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(series_story["category_info"]["pageUrl"], 's'), str(series_story['url'])))
logger.debug("Found [%s] stories."%(len(urls) - i))
# Again removing duplicates.
seen = set()
urls = [x for x in urls if not (x in seen or seen.add(x))]
logger.debug("Found total of [%s] stories"%len(urls))
return {'urllist':urls}
def getClass(): def getClass():
return LiteroticaSiteAdapter return LiteroticaSiteAdapter

View file

@ -1,50 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2018 FanFicFare team
# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
##################################################################################
### Rewritten by: GComyn on November, 06, 2016
### Original was adapter_fannation.py
##################################################################################
from __future__ import absolute_import
from .base_efiction_adapter import BaseEfictionAdapter
class LooseLugsComAdapter(BaseEfictionAdapter):
    """Adapter for www.looselugs.com, a stock eFiction archive.

    All scraping behavior comes from BaseEfictionAdapter; this class only
    supplies the site-specific constants.
    """

    @staticmethod
    def getSiteDomain():
        # The site domain, including 'www' since the site uses it.
        return 'www.looselugs.com'

    @classmethod
    def getSiteAbbrev(cls):
        # NOTE: first parameter renamed from 'self' to 'cls' -- these are
        # classmethods, so 'cls' is the conventional (and accurate) name.
        return 'looselugs'

    @classmethod
    def getDateFormat(cls):
        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        return "%B %d, %Y"

##################################################################################
### The Efiction Base Adapter uses the Bulk story to retrieve the metadata, but
### on this site, the Rating is not present in the Bulk page...
### so it is not retrieved.
##################################################################################
def getClass():
    """Registry hook: return the adapter class implemented by this module."""
    return LooseLugsComAdapter

View file

@ -1,347 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
### Adapted by GComyn
### Completed on November, 22, 2016
##############################################################################
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
class LOTRgficComAdapter(BaseSiteAdapter):
    """Adapter for www.lotrgfic.com, an eFiction-based Lord of the Rings archive.

    The site formats its metadata inconsistently (some values under
    ``span.label`` elements, the rest loose inside ``p.smaller`` tags), so
    ``extractChapterUrlsAndMetadata`` scrapes the same page twice with two
    different strategies; see the inline comments there.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.story.setMetadata('siteabbrev','lotrgfic')

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

    @staticmethod
    def getSiteDomain():
        return 'www.lotrgfic.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story index page and populate chapters and metadata.

        Raises AdultCheckRequired when the site shows its NC-17 interstitial
        and is_adult is not set; raises AccessDenied for unvalidated stories.
        """
        if self.is_adult or self.getConfig("is_adult"):
            addurl = "&warning=3"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
            raise exceptions.AdultCheckRequired(self.url)
        elif "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)

        ### Main Content for the Table Of Contents page.
        div = soup.find('div',{'id':'maincontent'})
        divfooter = div.find('div',{'id':'footer'})
        if divfooter != None:
            divfooter.extract()

        ## Title
        a = div.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = div.find('a', href=re.compile(r"viewuser.php"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in div.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

        ### Metadata is contained
        # utility: tag attribute access that returns "" instead of raising.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc

        ### This site has the metadata formatted all over the place,
        ### so we have to do some very cludgy programming to get it.
        ### If someone can do it better, please do so, and let us know.

        ## I'm going to leave this section in, so we can get those
        ## elements that are "formatted correctly".
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## the summary is not encased in a span label... so we can't do anything here.
                ## I'm going to leave it here just in case.
                ## Everything until the next span class='label'
                svalue = ''
                while value and 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                # sometimes poorly formated desc (<p> w/o </p>) leads
                # to all labels being included.
                svalue=svalue[:svalue.find('<span class="label">')]
                self.setDescription(url,svalue)

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    # cat is already the extracted link text (may be None for
                    # nested markup) -- don't take .string a second time.
                    if cat != None:
                        self.story.addToList('category',cat)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    if char != None:
                        self.story.addToList('characters',char)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:
                    if genre != None:
                        self.story.addToList('genre',genre)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:
                    if warning != None:
                        self.story.addToList('warnings',warning)

            if 'Places' in label:
                places = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                placestext = [place.string for place in places]
                self.warning = ', '.join(placestext)
                for place in placestext:
                    if place != None:
                        self.story.addToList('places',place)

            if 'Times' in label:
                times = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
                timestext = [time.string for time in times]
                self.warning = ', '.join(timestext)
                for time in timestext:
                    if time != None:
                        self.story.addToList('times',time)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1
        except:
            # I find it hard to care if the series parsing fails
            pass

        ## Now we are going to cludge together the rest of the metadata
        metad = soup.findAll('p',{'class':'smaller'})

        ## Categories don't have a proper label, but do use links, so...
        cats = soup.findAll('a',href=re.compile(r'browse.php\?type=categories'))
        catstext = [cat.string for cat in cats]
        for cat in catstext:
            if cat != None:
                # cat is already a string; was cat.string (crashes on None,
                # redundant otherwise).
                self.story.addToList('category',cat)

        ## Characters don't have a proper label, but do use links, so...
        chars = soup.findAll('a',href=re.compile(r'browse.php\?type=characters'))
        charstext = [char.string for char in chars]
        for char in charstext:
            if char != None:
                self.story.addToList('characters',char)

        ### Rating is not enclosed in a label, only in a p tag classed 'smaller' so...
        ratng = metad[0].find('strong').get_text().replace('Rated','').strip()
        self.story.setMetadata('rating', ratng)

        ## No we try to get the summary... it's not within it's own
        ## dedicated tag, so we have to split some hairs..
        ## This may not work every time... but I tested it with 6 stories...
        mdata = metad[0]
        while '<hr/>' not in unicode(mdata.nextSibling):
            mdata = mdata.nextSibling
        self.setDescription(url,mdata.previousSibling.previousSibling.get_text())

        ### the rest of the metadata are not in tags at all... so we have to be really cludgy.
        ## we don't need the rest of them, so we get rid of all but the last one
        metad = metad[-1]

        ## we also don't need any of the links in here, so we'll get rid of them as well.
        links = metad.findAll('a')
        for link in links:
            link.extract()

        ## and we've already done the labels, so let's remove them
        labels = metad.findAll('span',{'class':'label'})
        for label in labels:
            label.extract()

        ## now we should only have text and <br>'s... somthing like this:
        #<p class="smaller">Categories:
        #<br/>
        #Characters: , , ,
        #<br/>
        # , <br/> <br/> <br/> None<br/>
        #Challenges: None
        #<br/>
        #Series: None
        #<br/>
        #Chapters: 1    |    Word count: 200    |    Read Count: 767
        #<br/>
        #Completed: Yes    |    Updated: 04/27/13    |    Published: 04/27/13
        #<br/>
        #</p>

        ## we'll have to remove the non-breaking spaces to get this to work.
        metad = unicode(metad).replace(u"\xa0",'').replace('\n','')
        for txt in metad.split('<br/>'):
            if 'Challenges:' in txt:
                txt = txt.replace('Challenges:','').strip()
                self.story.setMetadata('challenges', txt)
            elif 'Series:' in txt:
                txt = txt.replace('Series:','').strip()
                # NOTE(review): this stores the Series text under 'challenges',
                # clobbering the value set just above -- looks like a
                # copy/paste slip, but the intended key is unclear, so the
                # behavior is left as-is.
                self.story.setMetadata('challenges', txt)
            elif 'Chapters:' in txt:
                for txt2 in txt.split('|'):
                    txt2 = txt2.replace('\n','').strip()
                    if 'Word count:' in txt2:
                        txt2 = txt2.replace('Word count:','').strip()
                        # BUG FIX: was setMetadata('numWords', value) --
                        # 'value' is a stale leftover from the labels loop;
                        # store the word count we just parsed.
                        self.story.setMetadata('numWords', txt2)
                    elif 'Read Count:' in txt2:
                        txt2= txt2.replace('Read Count:','').strip()
                        # BUG FIX: same stale-'value' bug as numWords above.
                        self.story.setMetadata('readings', txt2)
            elif 'Completed:' in txt:
                for txt2 in txt.split('|'):
                    txt2 = txt2.strip()
                    if 'Completed:' in txt2:
                        if 'Yes' in txt2:
                            self.story.setMetadata('status', 'Completed')
                        else:
                            self.story.setMetadata('status', 'In-Progress')
                    elif 'Updated:' in txt2:
                        txt2= txt2.replace('Updated:','').strip()
                        self.story.setMetadata('dateUpdated', makeDate(txt2.strip(), "%b/%d/%y"))
                    elif 'Published:' in txt2:
                        txt2= txt2.replace('Published:','').strip()
                        self.story.setMetadata('datePublished', makeDate(txt2.strip(), "%b/%d/%y"))

    def getChapterText(self, url):
        """Download one chapter page and return its cleaned-up HTML body."""
        logger.debug('Getting chapter text from: %s' % url)

        data = self.get_request(url)

        # problems with some stories, but only in calibre. I suspect
        # issues with different SGML parsers in python. This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        soup = self.make_soup(data)

        span = soup.find('div', {'id' : 'maincontent'})

        # Everything is encased in the maincontent section, so we have
        # to remove as much as we can systematically
        tables = span.findAll('table')
        for table in tables:
            table.extract()

        headings = span.findAll('h3')
        for heading in headings:
            heading.extract()

        links = span.findAll('a')
        for link in links:
            link.extract()

        forms = span.findAll('form')
        for form in forms:
            form.extract()

        divs = span.findAll('div')
        for div in divs:
            div.extract()

        if None == span:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,span)
def getClass():
    """Registry hook: return the adapter class implemented by this module."""
    return LOTRgficComAdapter

View file

@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -134,7 +134,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling value = labels[0].previousSibling
svalue = "" svalue = ""
@ -154,22 +154,22 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0]) self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -194,7 +194,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -162,7 +162,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
self.story.extendList('authorId', [authorId]) self.story.extendList('authorId', [authorId])
self.story.extendList('authorUrl', [authorUrl]) self.story.extendList('authorUrl', [authorUrl])
if not self.story.getMetadata('rating'): if not self.story.getMetadataRaw('rating'):
ratingTitle = chapter.getRatingTitle() ratingTitle = chapter.getRatingTitle()
if ratingTitle: if ratingTitle:
self.story.setMetadata('rating', ratingTitle) self.story.setMetadata('rating', ratingTitle)
@ -204,7 +204,6 @@ class MassEffect2InAdapter(BaseSiteAdapter):
self.story.setMetadata('datePublished', datePublished) self.story.setMetadata('datePublished', datePublished)
self.story.setMetadata('dateUpdated', dateUpdated) self.story.setMetadata('dateUpdated', dateUpdated)
self.story.setMetadata('numWords', unicode(wordCount)) self.story.setMetadata('numWords', unicode(wordCount))
self.story.setMetadata('numChapters', len(chapters))
# Site-specific metadata. # Site-specific metadata.
self.story.setMetadata('language', self.SITE_LANGUAGE) self.story.setMetadata('language', self.SITE_LANGUAGE)
@ -678,7 +677,7 @@ class Chapter(object):
def _excludeEditorSignature(self, root): def _excludeEditorSignature(self, root):
"""Exclude editor signature from within `root' element.""" """Exclude editor signature from within `root' element."""
for textNode in root.findAll(text=True): for stringNode in root.find_all(string=True):
if re.match(self.SIGNED_PATTERN, textNode.string): if re.match(self.SIGNED_PATTERN, textNode.string):
editorLink = textNode.findNext('a') editorLink = textNode.findNext('a')
if editorLink: if editorLink:

View file

@ -64,7 +64,9 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
return "https://mcstories.com/StoryTitle/ https://mcstories.com/StoryTitle/index.html https://mcstories.com/StoryTitle/StoryTitle1.html" return "https://mcstories.com/StoryTitle/ https://mcstories.com/StoryTitle/index.html https://mcstories.com/StoryTitle/StoryTitle1.html"
def getSiteURLPattern(self): def getSiteURLPattern(self):
return r"https?://(www\.)?mcstories\.com/([a-zA-Z0-9_-]+)/" ## Note that this uses a regular expression *negative*
## lookahead--story URLs *can't* have /Titles/ /Authors/ etc.
return r"https?://(www\.)?mcstories\.com(?!/(Titles|Authors|Tags|ReadersPicks)/)/[a-zA-Z0-9_-]+/"
def extractChapterUrlsAndMetadata(self): def extractChapterUrlsAndMetadata(self):
""" """
@ -83,7 +85,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
data1 = self.get_request(self.url) data1 = self.get_request(self.url)
soup1 = self.make_soup(data1) soup1 = self.make_soup(data1)
#strip comments from soup #strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))] [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
if 'Page Not Found.' in data1: if 'Page Not Found.' in data1:
raise exceptions.StoryDoesNotExist(self.url) raise exceptions.StoryDoesNotExist(self.url)
@ -93,12 +95,13 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('title', title.text) self.story.setMetadata('title', title.text)
# Author # Author
author = soup1.find('h3', class_='byline').a # byline = soup1.find('h3', class_='byline')
authorurl = urlparse.urljoin(self.url, author['href']) for author in soup1.select('h3.byline a'):
self.story.setMetadata('author', author.text) authorurl = urlparse.urljoin(self.url, author['href'])
self.story.setMetadata('authorUrl', authorurl) self.story.addToList('author', author.text)
authorid = os.path.splitext(os.path.basename(authorurl))[0] self.story.addToList('authorUrl', authorurl)
self.story.setMetadata('authorId', authorid) authorid = os.path.splitext(os.path.basename(authorurl))[0]
self.story.addToList('authorId', authorid)
# Description # Description
synopsis = soup1.find('section', class_='synopsis') synopsis = soup1.find('section', class_='synopsis')
@ -160,7 +163,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
soup1 = self.make_soup(data1) soup1 = self.make_soup(data1)
#strip comments from soup #strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))] [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
# get story text # get story text
story1 = soup1.find('article', id='mcstories') story1 = soup1.find('article', id='mcstories')

View file

@ -68,7 +68,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
# The date format will vary from site to site. # The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%B %d, %Y %H:%M" self.dateformat = "%m.%d.%Y"
@staticmethod @staticmethod
def getSiteDomain(): def getSiteDomain():
@ -89,12 +89,16 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
## even newer urls ## even newer urls
## https://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000 ## https://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000
## https://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822 ## https://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822
## email urls:
## https://www.mediaminer.org/fanfic/view_ch.php/161297/626395?utm_source=add_chapter&utm_medium=email
## author page urls:
## https://www.mediaminer.org/fanfic/view_st.php?id=145608&submit=View
return r"https?://"+re.escape(self.getSiteDomain())+r"/fanfic/"+\ return r"https?://"+re.escape(self.getSiteDomain())+r"/fanfic/"+\
r"((s/(?P<cattitle4>[^/]+)/(?P<urltitle4>[^/]+)/(?P<id4>\d+))|"+\ r"((s/(?P<cattitle4>[^/]+)/(?P<urltitle4>[^/]+)/(?P<id4>\d+))|"+\
r"((c/(?P<cattitle5>[^/]+)/(?P<urltitle5>[^/]+)/(?P<id5>\d+))/\d+)|"+\ r"((c/(?P<cattitle5>[^/]+)/(?P<urltitle5>[^/]+)/(?P<id5>\d+))/\d+)|"+\
r"(s/(?P<urltitle1>[^/]+)/(?P<id1>\d+))|"+\ r"(s/(?P<urltitle1>[^/]+)/(?P<id1>\d+))|"+\
r"((c/(?P<urltitle2>[^/]+)/[^/]+/(?P<id2>\d+))/\d+)|"+\ r"((c/(?P<urltitle2>[^/]+)/[^/]+/(?P<id2>\d+))/\d+)|"+\
r"(view_st\.php/(?P<id3>\d+)))" r"(view_(st|ch)\.php(/|\?id=)(?P<id3>\d+)))"
# Override stripURLParameters so the id parameter won't get stripped # Override stripURLParameters so the id parameter won't get stripped
@classmethod @classmethod
@ -142,21 +146,24 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
for (atag,aurl,name) in [ (x,x['href'],stripHTML(x)) for x in chap_p.find_all('a') ]: for (atag,aurl,name) in [ (x,x['href'],stripHTML(x)) for x in chap_p.find_all('a') ]:
self.add_chapter(name,'https://'+self.host+aurl) self.add_chapter(name,'https://'+self.host+aurl)
# category # category
# <a href="/fanfic/src.php/a/567">Ranma 1/2</a> # <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
for a in soup.findAll('a',href=re.compile(r"^/fanfic/a/")): for a in soup.find_all('a',href=re.compile(r"^/fanfic/a/")):
self.story.addToList('category',a.string) self.story.addToList('category',a.string)
# genre # genre
# <a href="/fanfic/src.php/g/567">Ranma 1/2</a> # <a href="/fanfic/src.php/g/567">Ranma 1/2</a>
for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")): for a in soup.find_all('a',href=re.compile(r"^/fanfic/src.php/g/")):
self.story.addToList('genre',a.string) self.story.addToList('genre',a.string)
metastr = stripHTML(soup.find("div",{"class":"post-meta"})) metasoup = soup.find("div",{"class":"post-meta"})
metastr = stripHTML(metasoup)
metahtml = unicode(metasoup)
self.setDescription(url, metahtml[metahtml.index('</a><br/>')+9:metahtml.index('<br/><b>')])
# Latest Revision: February 07, 2015 15:21 PST # Latest Revision: February 07, 2015 15:21 PST
m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d \d\d:\d\d)",metastr) m = re.match(r".*?(?:Latest Revision|Uploaded On): ?(\d\d\.\d\d\.\d\d\d\d) ?",metastr)
if m: if m:
self.story.setMetadata('dateUpdated', makeDate(m.group(1), self.dateformat)) self.story.setMetadata('dateUpdated', makeDate(m.group(1), self.dateformat))
# site doesn't give date published on index page. # site doesn't give date published on index page.
@ -164,19 +171,20 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
# self.story.setMetadata('datePublished', # self.story.setMetadata('datePublished',
# self.story.getMetadataRaw('dateUpdated')) # self.story.getMetadataRaw('dateUpdated'))
# Words: 123456 # Words:123 or 23.1K or 1.0M
m = re.match(r".*?\| Words: (\d+) \|",metastr) m = re.match(r".*?\| ?Words: ?([\.\d]+)(K|M|) ?\|",metastr)
if m: if m:
self.story.setMetadata('numWords', m.group(1)) if not m.group(2):
word_factor = 1
# Summary: .... elif m.group(2) == 'K':
m = re.match(r".*?Summary: (.*)$",metastr) word_factor = 1000
if m: elif m.group(2) == 'M':
self.setDescription(url, m.group(1)) word_factor = 1000000
#self.story.setMetadata('description', m.group(1)) num_words = int(float(m.group(1))*word_factor)
self.story.setMetadata('numWords', num_words)
# completed # completed
m = re.match(r".*?Status: Completed.*?",metastr) m = re.match(r".*?Status: ?Completed.*?",metastr)
if m: if m:
self.story.setMetadata('status','Completed') self.story.setMetadata('status','Completed')
else: else:
@ -194,7 +202,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
# print("data:%s"%data) # print("data:%s"%data)
headerstr = stripHTML(soup.find('div',{'class':'post-meta'})) headerstr = stripHTML(soup.find('div',{'class':'post-meta'}))
m = re.match(r".*?Uploaded On: ([a-zA-Z]+ \d\d, \d\d\d\d \d\d:\d\d)",headerstr) m = re.match(r".*?Uploaded On: ?(\d\d\.\d\d\.\d\d\d\d)",headerstr)
if m: if m:
date = makeDate(m.group(1), self.dateformat) date = makeDate(m.group(1), self.dateformat)
if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'): if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):

View file

@ -1,272 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Registry hook: return the adapter class implemented by this module.

    Defined above the class; the name is resolved at call time, so the
    forward reference is fine.
    """
    return MerlinFicDtwinsCoUk
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class MerlinFicDtwinsCoUk(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','mrfd')
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%b %d, %Y"
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'merlinfic.dtwins.co.uk'
@classmethod
def getSiteExampleURLs(cls):
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
## Login seems to be reasonably standard across eFiction sites.
def needToLoginCheck(self, data):
if 'Registered Users Only' in data \
or 'There is no such account on our website' in data \
or "That password doesn't match the one in our database" in data:
return True
else:
return False
def performLogin(self, url):
params = {}
if self.password:
params['penname'] = self.username
params['password'] = self.password
else:
params['penname'] = self.getConfig("username")
params['password'] = self.getConfig("password")
params['cookiecheck'] = '1'
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self.post_request(loginUrl, params)
if "Member Account" not in d : #Member Account
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
else:
return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
if self.is_adult or self.getConfig("is_adult"):
# Weirdly, different sites use different warning numbers.
# If the title search below fails, there's a good chance
# you need a different number. print data at that point
# and see what the 'click here to continue' url says.
addurl = "&ageconsent=ok&warning=4"
else:
addurl=""
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logger.debug("URL: "+url)
data = self.get_request(url)
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self.get_request(url)
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
if m != None:
if self.is_adult or self.getConfig("is_adult"):
# We tried the default and still got a warning, so
# let's pull the warning number from the 'continue'
# link and reload data.
addurl = m.group(1)
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logger.debug("URL 2nd try: "+url)
data = self.get_request(url)
else:
raise exceptions.AdultCheckRequired(self.url)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
soup = self.make_soup(data)
# print data
## Title
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
# eFiction sites don't help us out a lot with their meta data
# formating, so it's a little ugly.
# utility method
def defaultGetattr(d,k):
try:
return d[k]
except:
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
if 'Summary' in label:
## Everything until the next span class='label'
svalue = ""
while 'label' not in defaultGetattr(value,'class'):
svalue += unicode(value)
value = value.nextSibling
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value)
if 'Word count' in label:
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Pairing' in label:
ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for ship in ships:
self.story.addToList('ships',ship.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
if 'Completed' in label:
if 'Yes' in value:
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')
if 'Published' in label:
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
if 'Updated' in label:
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links
if 'contact.php' not in a['href'] and 'index' not in a['href']:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
self.story.setMetadata('seriesUrl',series_url)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
# grab the text for an individual chapter.
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})
if None == div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return self.utf8FromSoup(url,div)

Some files were not shown because too many files have changed in this diff Show more