mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Compare commits
1494 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a172a7bd2b | ||
|
|
ab103dce6e | ||
|
|
892e9207f0 | ||
|
|
b4e392fae1 | ||
|
|
d9525d9726 | ||
|
|
cb77b12754 | ||
|
|
b41a633821 | ||
|
|
50c8db2992 | ||
|
|
ef6dd99bfe | ||
|
|
59796ff537 | ||
|
|
8ee0a6e898 | ||
|
|
c53fc362bd | ||
|
|
c87cfc1057 | ||
|
|
6ee151c90a | ||
|
|
db01c828a0 | ||
|
|
4d03874f06 | ||
|
|
36f56483e6 | ||
|
|
18e45a403b | ||
|
|
2e25172ba3 | ||
|
|
65e3fd562b | ||
|
|
7089bf6689 | ||
|
|
061dc1333f | ||
|
|
0a7fb5c090 | ||
|
|
cf02f729ae | ||
|
|
730c4f77f9 | ||
|
|
c02da29cbd | ||
|
|
b87d796221 | ||
|
|
436370fe5b | ||
|
|
ac77f31bc2 | ||
|
|
16f2c74e4b | ||
|
|
af5c2aa0bc | ||
|
|
31dec5b62d | ||
|
|
97d37fcfc1 | ||
|
|
c730aa2f68 | ||
|
|
4e2e359dee | ||
|
|
bb96049934 | ||
|
|
84965ef25f | ||
|
|
348d129a1e | ||
|
|
4794e9bc51 | ||
|
|
d46dc76ae1 | ||
|
|
08bae8d9be | ||
|
|
405c37aeb5 | ||
|
|
270e01c3c7 | ||
|
|
12d57f5950 | ||
|
|
562b3a4ecd | ||
|
|
e69045fd98 | ||
|
|
747bde3394 | ||
|
|
aa00c7ae03 | ||
|
|
0539f818f3 | ||
|
|
41a6f56f44 | ||
|
|
e3832245e6 | ||
|
|
909b64c83c | ||
|
|
732f5e2571 | ||
|
|
d9dd04396e | ||
|
|
36e2183d45 | ||
|
|
040b7205b8 | ||
|
|
d8ed180eb1 | ||
|
|
2a6c1e74db | ||
|
|
b7c8c96153 | ||
|
|
a16096592c | ||
|
|
bb34eecc7c | ||
|
|
ceed7ef1a8 | ||
|
|
1d2a887c2d | ||
|
|
a3f3302312 | ||
|
|
ecf005b145 | ||
|
|
3bd074fa2c | ||
|
|
0fd95daa8e | ||
|
|
1b57e49d98 | ||
|
|
db0d39c9cd | ||
|
|
cbde66cf41 | ||
|
|
17331e9eb3 | ||
|
|
9b96c151a5 | ||
|
|
1b65a30798 | ||
|
|
c9a47877f7 | ||
|
|
bdc77ad0f6 | ||
|
|
719971c76c | ||
|
|
c74dba472a | ||
|
|
c1fb7f0fc5 | ||
|
|
94c932cd2f | ||
|
|
27fb765c0d | ||
|
|
06ce46f64a | ||
|
|
c04d85fa97 | ||
|
|
b6cdc30db5 | ||
|
|
9bbb5e8b01 | ||
|
|
18ce6e6fba | ||
|
|
507910f5da | ||
|
|
ccf7801a89 | ||
|
|
9a52a10626 | ||
|
|
6963153aac | ||
|
|
ee357cd5b4 | ||
|
|
b84e3d2858 | ||
|
|
9377fc6671 | ||
|
|
aaa0fa613a | ||
|
|
eac5acfbfa | ||
|
|
8dca1ef343 | ||
|
|
28e8f61cf8 | ||
|
|
78abf476ea | ||
|
|
2b1f9446dd | ||
|
|
9815736b4e | ||
|
|
3f54cce9a1 | ||
|
|
223138b8e5 | ||
|
|
4aa47c8bab | ||
|
|
a97a85f357 | ||
|
|
ffc3696d84 | ||
|
|
86c4e1974b | ||
|
|
b6fd7c2ca4 | ||
|
|
326300b40e | ||
|
|
282bafe514 | ||
|
|
061a8feccf | ||
|
|
26c9b6d2ce | ||
|
|
ed02d61953 | ||
|
|
b58d54b8ea | ||
|
|
1bc3ffc269 | ||
|
|
cbd295f911 | ||
|
|
35653f533f | ||
|
|
ea7afea8c2 | ||
|
|
384a2fe8b7 | ||
|
|
b278cac620 | ||
|
|
e23de49fb5 | ||
|
|
f64f041546 | ||
|
|
1d53c506c9 | ||
|
|
c8d6ce8004 | ||
|
|
3f08417c04 | ||
|
|
79ebf6a02b | ||
|
|
41dfb8eab8 | ||
|
|
590b663170 | ||
|
|
9bb408c8b3 | ||
|
|
5d6a63a8ca | ||
|
|
4078ccfdb1 | ||
|
|
79c29121c3 | ||
|
|
dea48d9e07 | ||
|
|
c165196a35 | ||
|
|
c385013db9 | ||
|
|
8780aa3105 | ||
|
|
12c7bfe29c | ||
|
|
08d0b8a4e0 | ||
|
|
1d401f8dba | ||
|
|
193bb3ed61 | ||
|
|
63fd8cd660 | ||
|
|
26a1152390 | ||
|
|
e0907147f7 | ||
|
|
99bba3ff12 | ||
|
|
3fdb6630fb | ||
|
|
0d6b789c9f | ||
|
|
edaa03ef42 | ||
|
|
4e17a10792 | ||
|
|
9fd48e0168 | ||
|
|
818e990184 | ||
|
|
9bb7b54023 | ||
|
|
af6695e27f | ||
|
|
46293f2d02 | ||
|
|
7f968ba102 | ||
|
|
1e5cb9b184 | ||
|
|
9627e6e62c | ||
|
|
5e644098f9 | ||
|
|
fa3a56d096 | ||
|
|
ba18216ef8 | ||
|
|
f207e31b3b | ||
|
|
0e1ace18e4 | ||
|
|
b17a632640 | ||
|
|
485d4631f9 | ||
|
|
30929bc38e | ||
|
|
ae4311f4dd | ||
|
|
3a3c35ea1f | ||
|
|
19dd89fb4d | ||
|
|
b247a7465b | ||
|
|
d5c20db681 | ||
|
|
a599ff6ad2 | ||
|
|
e21c6604a1 | ||
|
|
273c1931f4 | ||
|
|
fdf29eeade | ||
|
|
06e55728d0 | ||
|
|
0a3ab4bc9d | ||
|
|
a4a91b373f | ||
|
|
a68e771026 | ||
|
|
d7c79fcb3b | ||
|
|
5cc05ed96d | ||
|
|
e5b5768f11 | ||
|
|
6cf2519ef9 | ||
|
|
f4f98e0877 | ||
|
|
bb8fb9efa5 | ||
|
|
be38778d72 | ||
|
|
55d8efbdcd | ||
|
|
9df7822e32 | ||
|
|
69e6a3d2cf | ||
|
|
8ea03be5f3 | ||
|
|
75a213beb9 | ||
|
|
ead830c60a | ||
|
|
20681315e7 | ||
|
|
e2961eaadf | ||
|
|
7f0d7f70be | ||
|
|
c5264c2147 | ||
|
|
ff402c16ca | ||
|
|
4a9da1c02e | ||
|
|
c14f1014b8 | ||
|
|
74bc398994 | ||
|
|
6e8e74fc55 | ||
|
|
68ad4c87aa | ||
|
|
fe82aed91d | ||
|
|
7d14bf6e90 | ||
|
|
39500a9386 | ||
|
|
d5f8891e4f | ||
|
|
edce6949ae | ||
|
|
bec6fac2ea | ||
|
|
a9bd19a079 | ||
|
|
7135ba5892 | ||
|
|
9ba4c100ca | ||
|
|
fe565149ba | ||
|
|
624f60a5c1 | ||
|
|
5c79ac0b5c | ||
|
|
615711f904 | ||
|
|
2f77bd9e97 | ||
|
|
abdc881812 | ||
|
|
1ba73bf316 | ||
|
|
a359c6b326 | ||
|
|
ff64356e85 | ||
|
|
0271b14f6c | ||
|
|
bf845e200f | ||
|
|
e94ff6e1e8 | ||
|
|
07313d2744 | ||
|
|
bd2026df7e | ||
|
|
0fa177ff79 | ||
|
|
d84c72a215 | ||
|
|
c319857da0 | ||
|
|
df586e9bb7 | ||
|
|
354a5708ce | ||
|
|
096face5d2 | ||
|
|
02e3bddd5c | ||
|
|
9dadef1905 | ||
|
|
2e8a899d8c | ||
|
|
623915f623 | ||
|
|
57865ca53d | ||
|
|
e9c4b9ef30 | ||
|
|
0ad088b663 | ||
|
|
e37a7f72be | ||
|
|
9befe122dd | ||
|
|
e6d6227ff1 | ||
|
|
d854a6efe7 | ||
|
|
a97af94f8a | ||
|
|
e2ea97e99a | ||
|
|
215f6dd8ff | ||
|
|
687aa9c3ba | ||
|
|
523cf78640 | ||
|
|
90e50964b6 | ||
|
|
a83823ea13 | ||
|
|
727aa6f1bc | ||
|
|
072d929298 | ||
|
|
992c5a1378 | ||
|
|
f8937c1af3 | ||
|
|
af5c78e2e9 | ||
|
|
4a26dfdfff | ||
|
|
a82ef5dbae | ||
|
|
6adc995fa5 | ||
|
|
f534efd3df | ||
|
|
f41e64141a | ||
|
|
94036e3fbb | ||
|
|
9142609c61 | ||
|
|
f9d7b893ee | ||
|
|
4e2ae7441d | ||
|
|
87dbef980f | ||
|
|
921f8c287b | ||
|
|
637c6e3cc3 | ||
|
|
ba90ff9f3a | ||
|
|
34e84b2942 | ||
|
|
31eb7f421a | ||
|
|
85d4656005 | ||
|
|
006b8873a5 | ||
|
|
3246036f88 | ||
|
|
6d114532e2 | ||
|
|
2edb1d58d5 | ||
|
|
8dc3c5d3d8 | ||
|
|
2ec8c97e28 | ||
|
|
c51161c3d1 | ||
|
|
bd645a97c7 | ||
|
|
f7cbfa56bb | ||
|
|
07fd16813f | ||
|
|
2fe971c79f | ||
|
|
e4082c6235 | ||
|
|
960d5ba11a | ||
|
|
066539793d | ||
|
|
5b312494fb | ||
|
|
e628b10247 | ||
|
|
61c063ed72 | ||
|
|
11d3f601c9 | ||
|
|
3b8d0f63d4 | ||
|
|
b8b30c6a78 | ||
|
|
b007f68a88 | ||
|
|
6d8a67ef2e | ||
|
|
ab66e9e285 | ||
|
|
b3f7add5a1 | ||
|
|
800be43d24 | ||
|
|
70f77e17e2 | ||
|
|
caf46ba421 | ||
|
|
686ed80230 | ||
|
|
56689a10c4 | ||
|
|
065d077752 | ||
|
|
c8f817e830 | ||
|
|
1432241319 | ||
|
|
0e9f60f8a6 | ||
|
|
74de62385f | ||
|
|
d2f69eb5d5 | ||
|
|
c3655d59ca | ||
|
|
aca07bbf59 | ||
|
|
3edd3c3e7b | ||
|
|
61ba096c6e | ||
|
|
47fd71c4b9 | ||
|
|
e1d0bed52d | ||
|
|
acb88cbefc | ||
|
|
f1e7cabf6a | ||
|
|
21ec27ffd4 | ||
|
|
5567e6417d | ||
|
|
af352a480c | ||
|
|
92069dc638 | ||
|
|
76e9421858 | ||
|
|
70558bf444 | ||
|
|
b60dfdcc28 | ||
|
|
b976439669 | ||
|
|
6de50509ed | ||
|
|
4d9c38d3c2 | ||
|
|
90ecb63be4 | ||
|
|
bd49f8e8fa | ||
|
|
21c0315e60 | ||
|
|
fc97fa6d5c | ||
|
|
2c3bf3c642 | ||
|
|
a9c725d32a | ||
|
|
f936c5b0fb | ||
|
|
53344afa49 | ||
|
|
d5addfa2fd | ||
|
|
6d8375a9f3 | ||
|
|
7bc03ac798 | ||
|
|
05d62a5343 | ||
|
|
31115f9245 | ||
|
|
26ee692208 | ||
|
|
dd43d25f76 | ||
|
|
fffd15d7ea | ||
|
|
7c2700c8ea | ||
|
|
94518c4f25 | ||
|
|
531b965b22 | ||
|
|
658b637716 | ||
|
|
44f5feacfb | ||
|
|
52451a3eba | ||
|
|
7123f7dd6f | ||
|
|
08a0f9b5fc | ||
|
|
74ac96a67e | ||
|
|
9eed0340e9 | ||
|
|
73b90c0291 | ||
|
|
c33a6e6b05 | ||
|
|
d77cc15586 | ||
|
|
21483f7227 | ||
|
|
6c0df42fe7 | ||
|
|
c3a90a8914 | ||
|
|
e7f66d293a | ||
|
|
e49b3a6be0 | ||
|
|
ae72efdc00 | ||
|
|
bc935e213a | ||
|
|
a8e0eabbd8 | ||
|
|
81b84a8133 | ||
|
|
a973b8c926 | ||
|
|
08ccc659ca | ||
|
|
fb610de27a | ||
|
|
29d2e3734b | ||
|
|
48cf17c7b7 | ||
|
|
ac61c2bb68 | ||
|
|
a12d2a688b | ||
|
|
52027eac46 | ||
|
|
a1d4fba728 | ||
|
|
69872b922c | ||
|
|
7bd1a1acfc | ||
|
|
80e5a22f0d | ||
|
|
3cd4188bd8 | ||
|
|
21d16dbe90 | ||
|
|
5ce7875851 | ||
|
|
35be14a168 | ||
|
|
930940c7fd | ||
|
|
f001f19a47 | ||
|
|
fd7382fb56 | ||
|
|
c69e940d2a | ||
|
|
31dcd8e6ff | ||
|
|
0bd85c10a8 | ||
|
|
b075c22261 | ||
|
|
87b3e04fa1 | ||
|
|
630f09e644 | ||
|
|
a0463fc85b | ||
|
|
de7d8079d9 | ||
|
|
4aad0ec913 | ||
|
|
c379b45cb9 | ||
|
|
82825d1b16 | ||
|
|
11b2d5643e | ||
|
|
06dc2add8f | ||
|
|
ab7198bb8f | ||
|
|
d854733ffa | ||
|
|
a2cc6bcdd3 | ||
|
|
c9accda3f8 | ||
|
|
8e55d1e6f4 | ||
|
|
9b8eb547fc | ||
|
|
62b3c9264e | ||
|
|
370be379f0 | ||
|
|
1addfe14fc | ||
|
|
e510fb027e | ||
|
|
86b807805f | ||
|
|
0ace02ee75 | ||
|
|
38ad74af68 | ||
|
|
6c70a60cdb | ||
|
|
80ee0ca9b9 | ||
|
|
8b143a0c1b | ||
|
|
9fb86da341 | ||
|
|
5c703122ec | ||
|
|
75f89beab1 | ||
|
|
fc9d184f20 | ||
|
|
6c411e054a | ||
|
|
dbef4719d9 | ||
|
|
da6b4c25f2 | ||
|
|
23004e3953 | ||
|
|
4a15c2a7d5 | ||
|
|
84dad2ec43 | ||
|
|
5ac38fc327 | ||
|
|
35e0ada643 | ||
|
|
a9533364ec | ||
|
|
4a03186ce6 | ||
|
|
a0271e2957 | ||
|
|
11491c6383 | ||
|
|
24dccc73f0 | ||
|
|
8e3a88776a | ||
|
|
28141ce9d1 | ||
|
|
ffaa3bf82a | ||
|
|
d0d05d6c3b | ||
|
|
6d74a58181 | ||
|
|
de85fd42f7 | ||
|
|
c4aebd40df | ||
|
|
81cb631491 | ||
|
|
35aa5d2143 | ||
|
|
a8b1489233 | ||
|
|
ffb179c9a1 | ||
|
|
6d8d7ab66f | ||
|
|
a128083ce8 | ||
|
|
9f78ec0177 | ||
|
|
d941810825 | ||
|
|
ba1975342c | ||
|
|
27cfac45e4 | ||
|
|
64a4eb2bb2 | ||
|
|
371f995fda | ||
|
|
816bbdfd66 | ||
|
|
cdd6df8a57 | ||
|
|
5d4489bb28 | ||
|
|
a9944cd255 | ||
|
|
c284b2a6c6 | ||
|
|
15dde72f14 | ||
|
|
ff0f22565c | ||
|
|
33813b4047 | ||
|
|
ae3accca27 | ||
|
|
d998467f7a | ||
|
|
29fddbce8e | ||
|
|
a4e1db32e0 | ||
|
|
81aea65555 | ||
|
|
9005f9db4c | ||
|
|
7de040d8db | ||
|
|
9c53cf236e | ||
|
|
2e6ac07020 | ||
|
|
3febac62a8 | ||
|
|
c4ea6ca5fd | ||
|
|
75f9fb2d38 | ||
|
|
e4f83c52ca | ||
|
|
eb54731ae9 | ||
|
|
eb24bcb2ac | ||
|
|
ffa533e5fd | ||
|
|
bd76066905 | ||
|
|
eb17af9252 | ||
|
|
4471b1f980 | ||
|
|
9cfd88c098 | ||
|
|
c1cf8995ea | ||
|
|
55995be7de | ||
|
|
869686f363 | ||
|
|
f45a05ddb6 | ||
|
|
434ff0de74 | ||
|
|
d0ece28197 | ||
|
|
cd1db0a462 | ||
|
|
075c5cb7c2 | ||
|
|
b8740ca1c7 | ||
|
|
3db3e28595 | ||
|
|
b610d49f6b | ||
|
|
35afca430a | ||
|
|
1499037e19 | ||
|
|
1aaa4102a5 | ||
|
|
049c9af0e4 | ||
|
|
482b6b67eb | ||
|
|
cdb752df6a | ||
|
|
0412355001 | ||
|
|
0dc049aedb | ||
|
|
832387dea0 | ||
|
|
94bd4bf236 | ||
|
|
493e76df30 | ||
|
|
44b6e752f6 | ||
|
|
5d6f2c91c1 | ||
|
|
04ae49f944 | ||
|
|
020606fea1 | ||
|
|
711698620e | ||
|
|
968687bb82 | ||
|
|
07ab6d137b | ||
|
|
d51ac5d6f5 | ||
|
|
478d2e8f17 | ||
|
|
67a1dcee90 | ||
|
|
af834b1e40 | ||
|
|
ae535e2518 | ||
|
|
96d36ae71a | ||
|
|
480b7239e5 | ||
|
|
2666164c5b | ||
|
|
6ef8d1b215 | ||
|
|
654619e7e2 | ||
|
|
4ea869a764 | ||
|
|
837df18cb0 | ||
|
|
248f1c022b | ||
|
|
4fabf9e65c | ||
|
|
b7c318f520 | ||
|
|
89a15e1b16 | ||
|
|
5b41097abc | ||
|
|
a672b6dbdf | ||
|
|
e4d5d43efa | ||
|
|
cc572857e0 | ||
|
|
2f52ae31c0 | ||
|
|
3ddf801925 | ||
|
|
182695b0af | ||
|
|
656e67cc57 | ||
|
|
34215ce0ee | ||
|
|
c706aed271 | ||
|
|
e5f8e5bba4 | ||
|
|
11d8fae876 | ||
|
|
4a14e5fc86 | ||
|
|
7548ce6ae0 | ||
|
|
e113bbfb1c | ||
|
|
d1ccdfd21f | ||
|
|
68e8f49e9f | ||
|
|
49a0328268 | ||
|
|
25ea3fcaad | ||
|
|
a5378ca419 | ||
|
|
e0b733b60d | ||
|
|
33b2b10bf3 | ||
|
|
c468c26208 | ||
|
|
9d29f888b3 | ||
|
|
d1e8a77489 | ||
|
|
ef66e73fa4 | ||
|
|
7f128587c0 | ||
|
|
53a7a60dbc | ||
|
|
71a61ff166 | ||
|
|
9c051e6c3b | ||
|
|
f0d89498dc | ||
|
|
abb370a852 | ||
|
|
4b9054d1b4 | ||
|
|
2d0db171a8 | ||
|
|
7f67465767 | ||
|
|
6801d5e01d | ||
|
|
b01914c24e | ||
|
|
dd41f99288 | ||
|
|
37db56e6b3 | ||
|
|
f0a08f7647 | ||
|
|
2593f742c9 | ||
|
|
6ac299c198 | ||
|
|
3eda289349 | ||
|
|
95a7bdd3a9 | ||
|
|
84257e7388 | ||
|
|
465bffd896 | ||
|
|
eabfd1bef3 | ||
|
|
8d6676617c | ||
|
|
c47b620f67 | ||
|
|
df94cc439e | ||
|
|
08032778bd | ||
|
|
52deec3fd8 | ||
|
|
5b443d4363 | ||
|
|
4170cfd9a6 | ||
|
|
ae4735df04 | ||
|
|
6041036787 | ||
|
|
d451265621 | ||
|
|
677f213337 | ||
|
|
8537702028 | ||
|
|
6d3d4d1ae6 | ||
|
|
1f42c188fa | ||
|
|
9346985718 | ||
|
|
4585afde50 | ||
|
|
bee6cb9ba6 | ||
|
|
581b627a3e | ||
|
|
4436001494 | ||
|
|
6116a19986 | ||
|
|
99fd4ea0e5 | ||
|
|
a613b842f2 | ||
|
|
6462c5c366 | ||
|
|
8c4a8cd2da | ||
|
|
7a0ea3ce96 | ||
|
|
f14fe9d3aa | ||
|
|
36add28269 | ||
|
|
87b4171dd4 | ||
|
|
951acf61b4 | ||
|
|
8674b54753 | ||
|
|
b7e5bf0468 | ||
|
|
0f12c127b6 | ||
|
|
50c51dc993 | ||
|
|
65bf03a613 | ||
|
|
0bb8421f98 | ||
|
|
108e603e63 | ||
|
|
1868ed842e | ||
|
|
6c505a6170 | ||
|
|
72d508b0bf | ||
|
|
d6f2faf170 | ||
|
|
92cbff7db9 | ||
|
|
4bb2d50921 | ||
|
|
c3d8bc4fd0 | ||
|
|
37ae6cbdbb | ||
|
|
b953daa3c2 | ||
|
|
463910cd54 | ||
|
|
95bfdf907f | ||
|
|
85550aeaf6 | ||
|
|
5b20926f2c | ||
|
|
c915aceb85 | ||
|
|
36d56b867c | ||
|
|
e1cec84075 | ||
|
|
ba3676d73f | ||
|
|
80f50b298f | ||
|
|
9120504249 | ||
|
|
55c7ca9c10 | ||
|
|
704ea89d72 | ||
|
|
8eecd0aa7d | ||
|
|
c53f99d01c | ||
|
|
438a1265f2 | ||
|
|
86766223cb | ||
|
|
1fa94de1d9 | ||
|
|
56d1cf19ef | ||
|
|
701c096ed4 | ||
|
|
aab3e1c601 | ||
|
|
8d040a4926 | ||
|
|
4453cbb143 | ||
|
|
0c173f8110 | ||
|
|
a14b39eb4c | ||
|
|
c9cb51f8c4 | ||
|
|
dbe6c6105c | ||
|
|
04231eecfe | ||
|
|
a55a4c93a5 | ||
|
|
dcd4f0f6a5 | ||
|
|
792ab02195 | ||
|
|
7a87310403 | ||
|
|
7e070528a1 | ||
|
|
4f3af1395f | ||
|
|
1fc4f3d70b | ||
|
|
12ee3dae5e | ||
|
|
cf28bc26f0 | ||
|
|
bd41796231 | ||
|
|
f21f039b3a | ||
|
|
7263f4120c | ||
|
|
22e0e8da66 | ||
|
|
7173bf0803 | ||
|
|
7246cdf853 | ||
|
|
c60b296bc9 | ||
|
|
a8a86533ad | ||
|
|
d1c5847a58 | ||
|
|
68e0d70fcb | ||
|
|
74b28f7ead | ||
|
|
acda805c3c | ||
|
|
a37fbbbd51 | ||
|
|
2cdb6036ea | ||
|
|
77afdc0208 | ||
|
|
7e0e68f66f | ||
|
|
bbec6fcd5f | ||
|
|
631fe6c9c9 | ||
|
|
a86755ad98 | ||
|
|
42d2b00007 | ||
|
|
ad10cad0b0 | ||
|
|
71d3589ebc | ||
|
|
84ed1827be | ||
|
|
ce29a6923e | ||
|
|
d96d194b2b | ||
|
|
5cb3bccf45 | ||
|
|
e6639323b7 | ||
|
|
f94e0eaf32 | ||
|
|
37bcb1284b | ||
|
|
295bd2e1ab | ||
|
|
45b4a8d8bf | ||
|
|
cdb60423fe | ||
|
|
50f913843b | ||
|
|
581d6f6657 | ||
|
|
e03f65332a | ||
|
|
3e9abec817 | ||
|
|
0d8f84ba23 | ||
|
|
c646419336 | ||
|
|
622a4eb44b | ||
|
|
d4fbc73b41 | ||
|
|
391f469a99 | ||
|
|
a0ca55d7f6 | ||
|
|
a4bbe27771 | ||
|
|
a5e2d1eb45 | ||
|
|
7a89d03339 | ||
|
|
ae638fd0a1 | ||
|
|
26a59b373a | ||
|
|
479c0b7d95 | ||
|
|
52a0bb6e0e | ||
|
|
f2f333c807 | ||
|
|
3f2f2a33d3 | ||
|
|
ba9272822b | ||
|
|
9575044262 | ||
|
|
7306e81a30 | ||
|
|
19f9132109 | ||
|
|
f340ba50da | ||
|
|
6e90c7ed7b | ||
|
|
0a81bc7c6b | ||
|
|
f5dd6b90fc | ||
|
|
e1a9438595 | ||
|
|
97a72380e6 | ||
|
|
a6a3a4e240 | ||
|
|
b6b1e6ecdc | ||
|
|
85cf21a32c | ||
|
|
918ed4a23e | ||
|
|
84d6106a30 | ||
|
|
6761cae9c1 | ||
|
|
e330ccbe94 | ||
|
|
da7059e978 | ||
|
|
893345dc33 | ||
|
|
9fcc6fe68a | ||
|
|
0c02f17d67 | ||
|
|
11c8805f4c | ||
|
|
cf065fa706 | ||
|
|
3c94c9d308 | ||
|
|
831bea725f | ||
|
|
b748283484 | ||
|
|
28af7e1722 | ||
|
|
1673da5a4b | ||
|
|
c97c0e822d | ||
|
|
ce24ac70d9 | ||
|
|
9ab4739710 | ||
|
|
685084e711 | ||
|
|
dd049ac297 | ||
|
|
516f7464b7 | ||
|
|
46be37e034 | ||
|
|
693f0aa774 | ||
|
|
646693ca3e | ||
|
|
22534986d3 | ||
|
|
18b183585a | ||
|
|
5862ba627e | ||
|
|
c38f4ab400 | ||
|
|
f5c9fcf029 | ||
|
|
9e206d2215 | ||
|
|
b1b2451fa6 | ||
|
|
91f2f84c10 | ||
|
|
16ba74c98e | ||
|
|
0cc3b81580 | ||
|
|
c769900332 | ||
|
|
a84e6ab385 | ||
|
|
af163c27e0 | ||
|
|
016452ec89 | ||
|
|
b584779a13 | ||
|
|
01d97ed770 | ||
|
|
607ef27fe1 | ||
|
|
448a9cfaef | ||
|
|
88fb6069fc | ||
|
|
cd5fd2cab4 | ||
|
|
a21fcf7e77 | ||
|
|
627a8dbff5 | ||
|
|
dd1207f11e | ||
|
|
49aec452ca | ||
|
|
e033f71ece | ||
|
|
62b097f3d5 | ||
|
|
3098c1983f | ||
|
|
37626680f9 | ||
|
|
d99fe607da | ||
|
|
c80f22cdd3 | ||
|
|
0b6402ca8a | ||
|
|
26a7633337 | ||
|
|
3ee7614441 | ||
|
|
718ae6ac83 | ||
|
|
e0686eada2 | ||
|
|
9f1fd42889 | ||
|
|
a088a34c89 | ||
|
|
14cdc10ee3 | ||
|
|
8667643e7c | ||
|
|
e6d123a17d | ||
|
|
ae28b714b3 | ||
|
|
33cd1642f8 | ||
|
|
63ec69f9f2 | ||
|
|
20ea9a00ed | ||
|
|
779222b66d | ||
|
|
afb2b9fe29 | ||
|
|
20052e1922 | ||
|
|
e03f3f40da | ||
|
|
00f6656d7d | ||
|
|
dd2c1a48b5 | ||
|
|
a37588a8f7 | ||
|
|
fc99805a85 | ||
|
|
d73b1732d3 | ||
|
|
043fb289bf | ||
|
|
a0332f27be | ||
|
|
99285763d3 | ||
|
|
26467d8f35 | ||
|
|
930ba5bb19 | ||
|
|
fb552c823a | ||
|
|
bfc0c4f3ef | ||
|
|
216cb27f03 | ||
|
|
21a5ded593 | ||
|
|
ff07987a02 | ||
|
|
bd6afdafb8 | ||
|
|
fd7c5ac867 | ||
|
|
87eb84b5fa | ||
|
|
784cb711d8 | ||
|
|
54a00a934b | ||
|
|
c638ac8457 | ||
|
|
b710a4cdc7 | ||
|
|
16c8c6b445 | ||
|
|
5cee35149f | ||
|
|
de201c7263 | ||
|
|
222a4f4828 | ||
|
|
7d6af47f60 | ||
|
|
1c05d58d1a | ||
|
|
8152b51353 | ||
|
|
d387eafff2 | ||
|
|
fe5605ea50 | ||
|
|
7f97decb8a | ||
|
|
cfd28dd1ff | ||
|
|
2c43eab432 | ||
|
|
fda597ddae | ||
|
|
7502c0f2fb | ||
|
|
eaeeda6911 | ||
|
|
8850c1a62b | ||
|
|
0205ec4ccb | ||
|
|
2600bf7be5 | ||
|
|
012ff40f0f | ||
|
|
0df9e39931 | ||
|
|
97fcc3af33 | ||
|
|
be40433377 | ||
|
|
a1f29cb034 | ||
|
|
b2b584d832 | ||
|
|
415cd6597e | ||
|
|
d1d5d61b87 | ||
|
|
2c11ecc5c8 | ||
|
|
0ac66425f8 | ||
|
|
367d3e4435 | ||
|
|
05b7147e64 | ||
|
|
200c877418 | ||
|
|
84323c1608 | ||
|
|
3ba2edef2d | ||
|
|
e5cc1cccf2 | ||
|
|
c50ffc40dc | ||
|
|
1f8106c1f3 | ||
|
|
d9ca72571e | ||
|
|
ecb0620929 | ||
|
|
c6b381e61a | ||
|
|
faf352bf80 | ||
|
|
269b7d5bd1 | ||
|
|
439d617364 | ||
|
|
d0c85feda5 | ||
|
|
25ebc603e7 | ||
|
|
1683d950c3 | ||
|
|
961bb28ecd | ||
|
|
bbb3db31a8 | ||
|
|
c917c5da3d | ||
|
|
edc2056e75 | ||
|
|
84b7cbcda2 | ||
|
|
44484670f2 | ||
|
|
0b442422ab | ||
|
|
d0448af52e | ||
|
|
e82585ecc7 | ||
|
|
ff36bd30c5 | ||
|
|
12b2117c77 | ||
|
|
34ec532eed | ||
|
|
2fa23ce9fd | ||
|
|
8399061dc9 | ||
|
|
86ab2806fa | ||
|
|
6f77504ca9 | ||
|
|
a259297092 | ||
|
|
2c662b6f33 | ||
|
|
548d6a5a58 | ||
|
|
f3d2513d32 | ||
|
|
8b20756095 | ||
|
|
8f093769ce | ||
|
|
f6dafecfa1 | ||
|
|
98f95a7da8 | ||
|
|
f3d373c8ca | ||
|
|
536ff35d66 | ||
|
|
6d31c5fb94 | ||
|
|
5730d3583a | ||
|
|
da64336967 | ||
|
|
480311c442 | ||
|
|
8b44e3d4b6 | ||
|
|
9049625ec2 | ||
|
|
d8c70ceae2 | ||
|
|
95bb8a0c7f | ||
|
|
9b1a64616b | ||
|
|
8a6894fa28 | ||
|
|
7c4e819c93 | ||
|
|
9bedeb55a0 | ||
|
|
6c92d45d97 | ||
|
|
c7c029c706 | ||
|
|
6fec02f79e | ||
|
|
fc3e8bb8ff | ||
|
|
3f52734da2 | ||
|
|
cde8a739fb | ||
|
|
dc5837badb | ||
|
|
43a2d5cd67 | ||
|
|
2c0a1d1046 | ||
|
|
64aaaf6daa | ||
|
|
dd2a076b6f | ||
|
|
cf7f84c886 | ||
|
|
98a5a120c1 | ||
|
|
77d35d88c7 | ||
|
|
f25ed9efbb | ||
|
|
de7e4803a3 | ||
|
|
1516b100d2 | ||
|
|
7ff2976dfe | ||
|
|
f4426d0532 | ||
|
|
f4fbbf0d34 | ||
|
|
57cf738df5 | ||
|
|
edb09d1a7e | ||
|
|
84c5e245e6 | ||
|
|
95cece7e9c | ||
|
|
ea345b059d | ||
|
|
6ca6d47066 | ||
|
|
fea04ed16c | ||
|
|
84b3b6d61e | ||
|
|
4f0be16f0b | ||
|
|
f8fc1a2881 | ||
|
|
f9471377bb | ||
|
|
152088de87 | ||
|
|
82702ea958 | ||
|
|
3432a786d5 | ||
|
|
4fd8972f6a | ||
|
|
e4847653c6 | ||
|
|
6e73c7400a | ||
|
|
5c40f4073a | ||
|
|
da3777a0ca | ||
|
|
dd636bb55f | ||
|
|
6fcfdaabf3 | ||
|
|
e26eb9d9cc | ||
|
|
732d40f5c8 | ||
|
|
814cf2931c | ||
|
|
5e4f041509 | ||
|
|
8862ec985f | ||
|
|
c887697d61 | ||
|
|
30115980af | ||
|
|
be057e296f | ||
|
|
a5d42e07c9 | ||
|
|
6484f588e4 | ||
|
|
83a5c28d71 | ||
|
|
96a129a70f | ||
|
|
51e6892a5e | ||
|
|
47ad5c1e1f | ||
|
|
bdb90941d3 | ||
|
|
a2e9d29cf6 | ||
|
|
b43bec4126 | ||
|
|
5992f835fb | ||
|
|
263c840f30 | ||
|
|
7786b1b5a9 | ||
|
|
b1ce5f8956 | ||
|
|
5e6ab494b9 | ||
|
|
b99560acca | ||
|
|
b146552e39 | ||
|
|
8468a502bb | ||
|
|
1b96617c78 | ||
|
|
7ac179e068 | ||
|
|
f29f3f973a | ||
|
|
e775bd451d | ||
|
|
bef71a49b6 | ||
|
|
e5ab3e1d0c | ||
|
|
bb06ffdaea | ||
|
|
5ce7aa5c48 | ||
|
|
85450360de | ||
|
|
ec6873f95f | ||
|
|
e4d5b61ef6 | ||
|
|
644bd369e4 | ||
|
|
dede2376c3 | ||
|
|
2bd727bec2 | ||
|
|
50c85d4835 | ||
|
|
7103630e55 | ||
|
|
a31d58bca3 | ||
|
|
6ae424d3ff | ||
|
|
3b703da1f3 | ||
|
|
6695f23079 | ||
|
|
5d4d8e6239 | ||
|
|
b14590c112 | ||
|
|
e11e09f935 | ||
|
|
4e0aa707b9 | ||
|
|
0845deb095 | ||
|
|
2719705a1a | ||
|
|
346da2cdee | ||
|
|
db39aaf4ff | ||
|
|
22ea1d4a15 | ||
|
|
4365e852fe | ||
|
|
6a474eb0a0 | ||
|
|
020d8d9e5b | ||
|
|
220ca33cc9 | ||
|
|
2cee4cca06 | ||
|
|
a31ace8032 | ||
|
|
6d0495eab8 | ||
|
|
6d6457a32f | ||
|
|
befe0e5254 | ||
|
|
2c41230b74 | ||
|
|
0e1e92750c | ||
|
|
b27854b8a5 | ||
|
|
2c504ae67e | ||
|
|
24d02895ef | ||
|
|
01887e37b4 | ||
|
|
628f76c20a | ||
|
|
f31e7b1860 | ||
|
|
073d52a17c | ||
|
|
eac3531f31 | ||
|
|
7873e25779 | ||
|
|
f468611b01 | ||
|
|
d3aea54b6c | ||
|
|
1d5afe8cd6 | ||
|
|
91d6aacc74 | ||
|
|
0036ba94d9 | ||
|
|
3711663a12 | ||
|
|
7e2eb531ba | ||
|
|
39cca07432 | ||
|
|
001cdd34c7 | ||
|
|
4cb0201970 | ||
|
|
56da4a2850 | ||
|
|
f613fea791 | ||
|
|
ccd25b0c93 | ||
|
|
60c14c2cef | ||
|
|
895274ad24 | ||
|
|
bf13b81837 | ||
|
|
adeb9f26c3 | ||
|
|
c3631f6ac7 | ||
|
|
1301fc3dc4 | ||
|
|
d76fa989d1 | ||
|
|
53dd0073f1 | ||
|
|
b6b0b0a8c5 | ||
|
|
c0573d76fd | ||
|
|
44b803a529 | ||
|
|
c6705a82db | ||
|
|
66813584f5 | ||
|
|
e61829052e | ||
|
|
701d358ea6 | ||
|
|
15d434fce2 | ||
|
|
c801729215 | ||
|
|
2e192380f0 | ||
|
|
4c4355a910 | ||
|
|
7c17a2dcd0 | ||
|
|
186a97042b | ||
|
|
d2f6d2d6b8 | ||
|
|
0c1bbd0c96 | ||
|
|
f5f9a7d303 | ||
|
|
224bd11821 | ||
|
|
6d6cac850b | ||
|
|
d81cc0bd4a | ||
|
|
73459f2b83 | ||
|
|
aa8c96de7b | ||
|
|
61a7701e78 | ||
|
|
337086b90b | ||
|
|
20003aa49d | ||
|
|
e1d5a68a90 | ||
|
|
ac5f94a6ac | ||
|
|
d85e3b977e | ||
|
|
fead675aae | ||
|
|
c33267750d | ||
|
|
9c5badc2bf | ||
|
|
b65713f902 | ||
|
|
8ad18383cc | ||
|
|
6e1892dd4e | ||
|
|
f593295d06 | ||
|
|
7eb142e598 | ||
|
|
4d322a8fae | ||
|
|
ccea7827ce | ||
|
|
ed2bb78657 | ||
|
|
8871352b2c | ||
|
|
04632728bc | ||
|
|
d92475b980 | ||
|
|
89c4b68b9f | ||
|
|
6e97d98118 | ||
|
|
e326b81b3f | ||
|
|
a7ced3d78a | ||
|
|
c78ff37f56 | ||
|
|
560abad128 | ||
|
|
1adba9193a | ||
|
|
a6d492d970 | ||
|
|
56a7f271ff | ||
|
|
3fffd22996 | ||
|
|
d11d4c5263 | ||
|
|
ed5260f035 | ||
|
|
5df1608d74 | ||
|
|
773b2600c5 | ||
|
|
d0fddf2da6 | ||
|
|
8ccc3dc129 | ||
|
|
2b001f003b | ||
|
|
dd88bef85a | ||
|
|
2a6e92e586 | ||
|
|
102b23434b | ||
|
|
7ea7c8497c | ||
|
|
2faafdd9f3 | ||
|
|
a09c84258f | ||
|
|
8a3ce58d4e | ||
|
|
599a89ee6a | ||
|
|
5b0b91eb46 | ||
|
|
cddfd8b835 | ||
|
|
770c9fa167 | ||
|
|
ecf4b10238 | ||
|
|
4c64b406df | ||
|
|
031b9052d1 | ||
|
|
f276b836c7 | ||
|
|
e63b05ff16 | ||
|
|
0113d07a63 | ||
|
|
c0b6e918ad | ||
|
|
92d3c7c8f0 | ||
|
|
543c741502 | ||
|
|
018f87767d | ||
|
|
238884ad53 | ||
|
|
cd83136278 | ||
|
|
6759803ccd | ||
|
|
b5f6a447b9 | ||
|
|
b26b124cfe | ||
|
|
e58df9ac97 | ||
|
|
11f7c6f115 | ||
|
|
662b808ba9 | ||
|
|
dbeba818f7 | ||
|
|
666c3b4143 | ||
|
|
e2dba246b2 | ||
|
|
4e57d27a57 | ||
|
|
4a58c43af9 | ||
|
|
1d2006761d | ||
|
|
23bc94451e | ||
|
|
a1f3349da0 | ||
|
|
f99889d5e8 | ||
|
|
137138a8ab | ||
|
|
640b0eac0e | ||
|
|
73b78d6335 | ||
|
|
7558c998df | ||
|
|
387aad83b6 | ||
|
|
43b07b6d6a | ||
|
|
b6abcc41cf | ||
|
|
a307c128fa | ||
|
|
16b78523e5 | ||
|
|
8084761154 | ||
|
|
d3dd5a86a8 | ||
|
|
69510094d3 | ||
|
|
b0ca83f760 | ||
|
|
2c707a74dd | ||
|
|
dfbbed0709 | ||
|
|
842b2d2d55 | ||
|
|
af22795cd5 | ||
|
|
cd71351181 | ||
|
|
86b3f49e6b | ||
|
|
7e53863d15 | ||
|
|
a5832e8d02 | ||
|
|
fc68c4574a | ||
|
|
f4a7a8657e | ||
|
|
943bf1f36c | ||
|
|
2482416ea5 | ||
|
|
431369ed42 | ||
|
|
314ff73280 | ||
|
|
ce6df518a2 | ||
|
|
99049da5c6 | ||
|
|
1d73c51712 | ||
|
|
dead6872d4 | ||
|
|
7a93a494ec | ||
|
|
93cfc97d1d | ||
|
|
bcd16b7840 | ||
|
|
be9f626c85 | ||
|
|
1133f5cc3a | ||
|
|
b37ae23af7 | ||
|
|
e9574d66df | ||
|
|
55f6b882df | ||
|
|
8692665724 | ||
|
|
93f483e42c | ||
|
|
05e3415059 | ||
|
|
e6b66636b9 | ||
|
|
13c6a1fd77 | ||
|
|
9b6c6da639 | ||
|
|
7b596c1110 | ||
|
|
23e0977218 | ||
|
|
7fbcb054ad | ||
|
|
40a2af2b3d | ||
|
|
0b8180a2cf | ||
|
|
33f3aa8dd2 | ||
|
|
6682a3117b | ||
|
|
38ea209a40 | ||
|
|
295868b923 | ||
|
|
fc8e96cc9e | ||
|
|
58387605e6 | ||
|
|
1d5e5d3722 | ||
|
|
4aa9c1bf34 | ||
|
|
d347523942 | ||
|
|
a181c36ccb | ||
|
|
28a2b5e926 | ||
|
|
65a7538452 | ||
|
|
bb3a86298e | ||
|
|
d01ae7004a | ||
|
|
31f3384c8e | ||
|
|
97823bc12b | ||
|
|
7f2514c177 | ||
|
|
e3b487205d | ||
|
|
b5dd8d4565 | ||
|
|
7341598cc3 | ||
|
|
04dd608930 | ||
|
|
8b64b415c4 | ||
|
|
0da8d430d9 | ||
|
|
38570c26c7 | ||
|
|
78c6b3e5cd | ||
|
|
7550554c3e | ||
|
|
68bb6f6fcf | ||
|
|
bf01b1a7de | ||
|
|
c53cbfe156 | ||
|
|
a1f839d732 | ||
|
|
71de6900ee | ||
|
|
11665834b5 | ||
|
|
36eed1bc43 | ||
|
|
b39d6a33b7 | ||
|
|
9c554375aa | ||
|
|
7c6c82e0ac | ||
|
|
ceccc5baab | ||
|
|
379d6ac634 | ||
|
|
53c75ce01c | ||
|
|
08044e5c0d | ||
|
|
63b1d7ac72 | ||
|
|
63450c65e1 | ||
|
|
e9d206bf9b | ||
|
|
3913028800 | ||
|
|
b8879d6b75 | ||
|
|
7df74c2bbb | ||
|
|
1782a32674 | ||
|
|
20574c7e94 | ||
|
|
a78eb07c77 | ||
|
|
a8bdcde4bf | ||
|
|
523aa75588 | ||
|
|
2b36871281 | ||
|
|
0cff71b9d6 | ||
|
|
e3d358e4e0 | ||
|
|
afacc475b4 | ||
|
|
8051ef7c9f | ||
|
|
d0a13b63ff | ||
|
|
c5734f96b8 | ||
|
|
adefbcfcf8 | ||
|
|
eb9e3ba9fe | ||
|
|
6e3055e753 | ||
|
|
6c3a133ccd | ||
|
|
75af89464d | ||
|
|
b40676518c | ||
|
|
86b86b50f9 | ||
|
|
5fd455b981 | ||
|
|
58a8ca411c | ||
|
|
d2ff6ba5d2 | ||
|
|
cb3f7e1644 | ||
|
|
e2c6d4be99 | ||
|
|
20802c8a6b | ||
|
|
2243edb175 | ||
|
|
80c4f4cb56 | ||
|
|
b43d0e4b79 | ||
|
|
3c95a6a533 | ||
|
|
d3d0865a00 | ||
|
|
41e2f5ed75 | ||
|
|
8653b1520f | ||
|
|
a67dd3d7b0 | ||
|
|
bdeb2a80f7 | ||
|
|
0eb543a726 | ||
|
|
9c9a2a22f5 | ||
|
|
8aeb05a22d | ||
|
|
11670b30ba | ||
|
|
ff0a9a7335 | ||
|
|
33272aaa22 | ||
|
|
75fc53f93a | ||
|
|
890f416eae | ||
|
|
3a35e4d2d0 | ||
|
|
81ef198d00 | ||
|
|
d7f149e990 | ||
|
|
6e86f51164 | ||
|
|
a086de264c | ||
|
|
dc28197c7b | ||
|
|
de8443298e | ||
|
|
eee92b4ebb | ||
|
|
10a07fe4bf | ||
|
|
a8c10bb017 | ||
|
|
ecfa75c235 | ||
|
|
21bd4b951d | ||
|
|
ff6950b2e2 | ||
|
|
f9a39897a2 | ||
|
|
eeac5f2b9a | ||
|
|
98ea6ba721 | ||
|
|
2ca954f048 | ||
|
|
fa7cf95ee2 | ||
|
|
5680027b72 | ||
|
|
8c6c6991c2 | ||
|
|
addc024e49 | ||
|
|
335bfb02c2 | ||
|
|
fb94a3f3f1 | ||
|
|
9ea9cf4c68 | ||
|
|
e977587fae | ||
|
|
0c02cd98e0 | ||
|
|
c67e19e0bf | ||
|
|
4e4360ec62 | ||
|
|
e786090aeb | ||
|
|
03f2657a6e | ||
|
|
16be4cbbe5 | ||
|
|
53c8b69f1e | ||
|
|
28238b18ff | ||
|
|
f4c06014dd | ||
|
|
fb8ab400b7 | ||
|
|
f2d74defca | ||
|
|
c1c18a5a87 | ||
|
|
54e952748f | ||
|
|
30470c8f6a | ||
|
|
4da7db4305 | ||
|
|
23a00fb15a | ||
|
|
951cc73e46 | ||
|
|
53452ca410 | ||
|
|
01ba441a63 | ||
|
|
582c1a6e7f | ||
|
|
77d1037a90 | ||
|
|
52587ef69b | ||
|
|
ea66ae350b | ||
|
|
ad3a16f423 | ||
|
|
4cf37d449e | ||
|
|
2c00752e23 | ||
|
|
05e15487e4 | ||
|
|
99236e82ad | ||
|
|
b9f5686a3c | ||
|
|
b99a7fe494 | ||
|
|
f028bc9b6c | ||
|
|
bd1bfbfaf9 | ||
|
|
f61696fb3f | ||
|
|
f47f859de0 | ||
|
|
6a18f3509b | ||
|
|
02734791cd | ||
|
|
6194f3d9e7 | ||
|
|
197c6dde81 | ||
|
|
ea87916f4b | ||
|
|
b710bdaafd | ||
|
|
7b2d6a91fb | ||
|
|
c7a542fd17 | ||
|
|
fa2b3c9511 | ||
|
|
d6258ab74d | ||
|
|
f633ef8137 | ||
|
|
a4c6fd9ff7 | ||
|
|
0812d13003 | ||
|
|
c97407ae56 | ||
|
|
b2b56e6366 | ||
|
|
78e3689062 | ||
|
|
9f77f3a60d | ||
|
|
db85c2c4b3 | ||
|
|
dc26cef572 | ||
|
|
bc149a2deb | ||
|
|
1e46c97bbd | ||
|
|
790744c9e1 | ||
|
|
033c38fc91 | ||
|
|
825a2070c5 | ||
|
|
5128dc6743 | ||
|
|
8828e1fc28 | ||
|
|
a43949d123 | ||
|
|
61bc732810 | ||
|
|
555872bdef | ||
|
|
c0d776f64c | ||
|
|
a2dd11326f | ||
|
|
0904101b7d | ||
|
|
6fc9aa6dfc | ||
|
|
3b72126f5f | ||
|
|
80fb72928e | ||
|
|
8ee9fc36ab | ||
|
|
89e731031c | ||
|
|
619bc8a6f9 | ||
|
|
a2523f1a1e | ||
|
|
3499548a2f | ||
|
|
4460ee00cf | ||
|
|
89290bf7a4 | ||
|
|
a07b36b61f | ||
|
|
6f305d6254 | ||
|
|
7e356b733e | ||
|
|
f2c8ae6a0a | ||
|
|
b1ab540c11 | ||
|
|
9ca0bfc5d8 | ||
|
|
7011250353 | ||
|
|
744400b161 | ||
|
|
d0b81c1c7b | ||
|
|
adfaf141d3 | ||
|
|
a8047ba0a9 | ||
|
|
b142654dfc | ||
|
|
56d4688f2c | ||
|
|
df26e74145 | ||
|
|
8dd9154982 | ||
|
|
23e4f9468d | ||
|
|
aa966de4bc | ||
|
|
a711083e90 | ||
|
|
99bafb052b | ||
|
|
61b5cd8e43 | ||
|
|
1466ff2422 | ||
|
|
af8a979984 | ||
|
|
1d562d1fe4 | ||
|
|
d437654320 | ||
|
|
1eb5eb2d54 | ||
|
|
dbc90cfce5 | ||
|
|
cf5c0fd68c | ||
|
|
b02f40318c | ||
|
|
bc6d65de26 | ||
|
|
09f2fc4d4b | ||
|
|
5c06b32a30 | ||
|
|
125c55e1e3 | ||
|
|
841fe6e396 | ||
|
|
f245310927 | ||
|
|
5e31182bc8 | ||
|
|
0ca4d20720 | ||
|
|
2ddce1acd5 | ||
|
|
dc88a00ea4 | ||
|
|
df61e88714 | ||
|
|
36efc7366e | ||
|
|
a829d01e7c | ||
|
|
1459ad8611 | ||
|
|
2e78b153d5 | ||
|
|
467d79120e | ||
|
|
9080349615 | ||
|
|
2085dda0a3 | ||
|
|
52e69abb88 | ||
|
|
06fa73666f | ||
|
|
d7940213ab | ||
|
|
da5ec5b357 | ||
|
|
605fc0dbcf | ||
|
|
9da07fd160 | ||
|
|
913f8dc256 | ||
|
|
f8cb9e9364 | ||
|
|
7ec234a052 | ||
|
|
bb12670ef3 | ||
|
|
120a82c82b | ||
|
|
bd9128044a | ||
|
|
9e54b8d82b | ||
|
|
1f3f09d713 | ||
|
|
17c9a26c8a | ||
|
|
5755d462cc | ||
|
|
f4de32550c | ||
|
|
2ae9c679e1 | ||
|
|
9dc4de0f07 | ||
|
|
a64a415f59 | ||
|
|
5e02fdc2ae | ||
|
|
6d75c4b464 | ||
|
|
ff05648b04 | ||
|
|
0a114cd313 | ||
|
|
db3b17ed5f | ||
|
|
14231fdd0a | ||
|
|
76565e959a | ||
|
|
70e67f7960 | ||
|
|
9f244b9c01 | ||
|
|
6a1dccd270 | ||
|
|
b146954afd | ||
|
|
58cc24e9c4 | ||
|
|
c97de461a8 | ||
|
|
e759240175 | ||
|
|
0eb3abd44a | ||
|
|
e699910675 | ||
|
|
c92acf2b3b | ||
|
|
b439fa8bf0 | ||
|
|
7ac8d1f1aa | ||
|
|
991b928edb | ||
|
|
fb815c0453 | ||
|
|
74ddae0fd9 | ||
|
|
4dcc9ec510 | ||
|
|
2245167580 | ||
|
|
97fe1bbcf6 | ||
|
|
93fc626332 | ||
|
|
16f19e6b4a | ||
|
|
66ed3478cd | ||
|
|
982fd32a06 | ||
|
|
3be15436a8 | ||
|
|
443a543bb5 | ||
|
|
e28773850f | ||
|
|
52e740cf58 | ||
|
|
bdd8921328 | ||
|
|
3f2596c247 | ||
|
|
73305fe0df | ||
|
|
5ca13c71b3 | ||
|
|
06730f3f7b | ||
|
|
464a7a3ee3 | ||
|
|
bd52738e4c | ||
|
|
3b6a4b85a9 | ||
|
|
03d030feab | ||
|
|
1082dc5417 | ||
|
|
afb9f38ab4 | ||
|
|
9754747785 | ||
|
|
8ea2aca735 | ||
|
|
f7dcce698b | ||
|
|
b94779f7d4 | ||
|
|
b24db52b3d | ||
|
|
19571e3b2b | ||
|
|
3ae3d6c677 | ||
|
|
6924828c8d | ||
|
|
5aa8f2b25c | ||
|
|
f0b14e680e | ||
|
|
fc3f1c6588 | ||
|
|
e62c771a3f | ||
|
|
72ada92aa4 | ||
|
|
18aa2776b0 | ||
|
|
3a30d2c5ea | ||
|
|
e859aa23bf | ||
|
|
ca3a453447 | ||
|
|
7132d16053 | ||
|
|
fcf8dc2cde | ||
|
|
3aebb20ec2 | ||
|
|
db1d6d9e0c | ||
|
|
0501e98b13 | ||
|
|
0609d8bfae | ||
|
|
89c6d45786 | ||
|
|
48065e5d83 | ||
|
|
5c3a8931ed | ||
|
|
f994c67cc5 | ||
|
|
466e706f1c | ||
|
|
de01752a8b | ||
|
|
e2a3b48481 |
261 changed files with 42856 additions and 39657 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -20,6 +20,9 @@
|
||||||
# pycharm project specific settings files
|
# pycharm project specific settings files
|
||||||
.idea
|
.idea
|
||||||
|
|
||||||
|
# vscode project specific settings file
|
||||||
|
.vscode
|
||||||
|
|
||||||
cleanup.sh
|
cleanup.sh
|
||||||
FanFictionDownLoader.zip
|
FanFictionDownLoader.zip
|
||||||
*.epub
|
*.epub
|
||||||
|
|
|
||||||
23
README.md
23
README.md
|
|
@ -44,19 +44,23 @@ pip install FanFicFare
|
||||||
```
|
```
|
||||||
- _As of late November 2019, the web service version is shutdown. See the [Wiki Home](https://github.com/JimmXinu/FanFicFare/wiki#web-service-version) page for details._
|
- _As of late November 2019, the web service version is shutdown. See the [Wiki Home](https://github.com/JimmXinu/FanFicFare/wiki#web-service-version) page for details._
|
||||||
|
|
||||||
|
### Test Versions
|
||||||
|
|
||||||
|
FanFicFare is released roughly every month, but new test versions are posted more frequently as changes are made.
|
||||||
|
|
||||||
|
Test versions are available at:
|
||||||
|
|
||||||
|
- The [test plugin] is posted at MobileRead.
|
||||||
|
- The test version of CLI for pip install is uploaded to the testpypi repository and can be installed with:
|
||||||
|
```
|
||||||
|
pip install --extra-index-url https://test.pypi.org/simple/ --upgrade FanFicFare
|
||||||
|
```
|
||||||
|
|
||||||
### Other Releases
|
### Other Releases
|
||||||
|
|
||||||
Other versions may be available depending on your OS. I(JimmXinu) don't directly support these:
|
Other versions may be available depending on your OS. I(JimmXinu) don't directly support these:
|
||||||
|
|
||||||
- **Arch Linux**: The CLI can also be obtained on Arch Linux from the OS repositories:
|
- **Arch Linux**: The latest CLI release can be obtained from the [fanficfare](https://aur.archlinux.org/packages/fanficfare) AUR package. It will install the calibre plugin, if calibre is installed.
|
||||||
|
|
||||||
```
|
|
||||||
pacman -S fanficfare
|
|
||||||
```
|
|
||||||
|
|
||||||
...or from git via the [AUR package](https://aur.archlinux.org/packages/fanficfare-git)
|
|
||||||
(which will also update the calibre plugin, if calibre is installed).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[this post in the old FFDL thread]: https://www.mobileread.com/forums/showthread.php?p=1982785#post1982785
|
[this post in the old FFDL thread]: https://www.mobileread.com/forums/showthread.php?p=1982785#post1982785
|
||||||
|
|
@ -64,3 +68,4 @@ pacman -S fanficfare
|
||||||
[FanFicFare maillist]: https://groups.google.com/group/fanfic-downloader
|
[FanFicFare maillist]: https://groups.google.com/group/fanfic-downloader
|
||||||
[wiki]: https://github.com/JimmXinu/FanFicFare/wiki
|
[wiki]: https://github.com/JimmXinu/FanFicFare/wiki
|
||||||
[discussion thread]: https://www.mobileread.com/forums/showthread.php?t=259221
|
[discussion thread]: https://www.mobileread.com/forums/showthread.php?t=259221
|
||||||
|
[test plugin]: https://www.mobileread.com/forums/showthread.php?p=3084025&postcount=2
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
[main]
|
[main]
|
||||||
host = https://www.transifex.com
|
host = https://www.transifex.com
|
||||||
|
|
||||||
[calibre-plugins.fanfictiondownloader]
|
[o:calibre:p:calibre-plugins:r:fanfictiondownloader]
|
||||||
file_filter = translations/<lang>.po
|
file_filter = translations/<lang>.po
|
||||||
|
source_file = translations/en.po
|
||||||
source_lang = en
|
source_lang = en
|
||||||
type = PO
|
type = PO
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ except NameError:
|
||||||
from calibre.customize import InterfaceActionBase
|
from calibre.customize import InterfaceActionBase
|
||||||
|
|
||||||
# pulled out from FanFicFareBase for saving in prefs.py
|
# pulled out from FanFicFareBase for saving in prefs.py
|
||||||
__version__ = (4, 4, 0)
|
__version__ = (4, 57, 7)
|
||||||
|
|
||||||
## Apparently the name for this class doesn't matter--it was still
|
## Apparently the name for this class doesn't matter--it was still
|
||||||
## 'demo' for the first few versions.
|
## 'demo' for the first few versions.
|
||||||
|
|
|
||||||
20
calibre-plugin/action_chains.py
Normal file
20
calibre-plugin/action_chains.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2024, Jim Miller'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
## References:
|
||||||
|
## https://www.mobileread.com/forums/showthread.php?p=4435205&postcount=65
|
||||||
|
## https://www.mobileread.com/forums/showthread.php?p=4102834&postcount=389
|
||||||
|
|
||||||
|
from calibre_plugins.action_chains.events import ChainEvent
|
||||||
|
|
||||||
|
class FanFicFareDownloadFinished(ChainEvent):
|
||||||
|
|
||||||
|
# replace with the name of your event
|
||||||
|
name = 'FanFicFare Download Finished'
|
||||||
|
|
||||||
|
def get_event_signal(self):
|
||||||
|
return self.gui.iactions['FanFicFare'].download_finished_signal
|
||||||
|
|
@ -9,10 +9,7 @@ __docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
try:
|
from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
|
||||||
from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
|
|
||||||
except ImportError as e:
|
|
||||||
from PyQt4.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
|
|
||||||
|
|
||||||
from fanficfare.six import string_types
|
from fanficfare.six import string_types
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
print_function)
|
print_function)
|
||||||
import six
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>, 2018, Jim Miller'
|
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>, 2018, Jim Miller'
|
||||||
|
|
@ -10,26 +9,21 @@ __docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
try:
|
from PyQt5.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
|
||||||
from PyQt5 import QtWidgets as QtGui
|
QTableWidgetItem, QFont, QLineEdit, QComboBox,
|
||||||
from PyQt5.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
|
QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
|
||||||
QTableWidgetItem, QFont, QLineEdit, QComboBox,
|
QTextEdit, QListWidget, QAbstractItemView, QCursor)
|
||||||
QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
|
|
||||||
QTextEdit, QListWidget, QAbstractItemView, QCursor)
|
|
||||||
except ImportError as e:
|
|
||||||
from PyQt4 import QtGui
|
|
||||||
from PyQt4.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
|
|
||||||
QTableWidgetItem, QFont, QLineEdit, QComboBox,
|
|
||||||
QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
|
|
||||||
QTextEdit, QListWidget, QAbstractItemView, QCursor)
|
|
||||||
|
|
||||||
|
from calibre.constants import numeric_version as calibre_version
|
||||||
from calibre.constants import iswindows, DEBUG
|
from calibre.constants import iswindows, DEBUG
|
||||||
from calibre.gui2 import UNDEFINED_QDATETIME, gprefs, info_dialog
|
from calibre.gui2 import UNDEFINED_QDATETIME, gprefs, info_dialog
|
||||||
from calibre.gui2.actions import menu_action_unique_name
|
from calibre.gui2.actions import menu_action_unique_name
|
||||||
from calibre.gui2.keyboard import ShortcutConfig
|
from calibre.gui2.keyboard import ShortcutConfig
|
||||||
from calibre.utils.config import config_dir
|
from calibre.utils.config import config_dir
|
||||||
from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE
|
from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE
|
||||||
from fanficfare.six import text_type as unicode
|
|
||||||
|
import fanficfare.six as six
|
||||||
|
from six import text_type as unicode
|
||||||
|
|
||||||
# Global definition of our plugin name. Used for common functions that require this.
|
# Global definition of our plugin name. Used for common functions that require this.
|
||||||
plugin_name = None
|
plugin_name = None
|
||||||
|
|
@ -48,8 +42,41 @@ def set_plugin_icon_resources(name, resources):
|
||||||
plugin_name = name
|
plugin_name = name
|
||||||
plugin_icon_resources = resources
|
plugin_icon_resources = resources
|
||||||
|
|
||||||
|
# print_tracebacks_for_missing_resources first appears in cal 6.2.0
|
||||||
|
if calibre_version >= (6,2,0):
|
||||||
|
def get_icons_nolog(icon_name,plugin_name):
|
||||||
|
return get_icons(icon_name,
|
||||||
|
plugin_name,
|
||||||
|
print_tracebacks_for_missing_resources=False)
|
||||||
|
else:
|
||||||
|
get_icons_nolog = get_icons
|
||||||
|
|
||||||
def get_icon(icon_name):
|
def get_icon_6plus(icon_name):
|
||||||
|
'''
|
||||||
|
Retrieve a QIcon for the named image from
|
||||||
|
1. Calibre's image cache
|
||||||
|
2. resources/images
|
||||||
|
3. the icon theme
|
||||||
|
4. the plugin zip
|
||||||
|
Only plugin zip has images/ in the image name for backward
|
||||||
|
compatibility.
|
||||||
|
'''
|
||||||
|
icon = None
|
||||||
|
if icon_name:
|
||||||
|
icon = QIcon.ic(icon_name)
|
||||||
|
## both .ic and get_icons return an empty QIcon if not found.
|
||||||
|
if not icon or icon.isNull():
|
||||||
|
# don't need a tracestack from get_icons just because
|
||||||
|
# there's no icon in the theme
|
||||||
|
icon = get_icons_nolog(icon_name.replace('images/',''),
|
||||||
|
plugin_name)
|
||||||
|
if not icon or icon.isNull():
|
||||||
|
icon = get_icons(icon_name,plugin_name)
|
||||||
|
if not icon:
|
||||||
|
icon = QIcon()
|
||||||
|
return icon
|
||||||
|
|
||||||
|
def get_icon_old(icon_name):
|
||||||
'''
|
'''
|
||||||
Retrieve a QIcon for the named image from the zip file if it exists,
|
Retrieve a QIcon for the named image from the zip file if it exists,
|
||||||
or if not then from Calibre's image cache.
|
or if not then from Calibre's image cache.
|
||||||
|
|
@ -63,6 +90,11 @@ def get_icon(icon_name):
|
||||||
return QIcon(pixmap)
|
return QIcon(pixmap)
|
||||||
return QIcon()
|
return QIcon()
|
||||||
|
|
||||||
|
# get_icons changed in Cal6.
|
||||||
|
if calibre_version >= (6,0,0):
|
||||||
|
get_icon = get_icon_6plus
|
||||||
|
else:
|
||||||
|
get_icon = get_icon_old
|
||||||
|
|
||||||
def get_pixmap(icon_name):
|
def get_pixmap(icon_name):
|
||||||
'''
|
'''
|
||||||
|
|
@ -109,34 +141,6 @@ def get_local_images_dir(subfolder=None):
|
||||||
return images_dir
|
return images_dir
|
||||||
|
|
||||||
|
|
||||||
def create_menu_item(ia, parent_menu, menu_text, image=None, tooltip=None,
|
|
||||||
shortcut=(), triggered=None, is_checked=None):
|
|
||||||
'''
|
|
||||||
Create a menu action with the specified criteria and action
|
|
||||||
Note that if no shortcut is specified, will not appear in Preferences->Keyboard
|
|
||||||
This method should only be used for actions which either have no shortcuts,
|
|
||||||
or register their menus only once. Use create_menu_action_unique for all else.
|
|
||||||
'''
|
|
||||||
if shortcut is not None:
|
|
||||||
if len(shortcut) == 0:
|
|
||||||
shortcut = ()
|
|
||||||
else:
|
|
||||||
shortcut = shortcut
|
|
||||||
ac = ia.create_action(spec=(menu_text, None, tooltip, shortcut),
|
|
||||||
attr=menu_text)
|
|
||||||
if image:
|
|
||||||
ac.setIcon(get_icon(image))
|
|
||||||
if triggered is not None:
|
|
||||||
ac.triggered.connect(triggered)
|
|
||||||
if is_checked is not None:
|
|
||||||
ac.setCheckable(True)
|
|
||||||
if is_checked:
|
|
||||||
ac.setChecked(True)
|
|
||||||
|
|
||||||
parent_menu.addAction(ac)
|
|
||||||
return ac
|
|
||||||
|
|
||||||
|
|
||||||
def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None,
|
def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None,
|
||||||
shortcut=None, triggered=None, is_checked=None, shortcut_name=None,
|
shortcut=None, triggered=None, is_checked=None, shortcut_name=None,
|
||||||
unique_name=None):
|
unique_name=None):
|
||||||
|
|
@ -177,13 +181,6 @@ def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=No
|
||||||
return ac
|
return ac
|
||||||
|
|
||||||
|
|
||||||
def swap_author_names(author):
|
|
||||||
if author.find(',') == -1:
|
|
||||||
return author
|
|
||||||
name_parts = author.strip().partition(',')
|
|
||||||
return name_parts[2].strip() + ' ' + name_parts[0]
|
|
||||||
|
|
||||||
|
|
||||||
def get_library_uuid(db):
|
def get_library_uuid(db):
|
||||||
try:
|
try:
|
||||||
library_uuid = db.library_id
|
library_uuid = db.library_id
|
||||||
|
|
@ -200,17 +197,6 @@ def busy_cursor():
|
||||||
finally:
|
finally:
|
||||||
QApplication.restoreOverrideCursor()
|
QApplication.restoreOverrideCursor()
|
||||||
|
|
||||||
|
|
||||||
class ImageLabel(QLabel):
|
|
||||||
|
|
||||||
def __init__(self, parent, icon_name, size=16):
|
|
||||||
QLabel.__init__(self, parent)
|
|
||||||
pixmap = get_pixmap(icon_name)
|
|
||||||
self.setPixmap(pixmap)
|
|
||||||
self.setMaximumSize(size, size)
|
|
||||||
self.setScaledContents(True)
|
|
||||||
|
|
||||||
|
|
||||||
class ImageTitleLayout(QHBoxLayout):
|
class ImageTitleLayout(QHBoxLayout):
|
||||||
'''
|
'''
|
||||||
A reusable layout widget displaying an image followed by a title
|
A reusable layout widget displaying an image followed by a title
|
||||||
|
|
@ -266,7 +252,7 @@ class EditableTableWidgetItem(QTableWidgetItem):
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
if text is None:
|
if text is None:
|
||||||
text = ''
|
text = ''
|
||||||
QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType)
|
QTableWidgetItem.__init__(self, text)
|
||||||
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled|Qt.ItemIsEditable)
|
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled|Qt.ItemIsEditable)
|
||||||
|
|
||||||
class ReadOnlyTableWidgetItem(QTableWidgetItem):
|
class ReadOnlyTableWidgetItem(QTableWidgetItem):
|
||||||
|
|
@ -274,65 +260,10 @@ class ReadOnlyTableWidgetItem(QTableWidgetItem):
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
if text is None:
|
if text is None:
|
||||||
text = ''
|
text = ''
|
||||||
QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType)
|
QTableWidgetItem.__init__(self, text)
|
||||||
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
|
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
|
||||||
|
|
||||||
|
|
||||||
class RatingTableWidgetItem(QTableWidgetItem):
|
|
||||||
|
|
||||||
def __init__(self, rating, is_read_only=False):
|
|
||||||
QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType)
|
|
||||||
self.setData(Qt.DisplayRole, rating)
|
|
||||||
if is_read_only:
|
|
||||||
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
|
|
||||||
|
|
||||||
|
|
||||||
class DateTableWidgetItem(QTableWidgetItem):
|
|
||||||
|
|
||||||
def __init__(self, date_read, is_read_only=False, default_to_today=False):
|
|
||||||
if date_read == UNDEFINED_DATE and default_to_today:
|
|
||||||
date_read = now()
|
|
||||||
if is_read_only:
|
|
||||||
QTableWidgetItem.__init__(self, format_date(date_read, None), QtGui.QTableWidgetItem.UserType)
|
|
||||||
self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
|
|
||||||
else:
|
|
||||||
QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType)
|
|
||||||
self.setData(Qt.DisplayRole, QDateTime(date_read))
|
|
||||||
|
|
||||||
|
|
||||||
class NoWheelComboBox(QComboBox):
|
|
||||||
|
|
||||||
def wheelEvent (self, event):
|
|
||||||
# Disable the mouse wheel on top of the combo box changing selection as plays havoc in a grid
|
|
||||||
event.ignore()
|
|
||||||
|
|
||||||
|
|
||||||
class CheckableTableWidgetItem(QTableWidgetItem):
|
|
||||||
|
|
||||||
def __init__(self, checked=False, is_tristate=False):
|
|
||||||
QTableWidgetItem.__init__(self, '')
|
|
||||||
self.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsUserCheckable | Qt.ItemIsEnabled ))
|
|
||||||
if is_tristate:
|
|
||||||
self.setFlags(self.flags() | Qt.ItemIsTristate)
|
|
||||||
if checked:
|
|
||||||
self.setCheckState(Qt.Checked)
|
|
||||||
else:
|
|
||||||
if is_tristate and checked is None:
|
|
||||||
self.setCheckState(Qt.PartiallyChecked)
|
|
||||||
else:
|
|
||||||
self.setCheckState(Qt.Unchecked)
|
|
||||||
|
|
||||||
def get_boolean_value(self):
|
|
||||||
'''
|
|
||||||
Return a boolean value indicating whether checkbox is checked
|
|
||||||
If this is a tristate checkbox, a partially checked value is returned as None
|
|
||||||
'''
|
|
||||||
if self.checkState() == Qt.PartiallyChecked:
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
return self.checkState() == Qt.Checked
|
|
||||||
|
|
||||||
|
|
||||||
class TextIconWidgetItem(QTableWidgetItem):
|
class TextIconWidgetItem(QTableWidgetItem):
|
||||||
|
|
||||||
def __init__(self, text, icon):
|
def __init__(self, text, icon):
|
||||||
|
|
@ -349,64 +280,6 @@ class ReadOnlyTextIconWidgetItem(ReadOnlyTableWidgetItem):
|
||||||
self.setIcon(icon)
|
self.setIcon(icon)
|
||||||
|
|
||||||
|
|
||||||
class ReadOnlyLineEdit(QLineEdit):
|
|
||||||
|
|
||||||
def __init__(self, text, parent):
|
|
||||||
if text is None:
|
|
||||||
text = ''
|
|
||||||
QLineEdit.__init__(self, text, parent)
|
|
||||||
self.setEnabled(False)
|
|
||||||
|
|
||||||
|
|
||||||
class KeyValueComboBox(QComboBox):
|
|
||||||
|
|
||||||
def __init__(self, parent, values, selected_key):
|
|
||||||
QComboBox.__init__(self, parent)
|
|
||||||
self.values = values
|
|
||||||
self.populate_combo(selected_key)
|
|
||||||
|
|
||||||
def populate_combo(self, selected_key):
|
|
||||||
self.clear()
|
|
||||||
selected_idx = idx = -1
|
|
||||||
for key, value in six.iteritems(self.values):
|
|
||||||
idx = idx + 1
|
|
||||||
self.addItem(value)
|
|
||||||
if key == selected_key:
|
|
||||||
selected_idx = idx
|
|
||||||
self.setCurrentIndex(selected_idx)
|
|
||||||
|
|
||||||
def selected_key(self):
|
|
||||||
for key, value in six.iteritems(self.values):
|
|
||||||
if value == unicode(self.currentText()).strip():
|
|
||||||
return key
|
|
||||||
|
|
||||||
|
|
||||||
class CustomColumnComboBox(QComboBox):
|
|
||||||
|
|
||||||
def __init__(self, parent, custom_columns, selected_column, initial_items=['']):
|
|
||||||
QComboBox.__init__(self, parent)
|
|
||||||
self.populate_combo(custom_columns, selected_column, initial_items)
|
|
||||||
|
|
||||||
def populate_combo(self, custom_columns, selected_column, initial_items=['']):
|
|
||||||
self.clear()
|
|
||||||
self.column_names = initial_items
|
|
||||||
if len(initial_items) > 0:
|
|
||||||
self.addItems(initial_items)
|
|
||||||
selected_idx = 0
|
|
||||||
for idx, value in enumerate(initial_items):
|
|
||||||
if value == selected_column:
|
|
||||||
selected_idx = idx
|
|
||||||
for key in sorted(custom_columns.keys()):
|
|
||||||
self.column_names.append(key)
|
|
||||||
self.addItem('%s (%s)'%(key, custom_columns[key]['name']))
|
|
||||||
if key == selected_column:
|
|
||||||
selected_idx = len(self.column_names) - 1
|
|
||||||
self.setCurrentIndex(selected_idx)
|
|
||||||
|
|
||||||
def get_selected_column(self):
|
|
||||||
return self.column_names[self.currentIndex()]
|
|
||||||
|
|
||||||
|
|
||||||
class KeyboardConfigDialog(SizePersistedDialog):
|
class KeyboardConfigDialog(SizePersistedDialog):
|
||||||
'''
|
'''
|
||||||
This dialog is used to allow editing of keyboard shortcuts.
|
This dialog is used to allow editing of keyboard shortcuts.
|
||||||
|
|
@ -440,43 +313,6 @@ class KeyboardConfigDialog(SizePersistedDialog):
|
||||||
self.accept()
|
self.accept()
|
||||||
|
|
||||||
|
|
||||||
class DateDelegate(QStyledItemDelegate):
|
|
||||||
'''
|
|
||||||
Delegate for dates. Because this delegate stores the
|
|
||||||
format as an instance variable, a new instance must be created for each
|
|
||||||
column. This differs from all the other delegates.
|
|
||||||
'''
|
|
||||||
def __init__(self, parent):
|
|
||||||
QStyledItemDelegate.__init__(self, parent)
|
|
||||||
self.format = 'dd MMM yyyy'
|
|
||||||
|
|
||||||
def displayText(self, val, locale):
|
|
||||||
d = val.toDateTime()
|
|
||||||
if d <= UNDEFINED_QDATETIME:
|
|
||||||
return ''
|
|
||||||
return format_date(qt_to_dt(d, as_utc=False), self.format)
|
|
||||||
|
|
||||||
def createEditor(self, parent, option, index):
|
|
||||||
qde = QStyledItemDelegate.createEditor(self, parent, option, index)
|
|
||||||
qde.setDisplayFormat(self.format)
|
|
||||||
qde.setMinimumDateTime(UNDEFINED_QDATETIME)
|
|
||||||
qde.setSpecialValueText(_('Undefined'))
|
|
||||||
qde.setCalendarPopup(True)
|
|
||||||
return qde
|
|
||||||
|
|
||||||
def setEditorData(self, editor, index):
|
|
||||||
val = index.model().data(index, Qt.DisplayRole).toDateTime()
|
|
||||||
if val is None or val == UNDEFINED_QDATETIME:
|
|
||||||
val = now()
|
|
||||||
editor.setDateTime(val)
|
|
||||||
|
|
||||||
def setModelData(self, editor, model, index):
|
|
||||||
val = editor.dateTime()
|
|
||||||
if val <= UNDEFINED_QDATETIME:
|
|
||||||
model.setData(index, UNDEFINED_QDATETIME, Qt.EditRole)
|
|
||||||
else:
|
|
||||||
model.setData(index, QDateTime(val), Qt.EditRole)
|
|
||||||
|
|
||||||
class PrefsViewerDialog(SizePersistedDialog):
|
class PrefsViewerDialog(SizePersistedDialog):
|
||||||
|
|
||||||
def __init__(self, gui, namespace):
|
def __init__(self, gui, namespace):
|
||||||
|
|
@ -507,7 +343,6 @@ class PrefsViewerDialog(SizePersistedDialog):
|
||||||
self.keys_list.setAlternatingRowColors(True)
|
self.keys_list.setAlternatingRowColors(True)
|
||||||
ml.addWidget(self.keys_list)
|
ml.addWidget(self.keys_list)
|
||||||
self.value_text = QTextEdit(self)
|
self.value_text = QTextEdit(self)
|
||||||
self.value_text.setTabStopWidth(24)
|
|
||||||
self.value_text.setReadOnly(True)
|
self.value_text.setReadOnly(True)
|
||||||
ml.addWidget(self.value_text, 1)
|
ml.addWidget(self.value_text, 1)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
print_function)
|
print_function)
|
||||||
import six
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2021, Jim Miller'
|
__copyright__ = '2021, Jim Miller'
|
||||||
|
|
@ -15,37 +14,17 @@ import re
|
||||||
import threading
|
import threading
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
try:
|
from PyQt5 import QtWidgets as QtGui
|
||||||
from PyQt5 import QtWidgets as QtGui
|
from PyQt5.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel,
|
||||||
from PyQt5.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel,
|
QLineEdit, QComboBox, QCheckBox, QPushButton, QTabWidget,
|
||||||
QLineEdit, QComboBox, QCheckBox, QPushButton, QTabWidget,
|
QScrollArea, QGroupBox, QButtonGroup, QRadioButton,
|
||||||
QScrollArea, QGroupBox, QButtonGroup, QRadioButton,
|
Qt)
|
||||||
Qt)
|
|
||||||
except ImportError as e:
|
|
||||||
from PyQt4 import QtGui
|
|
||||||
from PyQt4.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel,
|
|
||||||
QLineEdit, QComboBox, QCheckBox, QPushButton, QTabWidget,
|
|
||||||
QScrollArea, QGroupBox, QButtonGroup, QRadioButton,
|
|
||||||
Qt)
|
|
||||||
try:
|
|
||||||
from calibre.gui2 import QVariant
|
|
||||||
del QVariant
|
|
||||||
except ImportError:
|
|
||||||
is_qt4 = False
|
|
||||||
convert_qvariant = lambda x: x
|
|
||||||
else:
|
|
||||||
is_qt4 = True
|
|
||||||
def convert_qvariant(x):
|
|
||||||
vt = x.type()
|
|
||||||
if vt == x.String:
|
|
||||||
return unicode(x.toString())
|
|
||||||
if vt == x.List:
|
|
||||||
return [convert_qvariant(i) for i in x.toList()]
|
|
||||||
return x.toPyObject()
|
|
||||||
|
|
||||||
from calibre.gui2 import dynamic, info_dialog
|
from calibre.gui2 import dynamic, info_dialog
|
||||||
from calibre.gui2.complete2 import EditWithComplete
|
from calibre.gui2.complete2 import EditWithComplete
|
||||||
from fanficfare.six import text_type as unicode
|
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||||
|
import fanficfare.six as six
|
||||||
|
from six import text_type as unicode
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from calibre.ebooks.covers import generate_cover as cal_generate_cover
|
from calibre.ebooks.covers import generate_cover as cal_generate_cover
|
||||||
|
|
@ -300,7 +279,6 @@ class ConfigWidget(QWidget):
|
||||||
prefs['collision'] = save_collisions[unicode(self.basic_tab.collision.currentText())]
|
prefs['collision'] = save_collisions[unicode(self.basic_tab.collision.currentText())]
|
||||||
prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked()
|
prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked()
|
||||||
prefs['bgmeta'] = self.basic_tab.bgmeta.isChecked()
|
prefs['bgmeta'] = self.basic_tab.bgmeta.isChecked()
|
||||||
prefs['updateepubcover'] = self.basic_tab.updateepubcover.isChecked()
|
|
||||||
prefs['keeptags'] = self.basic_tab.keeptags.isChecked()
|
prefs['keeptags'] = self.basic_tab.keeptags.isChecked()
|
||||||
prefs['mark'] = self.basic_tab.mark.isChecked()
|
prefs['mark'] = self.basic_tab.mark.isChecked()
|
||||||
prefs['mark_success'] = self.basic_tab.mark_success.isChecked()
|
prefs['mark_success'] = self.basic_tab.mark_success.isChecked()
|
||||||
|
|
@ -317,6 +295,7 @@ class ConfigWidget(QWidget):
|
||||||
prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked()
|
prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked()
|
||||||
prefs['checkforseriesurlid'] = self.basic_tab.checkforseriesurlid.isChecked()
|
prefs['checkforseriesurlid'] = self.basic_tab.checkforseriesurlid.isChecked()
|
||||||
prefs['auto_reject_seriesurlid'] = self.basic_tab.auto_reject_seriesurlid.isChecked()
|
prefs['auto_reject_seriesurlid'] = self.basic_tab.auto_reject_seriesurlid.isChecked()
|
||||||
|
prefs['mark_series_anthologies'] = self.basic_tab.mark_series_anthologies.isChecked()
|
||||||
prefs['checkforurlchange'] = self.basic_tab.checkforurlchange.isChecked()
|
prefs['checkforurlchange'] = self.basic_tab.checkforurlchange.isChecked()
|
||||||
prefs['injectseries'] = self.basic_tab.injectseries.isChecked()
|
prefs['injectseries'] = self.basic_tab.injectseries.isChecked()
|
||||||
prefs['matchtitleauth'] = self.basic_tab.matchtitleauth.isChecked()
|
prefs['matchtitleauth'] = self.basic_tab.matchtitleauth.isChecked()
|
||||||
|
|
@ -354,9 +333,10 @@ class ConfigWidget(QWidget):
|
||||||
prefs['calibre_gen_cover'] = self.calibrecover_tab.calibre_gen_cover.isChecked()
|
prefs['calibre_gen_cover'] = self.calibrecover_tab.calibre_gen_cover.isChecked()
|
||||||
prefs['plugin_gen_cover'] = self.calibrecover_tab.plugin_gen_cover.isChecked()
|
prefs['plugin_gen_cover'] = self.calibrecover_tab.plugin_gen_cover.isChecked()
|
||||||
prefs['gcnewonly'] = self.calibrecover_tab.gcnewonly.isChecked()
|
prefs['gcnewonly'] = self.calibrecover_tab.gcnewonly.isChecked()
|
||||||
|
prefs['covernewonly'] = self.calibrecover_tab.covernewonly.isChecked()
|
||||||
gc_site_settings = {}
|
gc_site_settings = {}
|
||||||
for (site,combo) in six.iteritems(self.calibrecover_tab.gc_dropdowns):
|
for (site,combo) in six.iteritems(self.calibrecover_tab.gc_dropdowns):
|
||||||
val = unicode(convert_qvariant(combo.itemData(combo.currentIndex())))
|
val = unicode(combo.itemData(combo.currentIndex()))
|
||||||
if val != 'none':
|
if val != 'none':
|
||||||
gc_site_settings[site] = val
|
gc_site_settings[site] = val
|
||||||
#print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site]))
|
#print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site]))
|
||||||
|
|
@ -391,27 +371,29 @@ class ConfigWidget(QWidget):
|
||||||
prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked()
|
prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked()
|
||||||
prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked()
|
prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked()
|
||||||
prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked()
|
prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked()
|
||||||
|
prefs['seriescase'] = self.std_columns_tab.seriescase.isChecked()
|
||||||
prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked()
|
prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked()
|
||||||
|
|
||||||
prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked()
|
prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked()
|
||||||
|
prefs['set_series_url'] =self.std_columns_tab.set_series_url.isChecked()
|
||||||
prefs['includecomments'] =self.std_columns_tab.includecomments.isChecked()
|
prefs['includecomments'] =self.std_columns_tab.includecomments.isChecked()
|
||||||
prefs['anth_comments_newonly'] =self.std_columns_tab.anth_comments_newonly.isChecked()
|
prefs['anth_comments_newonly'] =self.std_columns_tab.anth_comments_newonly.isChecked()
|
||||||
|
|
||||||
# Custom Columns tab
|
# Custom Columns tab
|
||||||
# error column
|
# error column
|
||||||
prefs['errorcol'] = unicode(convert_qvariant(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex())))
|
prefs['errorcol'] = unicode(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()))
|
||||||
prefs['save_all_errors'] = self.cust_columns_tab.save_all_errors.isChecked()
|
prefs['save_all_errors'] = self.cust_columns_tab.save_all_errors.isChecked()
|
||||||
|
|
||||||
# metadata column
|
# metadata column
|
||||||
prefs['savemetacol'] = unicode(convert_qvariant(self.cust_columns_tab.savemetacol.itemData(self.cust_columns_tab.savemetacol.currentIndex())))
|
prefs['savemetacol'] = unicode(self.cust_columns_tab.savemetacol.itemData(self.cust_columns_tab.savemetacol.currentIndex()))
|
||||||
|
|
||||||
# lastchecked column
|
# lastchecked column
|
||||||
prefs['lastcheckedcol'] = unicode(convert_qvariant(self.cust_columns_tab.lastcheckedcol.itemData(self.cust_columns_tab.lastcheckedcol.currentIndex())))
|
prefs['lastcheckedcol'] = unicode(self.cust_columns_tab.lastcheckedcol.itemData(self.cust_columns_tab.lastcheckedcol.currentIndex()))
|
||||||
|
|
||||||
# cust cols tab
|
# cust cols tab
|
||||||
colsmap = {}
|
colsmap = {}
|
||||||
for (col,combo) in six.iteritems(self.cust_columns_tab.custcol_dropdowns):
|
for (col,combo) in six.iteritems(self.cust_columns_tab.custcol_dropdowns):
|
||||||
val = unicode(convert_qvariant(combo.itemData(combo.currentIndex())))
|
val = unicode(combo.itemData(combo.currentIndex()))
|
||||||
if val != 'none':
|
if val != 'none':
|
||||||
colsmap[col] = val
|
colsmap[col] = val
|
||||||
#print("colsmap[%s]:%s"%(col,colsmap[col]))
|
#print("colsmap[%s]:%s"%(col,colsmap[col]))
|
||||||
|
|
@ -435,6 +417,10 @@ class ConfigWidget(QWidget):
|
||||||
prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked()
|
prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked()
|
||||||
prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked()
|
prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked()
|
||||||
prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked()
|
prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked()
|
||||||
|
|
||||||
|
prefs['site_split_jobs'] = self.other_tab.site_split_jobs.isChecked()
|
||||||
|
prefs['reconsolidate_jobs'] = self.other_tab.reconsolidate_jobs.isChecked()
|
||||||
|
|
||||||
prefs.save_to_db()
|
prefs.save_to_db()
|
||||||
self.plugin_action.set_popup_mode()
|
self.plugin_action.set_popup_mode()
|
||||||
|
|
||||||
|
|
@ -503,11 +489,6 @@ class BasicTab(QWidget):
|
||||||
self.updatemeta.setChecked(prefs['updatemeta'])
|
self.updatemeta.setChecked(prefs['updatemeta'])
|
||||||
horz.addWidget(self.updatemeta)
|
horz.addWidget(self.updatemeta)
|
||||||
|
|
||||||
self.updateepubcover = QCheckBox(_('Default Update EPUB Cover when Updating EPUB?'),self)
|
|
||||||
self.updateepubcover.setToolTip(_("On each download, FanFicFare offers an option to update the book cover image <i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This sets whether that will default to on or off."))
|
|
||||||
self.updateepubcover.setChecked(prefs['updateepubcover'])
|
|
||||||
horz.addWidget(self.updateepubcover)
|
|
||||||
|
|
||||||
self.bgmeta = QCheckBox(_('Default Background Metadata?'),self)
|
self.bgmeta = QCheckBox(_('Default Background Metadata?'),self)
|
||||||
self.bgmeta.setToolTip(_("On each download, FanFicFare offers an option to Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail.<br />Only available for Update/Overwrite of existing books in case URL given isn't canonical or matches to existing book by Title/Author."))
|
self.bgmeta.setToolTip(_("On each download, FanFicFare offers an option to Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail.<br />Only available for Update/Overwrite of existing books in case URL given isn't canonical or matches to existing book by Title/Author."))
|
||||||
self.bgmeta.setChecked(prefs['bgmeta'])
|
self.bgmeta.setChecked(prefs['bgmeta'])
|
||||||
|
|
@ -538,10 +519,24 @@ class BasicTab(QWidget):
|
||||||
self.auto_reject_seriesurlid.setToolTip(_("Automatically reject storys with existing Series Anthology books.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected."))
|
self.auto_reject_seriesurlid.setToolTip(_("Automatically reject storys with existing Series Anthology books.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected."))
|
||||||
self.auto_reject_seriesurlid.setChecked(prefs['auto_reject_seriesurlid'])
|
self.auto_reject_seriesurlid.setChecked(prefs['auto_reject_seriesurlid'])
|
||||||
self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked())
|
self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked())
|
||||||
self.checkforseriesurlid.stateChanged.connect(lambda x : self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked()))
|
|
||||||
|
self.mark_series_anthologies = QCheckBox(_("Mark Matching Anthologies?"),self)
|
||||||
|
self.mark_series_anthologies.setToolTip(_("Mark and show existing Series Anthology books when individual updates are skipped.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected."))
|
||||||
|
self.mark_series_anthologies.setChecked(prefs['mark_series_anthologies'])
|
||||||
|
self.mark_series_anthologies.setEnabled(self.checkforseriesurlid.isChecked())
|
||||||
|
|
||||||
|
def mark_anthologies():
|
||||||
|
self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked())
|
||||||
|
self.mark_series_anthologies.setEnabled(self.checkforseriesurlid.isChecked())
|
||||||
|
self.checkforseriesurlid.stateChanged.connect(mark_anthologies)
|
||||||
|
mark_anthologies()
|
||||||
|
|
||||||
horz = QHBoxLayout()
|
horz = QHBoxLayout()
|
||||||
horz.addItem(QtGui.QSpacerItem(20, 1))
|
horz.addItem(QtGui.QSpacerItem(20, 1))
|
||||||
horz.addWidget(self.auto_reject_seriesurlid)
|
vertright = QVBoxLayout()
|
||||||
|
horz.addLayout(vertright)
|
||||||
|
vertright.addWidget(self.auto_reject_seriesurlid)
|
||||||
|
vertright.addWidget(self.mark_series_anthologies)
|
||||||
self.l.addLayout(horz)
|
self.l.addLayout(horz)
|
||||||
|
|
||||||
self.checkforurlchange = QCheckBox(_("Check for changed Story URL?"),self)
|
self.checkforurlchange = QCheckBox(_("Check for changed Story URL?"),self)
|
||||||
|
|
@ -766,6 +761,7 @@ class BasicTab(QWidget):
|
||||||
tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
|
tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
|
||||||
rejectreasons=rejecturllist.get_reject_reasons(),
|
rejectreasons=rejecturllist.get_reject_reasons(),
|
||||||
reasonslabel=_('Add this reason to all URLs added:'),
|
reasonslabel=_('Add this reason to all URLs added:'),
|
||||||
|
accept_storyurls=True,
|
||||||
save_size_name='fff:Add Reject List')
|
save_size_name='fff:Add Reject List')
|
||||||
d.exec_()
|
d.exec_()
|
||||||
if d.result() == d.Accepted:
|
if d.result() == d.Accepted:
|
||||||
|
|
@ -1004,7 +1000,7 @@ class CalibreCoverTab(QWidget):
|
||||||
|
|
||||||
self.gencov_elements=[] ## used to disable/enable when gen
|
self.gencov_elements=[] ## used to disable/enable when gen
|
||||||
## cover is off/on. This is more
|
## cover is off/on. This is more
|
||||||
## about being a visual que than real
|
## about being a visual cue than real
|
||||||
## necessary function.
|
## necessary function.
|
||||||
|
|
||||||
topl = self.l = QVBoxLayout()
|
topl = self.l = QVBoxLayout()
|
||||||
|
|
@ -1048,9 +1044,17 @@ class CalibreCoverTab(QWidget):
|
||||||
horz.addWidget(self.updatecalcover)
|
horz.addWidget(self.updatecalcover)
|
||||||
self.l.addLayout(horz)
|
self.l.addLayout(horz)
|
||||||
|
|
||||||
|
self.covernewonly = QCheckBox(_("Set Calibre Cover Only for New Books"),self)
|
||||||
|
self.covernewonly.setToolTip(_("Set the Calibre cover from EPUB only for new\nbooks, not updates to existing books."))
|
||||||
|
self.covernewonly.setChecked(prefs['covernewonly'])
|
||||||
|
horz = QHBoxLayout()
|
||||||
|
horz.addItem(QtGui.QSpacerItem(20, 1))
|
||||||
|
horz.addWidget(self.covernewonly)
|
||||||
|
self.l.addLayout(horz)
|
||||||
|
self.l.addSpacing(5)
|
||||||
|
|
||||||
tooltip = _("Generate a Calibre book cover image when Calibre metadata is updated.<br />"
|
tooltip = _("Generate a Calibre book cover image when Calibre metadata is updated.<br />"
|
||||||
"Defaults to 'Yes, Always' for backward compatibility and because %(gc)s(Plugin)"
|
"Note that %(gc)s(Plugin) will only run if there is a %(gc)s setting configured below for Default or the appropriate site.")%no_trans
|
||||||
" will only run if configured for Default or site.")%no_trans
|
|
||||||
horz = QHBoxLayout()
|
horz = QHBoxLayout()
|
||||||
label = QLabel(_('Generate Calibre Cover:'))
|
label = QLabel(_('Generate Calibre Cover:'))
|
||||||
label.setToolTip(tooltip)
|
label.setToolTip(tooltip)
|
||||||
|
|
@ -1058,13 +1062,7 @@ class CalibreCoverTab(QWidget):
|
||||||
self.gencalcover = QComboBox(self)
|
self.gencalcover = QComboBox(self)
|
||||||
for i in gencalcover_order:
|
for i in gencalcover_order:
|
||||||
self.gencalcover.addItem(i)
|
self.gencalcover.addItem(i)
|
||||||
# back compat. If has own value, use.
|
|
||||||
# if prefs['gencalcover']:
|
|
||||||
self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[prefs['gencalcover']]))
|
self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[prefs['gencalcover']]))
|
||||||
# elif prefs['gencover']: # doesn't have own val, set YES if old value set.
|
|
||||||
# self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[SAVE_YES]))
|
|
||||||
# else: # doesn't have own value, old value not set, NO.
|
|
||||||
# self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[SAVE_NO]))
|
|
||||||
|
|
||||||
self.gencalcover.setToolTip(tooltip)
|
self.gencalcover.setToolTip(tooltip)
|
||||||
label.setBuddy(self.gencalcover)
|
label.setBuddy(self.gencalcover)
|
||||||
|
|
@ -1072,6 +1070,26 @@ class CalibreCoverTab(QWidget):
|
||||||
self.l.addLayout(horz)
|
self.l.addLayout(horz)
|
||||||
self.gencalcover.currentIndexChanged.connect(self.endisable_elements)
|
self.gencalcover.currentIndexChanged.connect(self.endisable_elements)
|
||||||
|
|
||||||
|
horz = QHBoxLayout()
|
||||||
|
horz.addItem(QtGui.QSpacerItem(20, 1))
|
||||||
|
vert = QVBoxLayout()
|
||||||
|
horz.addLayout(vert)
|
||||||
|
self.l.addLayout(horz)
|
||||||
|
|
||||||
|
self.gcnewonly = QCheckBox(_("Generate Covers Only for New Books")%no_trans,self)
|
||||||
|
self.gcnewonly.setToolTip(_("Default is to generate a cover any time the calibre metadata is"
|
||||||
|
" updated.<br />Used for both Calibre and Plugin generated covers."))
|
||||||
|
self.gcnewonly.setChecked(prefs['gcnewonly'])
|
||||||
|
vert.addWidget(self.gcnewonly)
|
||||||
|
self.gencov_elements.append(self.gcnewonly)
|
||||||
|
|
||||||
|
self.gc_polish_cover = QCheckBox(_("Inject/update the generated cover inside EPUB"),self)
|
||||||
|
self.gc_polish_cover.setToolTip(_("Calibre's Polish feature will be used to inject or update the generated"
|
||||||
|
" cover into the EPUB ebook file.<br />Used for both Calibre and Plugin generated covers."))
|
||||||
|
self.gc_polish_cover.setChecked(prefs['gc_polish_cover'])
|
||||||
|
vert.addWidget(self.gc_polish_cover)
|
||||||
|
self.gencov_elements.append(self.gc_polish_cover)
|
||||||
|
|
||||||
# can't be local or it's destroyed when __init__ is done and
|
# can't be local or it's destroyed when __init__ is done and
|
||||||
# connected things don't fire.
|
# connected things don't fire.
|
||||||
self.gencov_rdgrp = QButtonGroup()
|
self.gencov_rdgrp = QButtonGroup()
|
||||||
|
|
@ -1080,7 +1098,9 @@ class CalibreCoverTab(QWidget):
|
||||||
self.gencov_gb.setLayout(horz)
|
self.gencov_gb.setLayout(horz)
|
||||||
|
|
||||||
self.plugin_gen_cover = QRadioButton(_('Plugin %(gc)s')%no_trans,self)
|
self.plugin_gen_cover = QRadioButton(_('Plugin %(gc)s')%no_trans,self)
|
||||||
self.plugin_gen_cover.setToolTip(_("Use plugin to create covers. Additional settings are below."))
|
self.plugin_gen_cover.setToolTip(_("Use the %(gc)s plugin to create covers.<br>"
|
||||||
|
"Requires that you have the the %(gc)s plugin installed.<br>"
|
||||||
|
"Additional settings are below.")%no_trans)
|
||||||
self.gencov_rdgrp.addButton(self.plugin_gen_cover)
|
self.gencov_rdgrp.addButton(self.plugin_gen_cover)
|
||||||
# always, new only, when no cover from site, inject yes/no...
|
# always, new only, when no cover from site, inject yes/no...
|
||||||
self.plugin_gen_cover.setChecked(prefs['plugin_gen_cover'])
|
self.plugin_gen_cover.setChecked(prefs['plugin_gen_cover'])
|
||||||
|
|
@ -1102,20 +1122,6 @@ class CalibreCoverTab(QWidget):
|
||||||
#self.l.addLayout(horz)
|
#self.l.addLayout(horz)
|
||||||
self.l.addWidget(self.gencov_gb)
|
self.l.addWidget(self.gencov_gb)
|
||||||
|
|
||||||
self.gcnewonly = QCheckBox(_("Generate Covers Only for New Books")%no_trans,self)
|
|
||||||
self.gcnewonly.setToolTip(_("Default is to generate a cover any time the calibre metadata is"
|
|
||||||
" updated.<br />Used for both Calibre and Plugin generated covers."))
|
|
||||||
self.gcnewonly.setChecked(prefs['gcnewonly'])
|
|
||||||
self.l.addWidget(self.gcnewonly)
|
|
||||||
self.gencov_elements.append(self.gcnewonly)
|
|
||||||
|
|
||||||
self.gc_polish_cover = QCheckBox(_("Inject/update the cover inside EPUB"),self)
|
|
||||||
self.gc_polish_cover.setToolTip(_("Calibre's Polish feature will be used to inject or update the generated"
|
|
||||||
" cover into the EPUB ebook file.<br />Used for both Calibre and Plugin generated covers."))
|
|
||||||
self.gc_polish_cover.setChecked(prefs['gc_polish_cover'])
|
|
||||||
self.l.addWidget(self.gc_polish_cover)
|
|
||||||
self.gencov_elements.append(self.gc_polish_cover)
|
|
||||||
|
|
||||||
self.gcp_gb = QGroupBox(_("%(gc)s(Plugin) Settings")%no_trans)
|
self.gcp_gb = QGroupBox(_("%(gc)s(Plugin) Settings")%no_trans)
|
||||||
topl.addWidget(self.gcp_gb)
|
topl.addWidget(self.gcp_gb)
|
||||||
self.l = QVBoxLayout()
|
self.l = QVBoxLayout()
|
||||||
|
|
@ -1274,6 +1280,31 @@ class OtherTab(QWidget):
|
||||||
self.l = QVBoxLayout()
|
self.l = QVBoxLayout()
|
||||||
self.setLayout(self.l)
|
self.setLayout(self.l)
|
||||||
|
|
||||||
|
groupbox = QGroupBox()
|
||||||
|
self.l.addWidget(groupbox)
|
||||||
|
|
||||||
|
groupl = QVBoxLayout()
|
||||||
|
groupbox.setLayout(groupl)
|
||||||
|
|
||||||
|
label = QLabel("<h3>"+
|
||||||
|
_("Background Job Settings")+
|
||||||
|
"</h3>"
|
||||||
|
)
|
||||||
|
label.setWordWrap(True)
|
||||||
|
groupl.addWidget(label)
|
||||||
|
|
||||||
|
self.site_split_jobs = QCheckBox(_('Split downloads into separate background jobs by site'),self)
|
||||||
|
self.site_split_jobs.setToolTip(_("Launches a separate background Job for each site in the list of stories to download/update. Otherwise, there will be only one background job."))
|
||||||
|
self.site_split_jobs.setChecked(prefs['site_split_jobs'])
|
||||||
|
groupl.addWidget(self.site_split_jobs)
|
||||||
|
|
||||||
|
self.reconsolidate_jobs = QCheckBox(_('Reconsolidate split downloads before updating library'),self)
|
||||||
|
self.reconsolidate_jobs.setToolTip(_("Hold all downloads/updates launched together until they all finish. Otherwise, there will be a 'Proceed to update' dialog for each site."))
|
||||||
|
self.reconsolidate_jobs.setChecked(prefs['reconsolidate_jobs'])
|
||||||
|
groupl.addWidget(self.reconsolidate_jobs)
|
||||||
|
|
||||||
|
self.l.addSpacing(5)
|
||||||
|
|
||||||
label = QLabel(_("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFicFare confirmation dialogs back again."))
|
label = QLabel(_("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFicFare confirmation dialogs back again."))
|
||||||
label.setWordWrap(True)
|
label.setWordWrap(True)
|
||||||
self.l.addWidget(label)
|
self.l.addWidget(label)
|
||||||
|
|
@ -1579,22 +1610,39 @@ class StandardColumnsTab(QWidget):
|
||||||
self.titlecase.setChecked(prefs['titlecase'])
|
self.titlecase.setChecked(prefs['titlecase'])
|
||||||
row.append(self.titlecase)
|
row.append(self.titlecase)
|
||||||
elif key == 'authors':
|
elif key == 'authors':
|
||||||
|
self.set_author_url = QCheckBox(_('Set Calibre Author URL'),self)
|
||||||
|
self.set_author_url.setToolTip(_("Set Calibre Author URL to Author's URL on story site."))
|
||||||
|
self.set_author_url.setChecked(prefs['set_author_url'])
|
||||||
|
row.append(self.set_author_url)
|
||||||
|
|
||||||
self.suppressauthorsort = QCheckBox(_('Force Author into Author Sort?'),self)
|
self.suppressauthorsort = QCheckBox(_('Force Author into Author Sort?'),self)
|
||||||
self.suppressauthorsort.setToolTip(_("If checked, the author(s) as given will be used for the Author Sort, too.\nIf not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc."))
|
self.suppressauthorsort.setToolTip(_("If checked, the author(s) as given will be used for the Author Sort, too.\nIf not checked, calibre will apply it's built in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc."))
|
||||||
self.suppressauthorsort.setChecked(prefs['suppressauthorsort'])
|
self.suppressauthorsort.setChecked(prefs['suppressauthorsort'])
|
||||||
row.append(self.suppressauthorsort)
|
row.append(self.suppressauthorsort)
|
||||||
|
|
||||||
self.authorcase = QCheckBox(_('Fix Author Case?'),self)
|
self.authorcase = QCheckBox(_('Fix Author Case?'),self)
|
||||||
self.authorcase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of author names will be applied.")
|
self.authorcase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of author names will be applied.")
|
||||||
+"\n"+_("Calibre remembers all authors in the library; changing the author case on one book will effect all books by that author.")
|
+"\n"+_("Calibre remembers all authors in the library; changing the author case on one book will effect all books by that author.")
|
||||||
+"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
|
+"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
|
||||||
self.authorcase.setChecked(prefs['authorcase'])
|
self.authorcase.setChecked(prefs['authorcase'])
|
||||||
row.append(self.authorcase)
|
row.append(self.authorcase)
|
||||||
|
|
||||||
elif key == 'series':
|
elif key == 'series':
|
||||||
|
self.set_series_url = QCheckBox(_('Set Calibre Series URL'),self)
|
||||||
|
self.set_series_url.setToolTip(_("Set Calibre Series URL to Series's URL on story site."))
|
||||||
|
self.set_series_url.setChecked(prefs['set_series_url'])
|
||||||
|
row.append(self.set_series_url)
|
||||||
|
|
||||||
self.setanthologyseries = QCheckBox(_("Set 'Series [0]' for New Anthologies?"),self)
|
self.setanthologyseries = QCheckBox(_("Set 'Series [0]' for New Anthologies?"),self)
|
||||||
self.setanthologyseries.setToolTip(_("If checked, the Series column will be set to 'Series Name [0]' when an Anthology for a series is first created."))
|
self.setanthologyseries.setToolTip(_("If checked, the Series column will be set to 'Series Name [0]' when an Anthology for a series is first created."))
|
||||||
self.setanthologyseries.setChecked(prefs['setanthologyseries'])
|
self.setanthologyseries.setChecked(prefs['setanthologyseries'])
|
||||||
row.append(self.setanthologyseries)
|
row.append(self.setanthologyseries)
|
||||||
|
|
||||||
|
self.seriescase = QCheckBox(_('Fix Series Case?'),self)
|
||||||
|
self.seriescase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of title will be applied.")
|
||||||
|
+"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
|
||||||
|
self.seriescase.setChecked(prefs['seriescase'])
|
||||||
|
row.append(self.seriescase)
|
||||||
grid = QGridLayout()
|
grid = QGridLayout()
|
||||||
for rownum, row in enumerate(rows):
|
for rownum, row in enumerate(rows):
|
||||||
for colnum, col in enumerate(row):
|
for colnum, col in enumerate(row):
|
||||||
|
|
@ -1607,11 +1655,6 @@ class StandardColumnsTab(QWidget):
|
||||||
self.l.addWidget(label)
|
self.l.addWidget(label)
|
||||||
self.l.addSpacing(5)
|
self.l.addSpacing(5)
|
||||||
|
|
||||||
self.set_author_url = QCheckBox(_('Set Calibre Author URL'),self)
|
|
||||||
self.set_author_url.setToolTip(_("Set Calibre Author URL to Author's URL on story site."))
|
|
||||||
self.set_author_url.setChecked(prefs['set_author_url'])
|
|
||||||
self.l.addWidget(self.set_author_url)
|
|
||||||
|
|
||||||
self.includecomments = QCheckBox(_("Include Books' Comments in Anthology Comments?"),self)
|
self.includecomments = QCheckBox(_("Include Books' Comments in Anthology Comments?"),self)
|
||||||
self.includecomments.setToolTip(_('''Include all the merged books' comments in the new book's comments.
|
self.includecomments.setToolTip(_('''Include all the merged books' comments in the new book's comments.
|
||||||
Default is a list of included titles only.'''))
|
Default is a list of included titles only.'''))
|
||||||
|
|
|
||||||
|
|
@ -16,43 +16,29 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from PyQt5 import QtWidgets as QtGui
|
||||||
|
from PyQt5 import QtCore
|
||||||
|
from PyQt5.Qt import (QApplication, QDialog, QWidget, QTableWidget, QTableWidgetItem, QVBoxLayout,
|
||||||
|
QHBoxLayout, QGridLayout, QPushButton, QFont, QLabel, QCheckBox, QIcon,
|
||||||
|
QLineEdit, QComboBox, QProgressDialog, QTimer, QDialogButtonBox,
|
||||||
|
QScrollArea, QPixmap, Qt, QAbstractItemView, QTextEdit,
|
||||||
|
pyqtSignal, QGroupBox, QFrame, QTextCursor)
|
||||||
try:
|
try:
|
||||||
from PyQt5 import QtWidgets as QtGui
|
# qt6 Calibre v6+
|
||||||
from PyQt5 import QtCore
|
QTextEditNoWrap = QTextEdit.LineWrapMode.NoWrap
|
||||||
from PyQt5.Qt import (QApplication, QDialog, QWidget, QTableWidget, QVBoxLayout, QHBoxLayout,
|
MoveOperations = QTextCursor.MoveOperation
|
||||||
QGridLayout, QPushButton, QFont, QLabel, QCheckBox, QIcon,
|
MoveMode = QTextCursor.MoveMode
|
||||||
QLineEdit, QComboBox, QProgressDialog, QTimer, QDialogButtonBox,
|
except:
|
||||||
QScrollArea, QPixmap, Qt, QAbstractItemView, QTextEdit,
|
# qt5 Calibre v2-5
|
||||||
pyqtSignal, QGroupBox, QFrame)
|
QTextEditNoWrap = QTextEdit.NoWrap
|
||||||
except ImportError as e:
|
MoveOperations = QTextCursor
|
||||||
from PyQt4 import QtGui
|
MoveMode = QTextCursor
|
||||||
from PyQt4 import QtCore
|
|
||||||
from PyQt4.Qt import (QApplication, QDialog, QWidget, QTableWidget, QVBoxLayout, QHBoxLayout,
|
|
||||||
QGridLayout, QPushButton, QFont, QLabel, QCheckBox, QIcon,
|
|
||||||
QLineEdit, QComboBox, QProgressDialog, QTimer, QDialogButtonBox,
|
|
||||||
QScrollArea, QPixmap, Qt, QAbstractItemView, QTextEdit,
|
|
||||||
pyqtSignal, QGroupBox, QFrame)
|
|
||||||
|
|
||||||
try:
|
|
||||||
from calibre.gui2 import QVariant
|
|
||||||
del QVariant
|
|
||||||
except ImportError:
|
|
||||||
is_qt4 = False
|
|
||||||
convert_qvariant = lambda x: x
|
|
||||||
else:
|
|
||||||
is_qt4 = True
|
|
||||||
def convert_qvariant(x):
|
|
||||||
vt = x.type()
|
|
||||||
if vt == x.String:
|
|
||||||
return unicode(x.toString())
|
|
||||||
if vt == x.List:
|
|
||||||
return [convert_qvariant(i) for i in x.toList()]
|
|
||||||
return x.toPyObject()
|
|
||||||
|
|
||||||
from calibre.gui2 import gprefs
|
from calibre.gui2 import gprefs
|
||||||
show_download_options = 'fff:add new/update dialogs:show_download_options'
|
show_download_options = 'fff:add new/update dialogs:show_download_options'
|
||||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||||
from calibre.gui2.complete2 import EditWithComplete
|
from calibre.gui2.complete2 import EditWithComplete
|
||||||
|
from fanficfare.exceptions import NotGoingToDownload
|
||||||
from fanficfare.six import text_type as unicode, ensure_text
|
from fanficfare.six import text_type as unicode, ensure_text
|
||||||
|
|
||||||
# pulls in translation files for _() strings
|
# pulls in translation files for _() strings
|
||||||
|
|
@ -170,34 +156,57 @@ class RejectUrlEntry:
|
||||||
|
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
class NotGoingToDownload(Exception):
|
|
||||||
def __init__(self,error,icon='dialog_error.png',showerror=True):
|
|
||||||
self.error=error
|
|
||||||
self.icon=icon
|
|
||||||
self.showerror=showerror
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return self.error
|
|
||||||
|
|
||||||
class DroppableQTextEdit(QTextEdit):
|
class DroppableQTextEdit(QTextEdit):
|
||||||
def __init__(self,parent):
|
def __init__(self,parent):
|
||||||
QTextEdit.__init__(self,parent)
|
QTextEdit.__init__(self,parent)
|
||||||
self.setTabChangesFocus(True)
|
self.setTabChangesFocus(True)
|
||||||
|
|
||||||
def dropEvent(self,event):
|
def dropEvent(self,event):
|
||||||
# print("event:%s"%event)
|
# logger.debug("dropEvent")
|
||||||
urllist = get_urls_from_mime(event.mimeData())
|
urllist = get_urls_from_mime(event.mimeData())
|
||||||
if urllist:
|
if urllist:
|
||||||
self.append("\n".join(urllist))
|
self.append("\n".join(urllist))
|
||||||
return None
|
return None
|
||||||
return QTextEdit.dropEvent(self,event)
|
return QTextEdit.dropEvent(self,event)
|
||||||
|
|
||||||
class AddNewDialog(SizePersistedDialog):
|
def insertFromMimeData(self, mime_data):
|
||||||
|
# logger.debug("insertFromMimeData")
|
||||||
|
# logger.debug(mime_data)
|
||||||
|
urllist = None
|
||||||
|
if mime_data.hasFormat('text/html'):
|
||||||
|
urllist = get_urls_from_mime(mime_data)
|
||||||
|
# logger.debug(urllist)
|
||||||
|
if urllist:
|
||||||
|
[ self.append(url) for url in urllist ]
|
||||||
|
else:
|
||||||
|
return QTextEdit.insertFromMimeData(self, mime_data)
|
||||||
|
|
||||||
|
class HotKeyedSizePersistedDialog(SizePersistedDialog):
|
||||||
|
|
||||||
|
def __init__(self, gui, save_size_name):
|
||||||
|
super(HotKeyedSizePersistedDialog,self).__init__(gui, save_size_name)
|
||||||
|
self.keys=dict()
|
||||||
|
|
||||||
|
def addCtrlKeyPress(self,key,func):
|
||||||
|
# print("addKeyPress: key(0x%x)"%key)
|
||||||
|
# print("control: 0x%x"%QtCore.Qt.ControlModifier)
|
||||||
|
self.keys[key]=func
|
||||||
|
|
||||||
|
def keyPressEvent(self, event):
|
||||||
|
# print("event: key(0x%x) modifiers(0x%x)"%(event.key(),event.modifiers()))
|
||||||
|
if (event.modifiers() & QtCore.Qt.ControlModifier) and event.key() in self.keys:
|
||||||
|
func = self.keys[event.key()]
|
||||||
|
return func()
|
||||||
|
else:
|
||||||
|
return super(HotKeyedSizePersistedDialog,self).keyPressEvent(event)
|
||||||
|
|
||||||
|
class AddNewDialog(HotKeyedSizePersistedDialog):
|
||||||
|
|
||||||
go_signal = pyqtSignal(object, object, object, object)
|
go_signal = pyqtSignal(object, object, object, object)
|
||||||
|
|
||||||
def __init__(self, gui, prefs, icon):
|
def __init__(self, gui, prefs, icon):
|
||||||
SizePersistedDialog.__init__(self, gui, 'fff:add new dialog')
|
super(AddNewDialog,self).__init__(gui, 'fff:add new dialog')
|
||||||
|
|
||||||
self.prefs = prefs
|
self.prefs = prefs
|
||||||
|
|
||||||
self.setMinimumWidth(300)
|
self.setMinimumWidth(300)
|
||||||
|
|
@ -219,19 +228,25 @@ class AddNewDialog(SizePersistedDialog):
|
||||||
self.toplabel=QLabel("Toplabel")
|
self.toplabel=QLabel("Toplabel")
|
||||||
self.l.addWidget(self.toplabel)
|
self.l.addWidget(self.toplabel)
|
||||||
|
|
||||||
## XXX add labels for series name and desc? Desc in tooltip?
|
## scrollable area for lengthy series comments.
|
||||||
row = 0
|
scrollable = QScrollArea()
|
||||||
|
scrollcontent = QWidget()
|
||||||
|
scrollable.setWidget(scrollcontent)
|
||||||
|
scrollable.setWidgetResizable(True)
|
||||||
|
self.l.addWidget(scrollable)
|
||||||
|
|
||||||
grid = QGridLayout()
|
grid = QGridLayout()
|
||||||
|
scrollcontent.setLayout(grid)
|
||||||
|
self.mergeshow.append(scrollable)
|
||||||
|
|
||||||
|
row = 0
|
||||||
label = QLabel('<b>'+_('Series')+':</b>')
|
label = QLabel('<b>'+_('Series')+':</b>')
|
||||||
grid.addWidget(label,row,0)
|
grid.addWidget(label,row,0)
|
||||||
self.mergedname=QLabel("mergedname")
|
self.mergedname=QLabel("mergedname")
|
||||||
tt = _('This name will be used with the %s setting to set the title of the new book.')%'<i>anthology_title_pattern</i>'
|
tt = _('This name will be used with the %s setting to set the title of the new book.')%'<i>anthology_title_pattern</i>'
|
||||||
label.setToolTip(tt)
|
label.setToolTip(tt)
|
||||||
self.mergeshow.append(label)
|
|
||||||
self.mergedname.setToolTip(tt)
|
self.mergedname.setToolTip(tt)
|
||||||
grid.addWidget(self.mergedname,row,1,1,-1)
|
grid.addWidget(self.mergedname,row,1,1,-1)
|
||||||
self.l.addLayout(grid)
|
|
||||||
self.mergeshow.append(self.mergedname)
|
|
||||||
|
|
||||||
row+=1
|
row+=1
|
||||||
label = QLabel('<b>'+_('Comments')+':</b>')
|
label = QLabel('<b>'+_('Comments')+':</b>')
|
||||||
|
|
@ -239,18 +254,15 @@ class AddNewDialog(SizePersistedDialog):
|
||||||
self.mergeddesc=QLabel("mergeddesc")
|
self.mergeddesc=QLabel("mergeddesc")
|
||||||
tt = _('These comments about the series will be included in the Comments of the new book.')+'<i></i>' # for html for auto-wrap
|
tt = _('These comments about the series will be included in the Comments of the new book.')+'<i></i>' # for html for auto-wrap
|
||||||
label.setToolTip(tt)
|
label.setToolTip(tt)
|
||||||
self.mergeshow.append(label)
|
|
||||||
self.mergeddesc.setToolTip(tt)
|
self.mergeddesc.setToolTip(tt)
|
||||||
self.mergeddesc.setWordWrap(True)
|
self.mergeddesc.setWordWrap(True)
|
||||||
grid.addWidget(self.mergeddesc,row,1,1,-1)
|
grid.addWidget(self.mergeddesc,row,1,1,-1)
|
||||||
self.l.addLayout(grid)
|
|
||||||
self.mergeshow.append(self.mergeddesc)
|
|
||||||
grid.setColumnStretch(1,1)
|
grid.setColumnStretch(1,1)
|
||||||
|
|
||||||
self.url = DroppableQTextEdit(self)
|
self.url = DroppableQTextEdit(self)
|
||||||
self.url.setToolTip("UrlTooltip")
|
self.url.setToolTip("UrlTooltip")
|
||||||
self.url.setLineWrapMode(QTextEdit.NoWrap)
|
self.url.setLineWrapMode(QTextEditNoWrap)
|
||||||
self.l.addWidget(self.url)
|
self.l.addWidget(self.url,1) # 1 higher 'stretch'==higher priority
|
||||||
|
|
||||||
self.groupbox = QGroupBox(_("Show Download Options"))
|
self.groupbox = QGroupBox(_("Show Download Options"))
|
||||||
self.groupbox.setCheckable(True)
|
self.groupbox.setCheckable(True)
|
||||||
|
|
@ -314,12 +326,6 @@ class AddNewDialog(SizePersistedDialog):
|
||||||
self.mergehide.append(self.updatemeta)
|
self.mergehide.append(self.updatemeta)
|
||||||
self.mergeupdateshow.append(self.updatemeta)
|
self.mergeupdateshow.append(self.updatemeta)
|
||||||
|
|
||||||
self.updateepubcover = QCheckBox(_('Update EPUB Cover?'),self)
|
|
||||||
self.updateepubcover.setToolTip(_('Update book cover image from site or defaults (if found) <i>inside</i> the EPUB when EPUB is updated.'))
|
|
||||||
self.updateepubcover.setChecked(self.prefs['updateepubcover'])
|
|
||||||
horz.addWidget(self.updateepubcover)
|
|
||||||
self.mergehide.append(self.updateepubcover)
|
|
||||||
|
|
||||||
self.gbl.addLayout(horz)
|
self.gbl.addLayout(horz)
|
||||||
|
|
||||||
## bgmeta not used with Add New because of stories that change
|
## bgmeta not used with Add New because of stories that change
|
||||||
|
|
@ -339,6 +345,9 @@ class AddNewDialog(SizePersistedDialog):
|
||||||
self.button_box.rejected.connect(self.reject)
|
self.button_box.rejected.connect(self.reject)
|
||||||
self.l.addWidget(self.button_box)
|
self.l.addWidget(self.button_box)
|
||||||
|
|
||||||
|
self.addCtrlKeyPress(QtCore.Qt.Key_Return,self.ok_clicked)
|
||||||
|
self.addCtrlKeyPress(QtCore.Qt.Key_Enter,self.ok_clicked) # num pad
|
||||||
|
|
||||||
def click_show_download_options(self,x):
|
def click_show_download_options(self,x):
|
||||||
self.gbf.setVisible(x)
|
self.gbf.setVisible(x)
|
||||||
gprefs[show_download_options] = x
|
gprefs[show_download_options] = x
|
||||||
|
|
@ -449,9 +458,6 @@ class AddNewDialog(SizePersistedDialog):
|
||||||
self.updatemeta.setChecked(self.prefs['updatemeta'])
|
self.updatemeta.setChecked(self.prefs['updatemeta'])
|
||||||
# self.bgmeta.setChecked(self.prefs['bgmeta'])
|
# self.bgmeta.setChecked(self.prefs['bgmeta'])
|
||||||
|
|
||||||
if not self.merge:
|
|
||||||
self.updateepubcover.setChecked(self.prefs['updateepubcover'])
|
|
||||||
|
|
||||||
self.url.setText(url_list_text)
|
self.url.setText(url_list_text)
|
||||||
if url_list_text:
|
if url_list_text:
|
||||||
self.button_box.button(QDialogButtonBox.Ok).setFocus()
|
self.button_box.button(QDialogButtonBox.Ok).setFocus()
|
||||||
|
|
@ -484,30 +490,29 @@ class AddNewDialog(SizePersistedDialog):
|
||||||
self.collision.setCurrentIndex(i)
|
self.collision.setCurrentIndex(i)
|
||||||
|
|
||||||
def get_fff_options(self):
|
def get_fff_options(self):
|
||||||
retval = {
|
retval = dict(self.extraoptions)
|
||||||
'fileform': unicode(self.fileform.currentText()),
|
retval.update( {
|
||||||
'collision': unicode(self.collision.currentText()),
|
'fileform': unicode(self.fileform.currentText()),
|
||||||
'updatemeta': self.updatemeta.isChecked(),
|
'collision': unicode(self.collision.currentText()),
|
||||||
'bgmeta': False, # self.bgmeta.isChecked(),
|
'updatemeta': self.updatemeta.isChecked(),
|
||||||
'updateepubcover': self.updateepubcover.isChecked(),
|
'bgmeta': False, # self.bgmeta.isChecked(),
|
||||||
'smarten_punctuation':self.prefs['smarten_punctuation'],
|
'smarten_punctuation':self.prefs['smarten_punctuation'],
|
||||||
'do_wordcount':self.prefs['do_wordcount'],
|
'do_wordcount':self.prefs['do_wordcount'],
|
||||||
}
|
} )
|
||||||
|
|
||||||
if self.merge:
|
if self.merge:
|
||||||
retval['fileform']=='epub'
|
retval['fileform']=='epub'
|
||||||
retval['updateepubcover']=True
|
|
||||||
if self.newmerge:
|
if self.newmerge:
|
||||||
retval['updatemeta']=True
|
retval['updatemeta']=True
|
||||||
retval['collision']=ADDNEW
|
retval['collision']=ADDNEW
|
||||||
|
|
||||||
|
logger.debug("self.extraoptions['anthology_url']:%s"%self.extraoptions.get('anthology_url','NOT FOUND'))
|
||||||
retval.update(self.extraoptions)
|
retval.update(self.extraoptions)
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
def get_urlstext(self):
|
def get_urlstext(self):
|
||||||
return unicode(self.url.toPlainText())
|
return unicode(self.url.toPlainText())
|
||||||
|
|
||||||
|
|
||||||
class FakeLineEdit():
|
class FakeLineEdit():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
pass
|
||||||
|
|
@ -583,35 +588,83 @@ class UserPassDialog(QDialog):
|
||||||
QDialog.__init__(self, gui)
|
QDialog.__init__(self, gui)
|
||||||
self.status=False
|
self.status=False
|
||||||
|
|
||||||
self.l = QGridLayout()
|
self.l = QVBoxLayout()
|
||||||
self.setLayout(self.l)
|
self.setLayout(self.l)
|
||||||
|
|
||||||
|
grid = QGridLayout()
|
||||||
|
self.l.addLayout(grid)
|
||||||
|
|
||||||
if exception and exception.passwdonly:
|
if exception and exception.passwdonly:
|
||||||
self.setWindowTitle(_('Password'))
|
self.setWindowTitle(_('Password'))
|
||||||
self.l.addWidget(QLabel(_("Author requires a password for this story(%s).")%exception.url),0,0,1,2)
|
grid.addWidget(QLabel(_("Author requires a password for this story(%s).")%exception.url),0,0,1,2)
|
||||||
# user isn't used, but it's easier to still have it for
|
# user isn't used, but it's easier to still have it for
|
||||||
# post processing.
|
# post processing.
|
||||||
self.user = FakeLineEdit()
|
self.user = FakeLineEdit()
|
||||||
else:
|
else:
|
||||||
self.setWindowTitle(_('User/Password'))
|
self.setWindowTitle(_('User/Password'))
|
||||||
self.l.addWidget(QLabel(_("%s requires you to login to download this story.")%site),0,0,1,2)
|
grid.addWidget(QLabel(_("%s requires you to login to download this story.")%site),0,0,1,2)
|
||||||
|
|
||||||
self.l.addWidget(QLabel(_("User:")),1,0)
|
grid.addWidget(QLabel(_("User:")),1,0)
|
||||||
self.user = QLineEdit(self)
|
self.user = QLineEdit(self)
|
||||||
self.l.addWidget(self.user,1,1)
|
grid.addWidget(self.user,1,1)
|
||||||
|
|
||||||
self.l.addWidget(QLabel(_("Password:")),2,0)
|
grid.addWidget(QLabel(_("Password:")),2,0)
|
||||||
self.passwd = QLineEdit(self)
|
self.passwd = QLineEdit(self)
|
||||||
self.passwd.setEchoMode(QLineEdit.Password)
|
self.passwd.setEchoMode(QLineEdit.Password)
|
||||||
self.l.addWidget(self.passwd,2,1)
|
grid.addWidget(self.passwd,2,1)
|
||||||
|
|
||||||
|
horz = QHBoxLayout()
|
||||||
|
self.l.addLayout(horz)
|
||||||
|
|
||||||
self.ok_button = QPushButton(_('OK'), self)
|
self.ok_button = QPushButton(_('OK'), self)
|
||||||
self.ok_button.clicked.connect(self.ok)
|
self.ok_button.clicked.connect(self.ok)
|
||||||
self.l.addWidget(self.ok_button,3,0)
|
horz.addWidget(self.ok_button)
|
||||||
|
|
||||||
self.cancel_button = QPushButton(_('Cancel'), self)
|
self.cancel_button = QPushButton(_('Cancel'), self)
|
||||||
self.cancel_button.clicked.connect(self.cancel)
|
self.cancel_button.clicked.connect(self.cancel)
|
||||||
self.l.addWidget(self.cancel_button,3,1)
|
horz.addWidget(self.cancel_button)
|
||||||
|
|
||||||
|
self.resize(self.sizeHint())
|
||||||
|
|
||||||
|
def ok(self):
|
||||||
|
self.status=True
|
||||||
|
self.hide()
|
||||||
|
|
||||||
|
def cancel(self):
|
||||||
|
self.status=False
|
||||||
|
self.hide()
|
||||||
|
|
||||||
|
class TOTPDialog(QDialog):
|
||||||
|
'''
|
||||||
|
Need to collect Timebased One Time Password(TOTP) for some sites.
|
||||||
|
'''
|
||||||
|
def __init__(self, gui, site, exception=None):
|
||||||
|
QDialog.__init__(self, gui)
|
||||||
|
self.status=False
|
||||||
|
|
||||||
|
self.l = QVBoxLayout()
|
||||||
|
self.setLayout(self.l)
|
||||||
|
|
||||||
|
grid = QGridLayout()
|
||||||
|
self.l.addLayout(grid)
|
||||||
|
|
||||||
|
self.setWindowTitle(_('Time-based One Time Password(TOTP)'))
|
||||||
|
grid.addWidget(QLabel(_("Site requires a Time-based One Time Password(TOTP) for this url:\n%s")%exception.url),0,0,1,2)
|
||||||
|
|
||||||
|
grid.addWidget(QLabel(_("TOTP:")),2,0)
|
||||||
|
self.totp = QLineEdit(self)
|
||||||
|
grid.addWidget(self.totp,2,1)
|
||||||
|
|
||||||
|
horz = QHBoxLayout()
|
||||||
|
self.l.addLayout(horz)
|
||||||
|
|
||||||
|
self.ok_button = QPushButton(_('OK'), self)
|
||||||
|
self.ok_button.clicked.connect(self.ok)
|
||||||
|
horz.addWidget(self.ok_button)
|
||||||
|
|
||||||
|
self.cancel_button = QPushButton(_('Cancel'), self)
|
||||||
|
self.cancel_button.clicked.connect(self.cancel)
|
||||||
|
horz.addWidget(self.cancel_button)
|
||||||
|
|
||||||
self.resize(self.sizeHint())
|
self.resize(self.sizeHint())
|
||||||
|
|
||||||
|
|
@ -629,13 +682,15 @@ def LoopProgressDialog(gui,
|
||||||
finish_function,
|
finish_function,
|
||||||
init_label=_("Fetching metadata for stories..."),
|
init_label=_("Fetching metadata for stories..."),
|
||||||
win_title=_("Downloading metadata for stories"),
|
win_title=_("Downloading metadata for stories"),
|
||||||
status_prefix=_("Fetched metadata for")):
|
status_prefix=_("Fetched metadata for"),
|
||||||
|
disable_cancel=False):
|
||||||
ld = _LoopProgressDialog(gui,
|
ld = _LoopProgressDialog(gui,
|
||||||
book_list,
|
book_list,
|
||||||
foreach_function,
|
foreach_function,
|
||||||
init_label,
|
init_label,
|
||||||
win_title,
|
win_title,
|
||||||
status_prefix)
|
status_prefix,
|
||||||
|
disable_cancel)
|
||||||
|
|
||||||
# Mac OS X gets upset if the finish_function is called from inside
|
# Mac OS X gets upset if the finish_function is called from inside
|
||||||
# the real _LoopProgressDialog class.
|
# the real _LoopProgressDialog class.
|
||||||
|
|
@ -653,10 +708,12 @@ class _LoopProgressDialog(QProgressDialog):
|
||||||
foreach_function,
|
foreach_function,
|
||||||
init_label=_("Fetching metadata for stories..."),
|
init_label=_("Fetching metadata for stories..."),
|
||||||
win_title=_("Downloading metadata for stories"),
|
win_title=_("Downloading metadata for stories"),
|
||||||
status_prefix=_("Fetched metadata for")):
|
status_prefix=_("Fetched metadata for"),
|
||||||
|
disable_cancel=False):
|
||||||
QProgressDialog.__init__(self,
|
QProgressDialog.__init__(self,
|
||||||
init_label,
|
init_label,
|
||||||
_('Cancel'), 0, len(book_list), gui)
|
_('Cancel'), 0, len(book_list), gui)
|
||||||
|
self.gui = gui
|
||||||
self.setWindowTitle(win_title)
|
self.setWindowTitle(win_title)
|
||||||
self.setMinimumWidth(500)
|
self.setMinimumWidth(500)
|
||||||
self.book_list = book_list
|
self.book_list = book_list
|
||||||
|
|
@ -664,7 +721,6 @@ class _LoopProgressDialog(QProgressDialog):
|
||||||
self.status_prefix = status_prefix
|
self.status_prefix = status_prefix
|
||||||
self.i = 0
|
self.i = 0
|
||||||
self.start_time = datetime.now()
|
self.start_time = datetime.now()
|
||||||
self.first = True
|
|
||||||
|
|
||||||
# can't import at file load.
|
# can't import at file load.
|
||||||
from calibre_plugins.fanficfare_plugin.prefs import prefs
|
from calibre_plugins.fanficfare_plugin.prefs import prefs
|
||||||
|
|
@ -673,11 +729,27 @@ class _LoopProgressDialog(QProgressDialog):
|
||||||
self.setLabelText('%s %d / %d' % (self.status_prefix, self.i, len(self.book_list)))
|
self.setLabelText('%s %d / %d' % (self.status_prefix, self.i, len(self.book_list)))
|
||||||
self.setValue(self.i)
|
self.setValue(self.i)
|
||||||
|
|
||||||
|
if disable_cancel:
|
||||||
|
self.setCancelButton(None)
|
||||||
|
self.reject = self.disabled_reject
|
||||||
|
self.closeEvent = self.disabled_closeEvent
|
||||||
|
|
||||||
## self.do_loop does QTimer.singleShot on self.do_loop also.
|
## self.do_loop does QTimer.singleShot on self.do_loop also.
|
||||||
## A weird way to do a loop, but that was the example I had.
|
## A weird way to do a loop, but that was the example I had.
|
||||||
QTimer.singleShot(0, self.do_loop)
|
## 100 instead of 0 on the first go due to Win10(and later
|
||||||
|
## qt6) not displaying dialog properly.
|
||||||
|
QTimer.singleShot(100, self.do_loop)
|
||||||
self.exec_()
|
self.exec_()
|
||||||
|
|
||||||
|
# used when disable_cancel = True
|
||||||
|
def disabled_reject(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# used when disable_cancel = True
|
||||||
|
def disabled_closeEvent(self, event):
|
||||||
|
if event.spontaneous():
|
||||||
|
event.ignore()
|
||||||
|
|
||||||
def updateStatus(self):
|
def updateStatus(self):
|
||||||
remaining_time_string = ''
|
remaining_time_string = ''
|
||||||
if self.show_est_time and self.i > -1:
|
if self.show_est_time and self.i > -1:
|
||||||
|
|
@ -691,15 +763,6 @@ class _LoopProgressDialog(QProgressDialog):
|
||||||
|
|
||||||
def do_loop(self):
|
def do_loop(self):
|
||||||
|
|
||||||
if self.first:
|
|
||||||
## Windows 10 doesn't want to show the prog dialog content
|
|
||||||
## until after the timer's been called again. Something to
|
|
||||||
## do with cooperative multi threading maybe?
|
|
||||||
## So this just trips the timer loop an extra time at the start.
|
|
||||||
self.first = False
|
|
||||||
QTimer.singleShot(0, self.do_loop)
|
|
||||||
return
|
|
||||||
|
|
||||||
book = self.book_list[self.i]
|
book = self.book_list[self.i]
|
||||||
try:
|
try:
|
||||||
## collision spec passed into getadapter by partial from fff_plugin
|
## collision spec passed into getadapter by partial from fff_plugin
|
||||||
|
|
@ -898,11 +961,6 @@ class UpdateExistingDialog(SizePersistedDialog):
|
||||||
self.updatemeta.setChecked(self.prefs['updatemeta'])
|
self.updatemeta.setChecked(self.prefs['updatemeta'])
|
||||||
horz.addWidget(self.updatemeta)
|
horz.addWidget(self.updatemeta)
|
||||||
|
|
||||||
self.updateepubcover = QCheckBox(_('Update EPUB Cover?'),self)
|
|
||||||
self.updateepubcover.setToolTip(_('Update book cover image from site or defaults (if found) <i>inside</i> the EPUB when EPUB is updated.'))
|
|
||||||
self.updateepubcover.setChecked(self.prefs['updateepubcover'])
|
|
||||||
horz.addWidget(self.updateepubcover)
|
|
||||||
|
|
||||||
self.bgmeta = QCheckBox(_('Background Metadata?'),self)
|
self.bgmeta = QCheckBox(_('Background Metadata?'),self)
|
||||||
self.bgmeta.setToolTip(_("Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail."))
|
self.bgmeta.setToolTip(_("Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail."))
|
||||||
self.bgmeta.setChecked(self.prefs['bgmeta'])
|
self.bgmeta.setChecked(self.prefs['bgmeta'])
|
||||||
|
|
@ -954,7 +1012,6 @@ class UpdateExistingDialog(SizePersistedDialog):
|
||||||
'collision': unicode(self.collision.currentText()),
|
'collision': unicode(self.collision.currentText()),
|
||||||
'updatemeta': self.updatemeta.isChecked(),
|
'updatemeta': self.updatemeta.isChecked(),
|
||||||
'bgmeta': self.bgmeta.isChecked(),
|
'bgmeta': self.bgmeta.isChecked(),
|
||||||
'updateepubcover': self.updateepubcover.isChecked(),
|
|
||||||
'smarten_punctuation':self.prefs['smarten_punctuation'],
|
'smarten_punctuation':self.prefs['smarten_punctuation'],
|
||||||
'do_wordcount':self.prefs['do_wordcount'],
|
'do_wordcount':self.prefs['do_wordcount'],
|
||||||
}
|
}
|
||||||
|
|
@ -1027,7 +1084,7 @@ class StoryListTableWidget(QTableWidget):
|
||||||
books = []
|
books = []
|
||||||
#print("=========================\nbooks:%s"%self.books)
|
#print("=========================\nbooks:%s"%self.books)
|
||||||
for row in range(self.rowCount()):
|
for row in range(self.rowCount()):
|
||||||
rnum = convert_qvariant(self.item(row, 1).data(Qt.UserRole))
|
rnum = self.item(row, 1).data(Qt.UserRole)
|
||||||
book = self.books[rnum]
|
book = self.books[rnum]
|
||||||
books.append(book)
|
books.append(book)
|
||||||
return books
|
return books
|
||||||
|
|
@ -1035,6 +1092,7 @@ class StoryListTableWidget(QTableWidget):
|
||||||
def remove_selected_rows(self):
|
def remove_selected_rows(self):
|
||||||
self.setFocus()
|
self.setFocus()
|
||||||
rows = self.selectionModel().selectedRows()
|
rows = self.selectionModel().selectedRows()
|
||||||
|
rows = sorted(rows, key=lambda x: x.row(), reverse=True)
|
||||||
if len(rows) == 0:
|
if len(rows) == 0:
|
||||||
return
|
return
|
||||||
message = '<p>'+_('Are you sure you want to remove this book from the list?')
|
message = '<p>'+_('Are you sure you want to remove this book from the list?')
|
||||||
|
|
@ -1043,7 +1101,7 @@ class StoryListTableWidget(QTableWidget):
|
||||||
if not confirm(message,'fff_delete_item', self):
|
if not confirm(message,'fff_delete_item', self):
|
||||||
return
|
return
|
||||||
first_sel_row = self.currentRow()
|
first_sel_row = self.currentRow()
|
||||||
for selrow in reversed(rows):
|
for selrow in rows:
|
||||||
self.removeRow(selrow.row())
|
self.removeRow(selrow.row())
|
||||||
if first_sel_row < self.rowCount():
|
if first_sel_row < self.rowCount():
|
||||||
self.select_and_scroll_to_row(first_sel_row)
|
self.select_and_scroll_to_row(first_sel_row)
|
||||||
|
|
@ -1054,6 +1112,19 @@ class StoryListTableWidget(QTableWidget):
|
||||||
self.selectRow(row)
|
self.selectRow(row)
|
||||||
self.scrollToItem(self.currentItem())
|
self.scrollToItem(self.currentItem())
|
||||||
|
|
||||||
|
## Added to allow sorting by Notes column
|
||||||
|
class NotesWidgetItem(QTableWidgetItem):
|
||||||
|
def __init__(self,content):
|
||||||
|
QTableWidgetItem.__init__(self)
|
||||||
|
self.content=content
|
||||||
|
|
||||||
|
def currentText(self):
|
||||||
|
return self.content.currentText()
|
||||||
|
|
||||||
|
def __lt__(self, other):
|
||||||
|
return (unicode(self.currentText()).lower().strip() <
|
||||||
|
unicode(other.currentText()).lower().strip())
|
||||||
|
|
||||||
class RejectListTableWidget(QTableWidget):
|
class RejectListTableWidget(QTableWidget):
|
||||||
|
|
||||||
def __init__(self, parent,rejectreasons=[]):
|
def __init__(self, parent,rejectreasons=[]):
|
||||||
|
|
@ -1098,6 +1169,7 @@ class RejectListTableWidget(QTableWidget):
|
||||||
self.setItem(row, 1, EditableTableWidgetItem(rej.title))
|
self.setItem(row, 1, EditableTableWidgetItem(rej.title))
|
||||||
self.setItem(row, 2, EditableTableWidgetItem(rej.auth))
|
self.setItem(row, 2, EditableTableWidgetItem(rej.auth))
|
||||||
|
|
||||||
|
# sort_func orders dropdown-constant to preserve user order.
|
||||||
note_cell = EditWithComplete(self,sort_func=lambda x:1)
|
note_cell = EditWithComplete(self,sort_func=lambda x:1)
|
||||||
|
|
||||||
items = [rej.note]+self.rejectreasons
|
items = [rej.note]+self.rejectreasons
|
||||||
|
|
@ -1105,12 +1177,14 @@ class RejectListTableWidget(QTableWidget):
|
||||||
note_cell.show_initial_value(rej.note)
|
note_cell.show_initial_value(rej.note)
|
||||||
note_cell.set_separator(None)
|
note_cell.set_separator(None)
|
||||||
note_cell.setToolTip(_('Select or Edit Reject Note.'))
|
note_cell.setToolTip(_('Select or Edit Reject Note.'))
|
||||||
|
self.setItem(row, 3, NotesWidgetItem(note_cell))
|
||||||
self.setCellWidget(row, 3, note_cell)
|
self.setCellWidget(row, 3, note_cell)
|
||||||
note_cell.setCursorPosition(0)
|
note_cell.setCursorPosition(0)
|
||||||
|
|
||||||
def remove_selected_rows(self):
|
def remove_selected_rows(self):
|
||||||
self.setFocus()
|
self.setFocus()
|
||||||
rows = self.selectionModel().selectedRows()
|
rows = self.selectionModel().selectedRows()
|
||||||
|
rows = sorted(rows, key=lambda x: x.row(), reverse=True)
|
||||||
if len(rows) == 0:
|
if len(rows) == 0:
|
||||||
return
|
return
|
||||||
message = '<p>'+_('Are you sure you want to remove this URL from the list?')
|
message = '<p>'+_('Are you sure you want to remove this URL from the list?')
|
||||||
|
|
@ -1119,7 +1193,7 @@ class RejectListTableWidget(QTableWidget):
|
||||||
if not confirm(message,'fff_rejectlist_delete_item_again', self):
|
if not confirm(message,'fff_rejectlist_delete_item_again', self):
|
||||||
return
|
return
|
||||||
first_sel_row = self.currentRow()
|
first_sel_row = self.currentRow()
|
||||||
for selrow in reversed(rows):
|
for selrow in rows:
|
||||||
self.removeRow(selrow.row())
|
self.removeRow(selrow.row())
|
||||||
if first_sel_row < self.rowCount():
|
if first_sel_row < self.rowCount():
|
||||||
self.select_and_scroll_to_row(first_sel_row)
|
self.select_and_scroll_to_row(first_sel_row)
|
||||||
|
|
@ -1215,7 +1289,7 @@ class RejectListDialog(SizePersistedDialog):
|
||||||
rejectrows = []
|
rejectrows = []
|
||||||
for row in range(self.rejects_table.rowCount()):
|
for row in range(self.rejects_table.rowCount()):
|
||||||
url = unicode(self.rejects_table.item(row, 0).text()).strip()
|
url = unicode(self.rejects_table.item(row, 0).text()).strip()
|
||||||
book_id =convert_qvariant(self.rejects_table.item(row, 0).data(Qt.UserRole))
|
book_id =self.rejects_table.item(row, 0).data(Qt.UserRole)
|
||||||
title = unicode(self.rejects_table.item(row, 1).text()).strip()
|
title = unicode(self.rejects_table.item(row, 1).text()).strip()
|
||||||
auth = unicode(self.rejects_table.item(row, 2).text()).strip()
|
auth = unicode(self.rejects_table.item(row, 2).text()).strip()
|
||||||
note = unicode(self.rejects_table.cellWidget(row, 3).currentText()).strip()
|
note = unicode(self.rejects_table.cellWidget(row, 3).currentText()).strip()
|
||||||
|
|
@ -1225,7 +1299,7 @@ class RejectListDialog(SizePersistedDialog):
|
||||||
def get_reject_list_ids(self):
|
def get_reject_list_ids(self):
|
||||||
rejectrows = []
|
rejectrows = []
|
||||||
for row in range(self.rejects_table.rowCount()):
|
for row in range(self.rejects_table.rowCount()):
|
||||||
book_id = convert_qvariant(self.rejects_table.item(row, 0).data(Qt.UserRole))
|
book_id = self.rejects_table.item(row, 0).data(Qt.UserRole)
|
||||||
if book_id:
|
if book_id:
|
||||||
rejectrows.append(book_id)
|
rejectrows.append(book_id)
|
||||||
return rejectrows
|
return rejectrows
|
||||||
|
|
@ -1246,6 +1320,7 @@ class EditTextDialog(SizePersistedDialog):
|
||||||
icon=None, title=None, label=None, tooltip=None,
|
icon=None, title=None, label=None, tooltip=None,
|
||||||
read_only=False,
|
read_only=False,
|
||||||
rejectreasons=[],reasonslabel=None,
|
rejectreasons=[],reasonslabel=None,
|
||||||
|
accept_storyurls=False,
|
||||||
save_size_name='fff:edit text dialog',
|
save_size_name='fff:edit text dialog',
|
||||||
):
|
):
|
||||||
SizePersistedDialog.__init__(self, parent, save_size_name)
|
SizePersistedDialog.__init__(self, parent, save_size_name)
|
||||||
|
|
@ -1259,8 +1334,11 @@ class EditTextDialog(SizePersistedDialog):
|
||||||
self.setWindowIcon(icon)
|
self.setWindowIcon(icon)
|
||||||
self.l.addWidget(self.label)
|
self.l.addWidget(self.label)
|
||||||
|
|
||||||
self.textedit = QTextEdit(self)
|
if accept_storyurls:
|
||||||
self.textedit.setLineWrapMode(QTextEdit.NoWrap)
|
self.textedit = DroppableQTextEdit(self)
|
||||||
|
else:
|
||||||
|
self.textedit = QTextEdit(self)
|
||||||
|
self.textedit.setLineWrapMode(QTextEditNoWrap)
|
||||||
self.textedit.setReadOnly(read_only)
|
self.textedit.setReadOnly(read_only)
|
||||||
self.textedit.setText(text)
|
self.textedit.setText(text)
|
||||||
self.l.addWidget(self.textedit)
|
self.l.addWidget(self.textedit)
|
||||||
|
|
@ -1303,7 +1381,18 @@ class EditTextDialog(SizePersistedDialog):
|
||||||
def get_reason_text(self):
|
def get_reason_text(self):
|
||||||
return unicode(self.reason_edit.currentText()).strip()
|
return unicode(self.reason_edit.currentText()).strip()
|
||||||
|
|
||||||
class IniTextDialog(SizePersistedDialog):
|
class QTextEditPlainPaste(QTextEdit):
|
||||||
|
def insertFromMimeData(self, mimeData):
|
||||||
|
# logger.debug("insertFromMimeData called")
|
||||||
|
#Ensure it is text.
|
||||||
|
if (mimeData.hasText()):
|
||||||
|
text = mimeData.text()
|
||||||
|
self.insertPlainText(text)
|
||||||
|
#In case not text.
|
||||||
|
else:
|
||||||
|
QTextEdit.insertFromMimeData(self, mimeData)
|
||||||
|
|
||||||
|
class IniTextDialog(HotKeyedSizePersistedDialog):
|
||||||
|
|
||||||
def __init__(self, parent, text,
|
def __init__(self, parent, text,
|
||||||
icon=None, title=None, label=None,
|
icon=None, title=None, label=None,
|
||||||
|
|
@ -1311,9 +1400,7 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
read_only=False,
|
read_only=False,
|
||||||
save_size_name='fff:ini text dialog',
|
save_size_name='fff:ini text dialog',
|
||||||
):
|
):
|
||||||
SizePersistedDialog.__init__(self, parent, save_size_name)
|
super(IniTextDialog,self).__init__(parent, save_size_name)
|
||||||
|
|
||||||
self.keys=dict()
|
|
||||||
|
|
||||||
self.l = QVBoxLayout()
|
self.l = QVBoxLayout()
|
||||||
self.setLayout(self.l)
|
self.setLayout(self.l)
|
||||||
|
|
@ -1324,7 +1411,7 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
self.setWindowIcon(icon)
|
self.setWindowIcon(icon)
|
||||||
self.l.addWidget(self.label)
|
self.l.addWidget(self.label)
|
||||||
|
|
||||||
self.textedit = QTextEdit(self)
|
self.textedit = QTextEditPlainPaste(self)
|
||||||
|
|
||||||
highlighter = IniHighlighter(self.textedit,
|
highlighter = IniHighlighter(self.textedit,
|
||||||
sections=get_valid_sections(),
|
sections=get_valid_sections(),
|
||||||
|
|
@ -1333,7 +1420,7 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
entry_keywords=get_valid_entry_keywords(),
|
entry_keywords=get_valid_entry_keywords(),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.textedit.setLineWrapMode(QTextEdit.NoWrap)
|
self.textedit.setLineWrapMode(QTextEditNoWrap)
|
||||||
try:
|
try:
|
||||||
self.textedit.setFont(QFont("Courier",
|
self.textedit.setFont(QFont("Courier",
|
||||||
parent.font().pointSize()+1))
|
parent.font().pointSize()+1))
|
||||||
|
|
@ -1380,6 +1467,8 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
self.addCtrlKeyPress(QtCore.Qt.Key_F,self.findFocus)
|
self.addCtrlKeyPress(QtCore.Qt.Key_F,self.findFocus)
|
||||||
self.addCtrlKeyPress(QtCore.Qt.Key_G,self.find)
|
self.addCtrlKeyPress(QtCore.Qt.Key_G,self.find)
|
||||||
|
|
||||||
|
self.addCtrlKeyPress(QtCore.Qt.Key_Return,self.accept)
|
||||||
|
self.addCtrlKeyPress(QtCore.Qt.Key_Enter,self.accept) # num pad
|
||||||
button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
|
button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
|
||||||
button_box.accepted.connect(self.accept)
|
button_box.accepted.connect(self.accept)
|
||||||
button_box.rejected.connect(self.reject)
|
button_box.rejected.connect(self.reject)
|
||||||
|
|
@ -1412,19 +1501,6 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
# print("call parent accept")
|
# print("call parent accept")
|
||||||
return SizePersistedDialog.accept(self)
|
return SizePersistedDialog.accept(self)
|
||||||
|
|
||||||
def addCtrlKeyPress(self,key,func):
|
|
||||||
# print("addKeyPress: key(0x%x)"%key)
|
|
||||||
# print("control: 0x%x"%QtCore.Qt.ControlModifier)
|
|
||||||
self.keys[key]=func
|
|
||||||
|
|
||||||
def keyPressEvent(self, event):
|
|
||||||
# print("event: key(0x%x) modifiers(0x%x)"%(event.key(),event.modifiers()))
|
|
||||||
if (event.modifiers() & QtCore.Qt.ControlModifier) and event.key() in self.keys:
|
|
||||||
func = self.keys[event.key()]
|
|
||||||
return func()
|
|
||||||
else:
|
|
||||||
return SizePersistedDialog.keyPressEvent(self, event)
|
|
||||||
|
|
||||||
def get_plain_text(self):
|
def get_plain_text(self):
|
||||||
return unicode(self.textedit.toPlainText())
|
return unicode(self.textedit.toPlainText())
|
||||||
|
|
||||||
|
|
@ -1458,7 +1534,7 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
else:
|
else:
|
||||||
# Make the next search start from the begining again
|
# Make the next search start from the begining again
|
||||||
self.lastStart = 0
|
self.lastStart = 0
|
||||||
self.textedit.moveCursor(self.textedit.textCursor().Start)
|
self.textedit.moveCursor(MoveOperations.Start)
|
||||||
|
|
||||||
def moveCursor(self,start,end):
|
def moveCursor(self,start,end):
|
||||||
|
|
||||||
|
|
@ -1470,7 +1546,8 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
|
|
||||||
# Next we move the Cursor by over the match and pass the KeepAnchor parameter
|
# Next we move the Cursor by over the match and pass the KeepAnchor parameter
|
||||||
# which will make the cursor select the match's text
|
# which will make the cursor select the match's text
|
||||||
cursor.movePosition(cursor.Right,cursor.KeepAnchor,end - start)
|
cursor.movePosition(MoveOperations.Right,
|
||||||
|
MoveMode.KeepAnchor,end - start)
|
||||||
|
|
||||||
# And finally we set this new cursor as the parent's
|
# And finally we set this new cursor as the parent's
|
||||||
self.textedit.setTextCursor(cursor)
|
self.textedit.setTextCursor(cursor)
|
||||||
|
|
@ -1484,15 +1561,14 @@ class IniTextDialog(SizePersistedDialog):
|
||||||
cursor.setPosition(0)
|
cursor.setPosition(0)
|
||||||
|
|
||||||
# Next we move the Cursor down lineno times
|
# Next we move the Cursor down lineno times
|
||||||
cursor.movePosition(cursor.Down,cursor.MoveAnchor,lineno-1)
|
cursor.movePosition(MoveOperations.Down,MoveMode.MoveAnchor,lineno-1)
|
||||||
|
|
||||||
# Next we move the Cursor to the end of the line
|
# Next we move the Cursor to the end of the line
|
||||||
cursor.movePosition(cursor.EndOfLine,cursor.KeepAnchor,1)
|
cursor.movePosition(MoveOperations.EndOfLine,MoveMode.KeepAnchor,1)
|
||||||
|
|
||||||
# And finally we set this new cursor as the parent's
|
# And finally we set this new cursor as the parent's
|
||||||
self.textedit.setTextCursor(cursor)
|
self.textedit.setTextCursor(cursor)
|
||||||
|
|
||||||
|
|
||||||
class ViewLog(SizePersistedDialog):
|
class ViewLog(SizePersistedDialog):
|
||||||
|
|
||||||
def label_clicked(self, event, lineno=None):
|
def label_clicked(self, event, lineno=None):
|
||||||
|
|
@ -1581,28 +1657,30 @@ class EmailPassDialog(QDialog):
|
||||||
QDialog.__init__(self, gui)
|
QDialog.__init__(self, gui)
|
||||||
self.status=False
|
self.status=False
|
||||||
|
|
||||||
self.l = QGridLayout()
|
self.l = QVBoxLayout()
|
||||||
self.setLayout(self.l)
|
self.setLayout(self.l)
|
||||||
|
|
||||||
self.setWindowTitle(_('Password'))
|
grid = QGridLayout()
|
||||||
self.l.addWidget(QLabel(_("Enter Email Password for %s:")%user),0,0,1,2)
|
self.l.addLayout(grid)
|
||||||
|
|
||||||
# self.l.addWidget(QLabel(_("Password:")),1,0)
|
self.setWindowTitle(_('Password'))
|
||||||
|
grid.addWidget(QLabel(_("Enter Email Password for %s:")%user),0,0,1,2)
|
||||||
|
|
||||||
|
# grid.addWidget(QLabel(_("Password:")),1,0)
|
||||||
self.passwd = QLineEdit(self)
|
self.passwd = QLineEdit(self)
|
||||||
self.passwd.setEchoMode(QLineEdit.Password)
|
self.passwd.setEchoMode(QLineEdit.Password)
|
||||||
self.l.addWidget(self.passwd,1,0,1,2)
|
grid.addWidget(self.passwd,1,0,1,2)
|
||||||
|
|
||||||
|
horz = QHBoxLayout()
|
||||||
|
self.l.addLayout(horz)
|
||||||
|
|
||||||
self.ok_button = QPushButton(_('OK'), self)
|
self.ok_button = QPushButton(_('OK'), self)
|
||||||
self.ok_button.clicked.connect(self.ok)
|
self.ok_button.clicked.connect(self.ok)
|
||||||
self.l.addWidget(self.ok_button,2,0)
|
horz.addWidget(self.ok_button)
|
||||||
|
|
||||||
self.cancel_button = QPushButton(_('Cancel'), self)
|
self.cancel_button = QPushButton(_('Cancel'), self)
|
||||||
self.cancel_button.clicked.connect(self.cancel)
|
self.cancel_button.clicked.connect(self.cancel)
|
||||||
self.l.addWidget(self.cancel_button,2,1)
|
horz.addWidget(self.cancel_button)
|
||||||
|
|
||||||
# set stretch factors the same.
|
|
||||||
self.l.setColumnStretch(0,1)
|
|
||||||
self.l.setColumnStretch(1,1)
|
|
||||||
|
|
||||||
self.resize(self.sizeHint())
|
self.resize(self.sizeHint())
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -7,7 +7,6 @@ __license__ = 'GPL v3'
|
||||||
__copyright__ = '2020, Jim Miller'
|
__copyright__ = '2020, Jim Miller'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import collections
|
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
@ -20,6 +19,7 @@ from fanficfare.configurable import Configuration
|
||||||
from calibre_plugins.fanficfare_plugin.prefs import prefs
|
from calibre_plugins.fanficfare_plugin.prefs import prefs
|
||||||
from fanficfare.six import ensure_text
|
from fanficfare.six import ensure_text
|
||||||
from fanficfare.six.moves import configparser
|
from fanficfare.six.moves import configparser
|
||||||
|
from fanficfare.six.moves import collections_abc
|
||||||
|
|
||||||
def get_fff_personalini():
|
def get_fff_personalini():
|
||||||
return prefs['personal.ini']
|
return prefs['personal.ini']
|
||||||
|
|
@ -33,8 +33,8 @@ def get_fff_config(url,fileform="epub",personalini=None):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
|
logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
|
||||||
configuration = Configuration(sections,fileform)
|
configuration = Configuration(sections,fileform)
|
||||||
configuration.readfp(StringIO(ensure_text(get_resources("plugin-defaults.ini"))))
|
configuration.read_file(StringIO(ensure_text(get_resources("plugin-defaults.ini"))))
|
||||||
configuration.readfp(StringIO(ensure_text(personalini)))
|
configuration.read_file(StringIO(ensure_text(personalini)))
|
||||||
|
|
||||||
return configuration
|
return configuration
|
||||||
|
|
||||||
|
|
@ -52,7 +52,7 @@ def test_config(initext):
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
|
||||||
class OrderedSet(collections.MutableSet):
|
class OrderedSet(collections_abc.MutableSet):
|
||||||
|
|
||||||
def __init__(self, iterable=None):
|
def __init__(self, iterable=None):
|
||||||
self.end = end = []
|
self.end = end = []
|
||||||
|
|
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 24 KiB |
|
|
@ -12,10 +12,17 @@ import re
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
from PyQt5.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter,
|
||||||
|
QTextCharFormat, QBrush, QFont)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PyQt5.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont)
|
# qt6 Calibre v6+
|
||||||
except ImportError as e:
|
QFontNormal = QFont.Weight.Normal
|
||||||
from PyQt4.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont)
|
QFontBold = QFont.Weight.Bold
|
||||||
|
except:
|
||||||
|
# qt5 Calibre v2-5
|
||||||
|
QFontNormal = QFont.Normal
|
||||||
|
QFontBold = QFont.Bold
|
||||||
|
|
||||||
from fanficfare.six import string_types
|
from fanficfare.six import string_types
|
||||||
|
|
||||||
|
|
@ -51,7 +58,7 @@ class IniHighlighter(QSyntaxHighlighter):
|
||||||
'knownkeywords':QColor(Qt.blue).lighter(150),
|
'knownkeywords':QColor(Qt.blue).lighter(150),
|
||||||
'knownsections':Qt.darkCyan,
|
'knownsections':Qt.darkCyan,
|
||||||
'teststories':Qt.cyan,
|
'teststories':Qt.cyan,
|
||||||
'storyUrls':Qt.magenta,
|
'storyUrls':QColor(Qt.magenta).lighter(150),
|
||||||
'comments':Qt.yellow
|
'comments':Qt.yellow
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -83,20 +90,21 @@ class IniHighlighter(QSyntaxHighlighter):
|
||||||
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) )
|
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) )
|
||||||
|
|
||||||
# *all* sections -- change known later.
|
# *all* sections -- change known later.
|
||||||
self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", colors['errors'], QFont.Bold, blocknum=1 ) )
|
self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", colors['errors'], QFontBold, blocknum=1 ) )
|
||||||
|
|
||||||
if sections:
|
if sections:
|
||||||
# *known* sections
|
# *known* sections
|
||||||
resections = r'('+(r'|'.join(sections))+r')'
|
resections = r'('+(r'|'.join(sections))+r')'
|
||||||
resections = resections.replace('.','\.') #escape dots.
|
resections = resections.replace('.',r'\.') #escape dots.
|
||||||
self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", colors['knownsections'], QFont.Bold, blocknum=2 ) )
|
self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", colors['knownsections'], QFontBold, blocknum=2 ) )
|
||||||
|
|
||||||
# test story sections
|
# test story sections
|
||||||
self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", colors['teststories'], blocknum=3 )
|
self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", colors['teststories'], blocknum=3 )
|
||||||
self.highlightingRules.append( self.teststoryRule )
|
self.highlightingRules.append( self.teststoryRule )
|
||||||
|
|
||||||
# storyUrl sections
|
# storyUrl sections
|
||||||
self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", colors['storyUrls'], blocknum=4 )
|
# StoryUrls are *not* checked beyond looking for https?://
|
||||||
|
self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", colors['storyUrls'], QFontBold, blocknum=2 )
|
||||||
self.highlightingRules.append( self.storyUrlRule )
|
self.highlightingRules.append( self.storyUrlRule )
|
||||||
|
|
||||||
# NOT comments -- but can be custom columns, so don't flag.
|
# NOT comments -- but can be custom columns, so don't flag.
|
||||||
|
|
@ -127,15 +135,16 @@ class IniHighlighter(QSyntaxHighlighter):
|
||||||
if blocknum == 3:
|
if blocknum == 3:
|
||||||
self.setFormat( 0, len(text), self.teststoryRule.highlight )
|
self.setFormat( 0, len(text), self.teststoryRule.highlight )
|
||||||
|
|
||||||
# storyUrl section rules:
|
## changed storyUrl section to also be blocknum=1 April 2023
|
||||||
if blocknum == 4:
|
## storyUrl section rules:
|
||||||
self.setFormat( 0, len(text), self.storyUrlRule.highlight )
|
# if blocknum == 4:
|
||||||
|
# self.setFormat( 0, len(text), self.storyUrlRule.highlight )
|
||||||
|
|
||||||
self.setCurrentBlockState( blocknum )
|
self.setCurrentBlockState( blocknum )
|
||||||
|
|
||||||
class HighlightingRule():
|
class HighlightingRule():
|
||||||
def __init__( self, pattern, color,
|
def __init__( self, pattern, color,
|
||||||
weight=QFont.Normal,
|
weight=QFontNormal,
|
||||||
style=Qt.SolidPattern,
|
style=Qt.SolidPattern,
|
||||||
blocknum=0):
|
blocknum=0):
|
||||||
if isinstance(pattern, string_types):
|
if isinstance(pattern, string_types):
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
print_function)
|
print_function)
|
||||||
import six
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
|
__copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
|
||||||
|
|
@ -15,10 +14,8 @@ from time import sleep
|
||||||
from datetime import time
|
from datetime import time
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
import sys
|
||||||
|
|
||||||
from calibre.utils.ipc.server import Empty, Server
|
|
||||||
from calibre.utils.ipc.job import ParallelJob
|
|
||||||
from calibre.constants import numeric_version as calibre_version
|
|
||||||
from calibre.utils.date import local_tz
|
from calibre.utils.date import local_tz
|
||||||
|
|
||||||
# pulls in translation files for _() strings
|
# pulls in translation files for _() strings
|
||||||
|
|
@ -33,160 +30,100 @@ except NameError:
|
||||||
#
|
#
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
def do_download_worker(book_list,
|
def do_download_worker_single(site,
|
||||||
options,
|
book_list,
|
||||||
cpus,
|
options,
|
||||||
merge=False,
|
merge,
|
||||||
notification=lambda x,y:x):
|
notification=lambda x,y:x):
|
||||||
'''
|
|
||||||
Coordinator job, to launch child jobs to do downloads.
|
|
||||||
This is run as a worker job in the background to keep the UI more
|
|
||||||
responsive and get around any memory leak issues as it will launch
|
|
||||||
a child job for each book as a worker process
|
|
||||||
'''
|
|
||||||
## Now running one BG proc per site, which downloads for the same
|
|
||||||
## site in serial.
|
|
||||||
logger.info("CPUs:%s"%cpus)
|
|
||||||
server = Server(pool_size=cpus)
|
|
||||||
|
|
||||||
logger.info(options['version'])
|
logger.info(options['version'])
|
||||||
|
|
||||||
sites_lists = defaultdict(list)
|
## same info debug calibre prints out at startup. For when users
|
||||||
[ sites_lists[x['site']].append(x) for x in book_list if x['good'] ]
|
## give me job output instead of debug log.
|
||||||
|
from calibre.debug import print_basic_debug_info
|
||||||
|
print_basic_debug_info(sys.stderr)
|
||||||
|
|
||||||
totals = {}
|
|
||||||
# can't do direct assignment in list comprehension? I'm sure it
|
|
||||||
# makes sense to some pythonista.
|
|
||||||
# [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
|
|
||||||
[ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
|
|
||||||
# logger.debug(sites_lists.keys())
|
|
||||||
|
|
||||||
# Queue all the jobs
|
|
||||||
jobs_running = 0
|
|
||||||
for site in sites_lists.keys():
|
|
||||||
site_list = sites_lists[site]
|
|
||||||
logger.info(_("Launch background process for site %s:")%site + "\n" +
|
|
||||||
"\n".join([ x['url'] for x in site_list ]))
|
|
||||||
# logger.debug([ x['url'] for x in site_list])
|
|
||||||
args = ['calibre_plugins.fanficfare_plugin.jobs',
|
|
||||||
'do_download_site',
|
|
||||||
(site,site_list,options,merge)]
|
|
||||||
job = ParallelJob('arbitrary_n',
|
|
||||||
"site:(%s)"%site,
|
|
||||||
done=None,
|
|
||||||
args=args)
|
|
||||||
job._site_list = site_list
|
|
||||||
job._processed = False
|
|
||||||
server.add_job(job)
|
|
||||||
jobs_running += 1
|
|
||||||
|
|
||||||
# This server is an arbitrary_n job, so there is a notifier available.
|
|
||||||
# Set the % complete to a small number to avoid the 'unavailable' indicator
|
|
||||||
notification(0.01, _('Downloading FanFiction Stories'))
|
notification(0.01, _('Downloading FanFiction Stories'))
|
||||||
|
from calibre_plugins.fanficfare_plugin import FanFicFareBase
|
||||||
|
fffbase = FanFicFareBase(options['plugin_path'])
|
||||||
|
with fffbase: # so the sys.path was modified while loading the
|
||||||
|
# plug impl.
|
||||||
|
from fanficfare.fff_profile import do_cprofile
|
||||||
|
|
||||||
# dequeue the job results as they arrive, saving the results
|
## extra function just so I can easily use the same
|
||||||
count = 0
|
## @do_cprofile decorator
|
||||||
while True:
|
@do_cprofile
|
||||||
job = server.changed_jobs_queue.get()
|
def profiled_func():
|
||||||
# logger.debug("job get job._processed:%s"%job._processed)
|
count = 0
|
||||||
# A job can 'change' when it is not finished, for example if it
|
totals = {}
|
||||||
# produces a notification.
|
# can't do direct assignment in list comprehension? I'm sure it
|
||||||
msg = None
|
# makes sense to some pythonista.
|
||||||
try:
|
# [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
|
||||||
## msg = book['url']
|
[ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
|
||||||
(percent,msg) = job.notifications.get_nowait()
|
# logger.debug(sites_lists.keys())
|
||||||
# logger.debug("%s<-%s"%(percent,msg))
|
|
||||||
if percent == 10.0: # Only when signaling d/l done.
|
def do_indiv_notif(percent,msg):
|
||||||
count += 1
|
|
||||||
totals[msg] = 1.0/len(totals)
|
|
||||||
# logger.info("Finished: %s"%msg)
|
|
||||||
else:
|
|
||||||
totals[msg] = percent/len(totals)
|
totals[msg] = percent/len(totals)
|
||||||
notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
|
notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
|
||||||
except Empty:
|
|
||||||
pass
|
|
||||||
# without update, is_finished will never be set. however, we
|
|
||||||
# do want to get all the notifications for status so we don't
|
|
||||||
# miss the 'done' ones.
|
|
||||||
job.update(consume_notifications=False)
|
|
||||||
|
|
||||||
# if not job._processed:
|
do_list = []
|
||||||
# sleep(0.5)
|
done_list = []
|
||||||
## Can have a race condition where job.is_finished before
|
logger.info("\n\n"+_("Downloading FanFiction Stories")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
|
||||||
## notifications for all downloads have been processed.
|
## pass failures from metadata through bg job so all results are
|
||||||
## Or even after the job has been finished.
|
## together.
|
||||||
# logger.debug("job.is_finished(%s) or job._processed(%s)"%(job.is_finished, job._processed))
|
|
||||||
if not job.is_finished:
|
|
||||||
continue
|
|
||||||
|
|
||||||
## only process each job once. We can get more than one loop
|
|
||||||
## after job.is_finished.
|
|
||||||
if not job._processed:
|
|
||||||
# sleep(1)
|
|
||||||
# A job really finished. Get the information.
|
|
||||||
|
|
||||||
## This is where bg proc details end up in GUI log.
|
|
||||||
## job.details is the whole debug log for each proc.
|
|
||||||
logger.info("\n\n" + ("="*80) + " " + job.details.replace('\r',''))
|
|
||||||
# logger.debug("Finished background process for site %s:\n%s"%(job._site_list[0]['site'],"\n".join([ x['url'] for x in job._site_list ])))
|
|
||||||
for b in job._site_list:
|
|
||||||
book_list.remove(b)
|
|
||||||
book_list.extend(job.result)
|
|
||||||
job._processed = True
|
|
||||||
jobs_running -= 1
|
|
||||||
|
|
||||||
## Can't use individual count--I've seen stories all reported
|
|
||||||
## finished before results of all jobs processed.
|
|
||||||
if jobs_running == 0:
|
|
||||||
book_list = sorted(book_list,key=lambda x : x['listorder'])
|
|
||||||
logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
|
|
||||||
|
|
||||||
good_lists = defaultdict(list)
|
|
||||||
bad_lists = defaultdict(list)
|
|
||||||
for book in book_list:
|
for book in book_list:
|
||||||
if book['good']:
|
if book['good']:
|
||||||
good_lists[book['status']].append(book)
|
do_list.append(book)
|
||||||
else:
|
else:
|
||||||
bad_lists[book['status']].append(book)
|
done_list.append(book)
|
||||||
|
for book in do_list:
|
||||||
|
# logger.info("%s"%book['url'])
|
||||||
|
done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
|
||||||
|
count += 1
|
||||||
|
return finish_download(done_list)
|
||||||
|
return profiled_func()
|
||||||
|
|
||||||
order = [_('Add'),
|
def finish_download(donelist):
|
||||||
_('Update'),
|
book_list = sorted(donelist,key=lambda x : x['listorder'])
|
||||||
_('Meta'),
|
logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
|
||||||
_('Different URL'),
|
|
||||||
_('Rejected'),
|
|
||||||
_('Skipped'),
|
|
||||||
_('Bad'),
|
|
||||||
_('Error'),
|
|
||||||
]
|
|
||||||
j = 0
|
|
||||||
for d in [ good_lists, bad_lists ]:
|
|
||||||
for status in order:
|
|
||||||
if d[status]:
|
|
||||||
l = d[status]
|
|
||||||
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
|
|
||||||
for book in l:
|
|
||||||
book['reportorder'] = j
|
|
||||||
j += 1
|
|
||||||
del d[status]
|
|
||||||
# just in case a status is added but doesn't appear in order.
|
|
||||||
for status in d.keys():
|
|
||||||
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
|
|
||||||
break
|
|
||||||
|
|
||||||
server.close()
|
good_lists = defaultdict(list)
|
||||||
|
bad_lists = defaultdict(list)
|
||||||
|
for book in book_list:
|
||||||
|
if book['good']:
|
||||||
|
good_lists[book['status']].append(book)
|
||||||
|
else:
|
||||||
|
bad_lists[book['status']].append(book)
|
||||||
|
|
||||||
|
order = [_('Add'),
|
||||||
|
_('Update'),
|
||||||
|
_('Meta'),
|
||||||
|
_('Different URL'),
|
||||||
|
_('Rejected'),
|
||||||
|
_('Skipped'),
|
||||||
|
_('Bad'),
|
||||||
|
_('Error'),
|
||||||
|
]
|
||||||
|
stnum = 0
|
||||||
|
for d in [ good_lists, bad_lists ]:
|
||||||
|
for status in order:
|
||||||
|
stnum += 1
|
||||||
|
if d[status]:
|
||||||
|
l = d[status]
|
||||||
|
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
|
||||||
|
for book in l:
|
||||||
|
# Add prior listorder to 10000 * status num for
|
||||||
|
# ordering of accumulated results with multiple bg
|
||||||
|
# jobs
|
||||||
|
book['reportorder'] = stnum*10000 + book['listorder']
|
||||||
|
del d[status]
|
||||||
|
# just in case a status is added but doesn't appear in order.
|
||||||
|
for status in d.keys():
|
||||||
|
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
|
||||||
|
|
||||||
# return the book list as the job result
|
# return the book list as the job result
|
||||||
return book_list
|
return book_list
|
||||||
|
|
||||||
def do_download_site(site,book_list,options,merge,notification=lambda x,y:x):
|
|
||||||
# logger.info(_("Started job for %s")%site)
|
|
||||||
retval = []
|
|
||||||
for book in book_list:
|
|
||||||
# logger.info("%s"%book['url'])
|
|
||||||
retval.append(do_download_for_worker(book,options,merge,notification))
|
|
||||||
notification(10.0,book['url'])
|
|
||||||
return retval
|
|
||||||
|
|
||||||
def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
'''
|
'''
|
||||||
Child job, to download story when run as a worker job
|
Child job, to download story when run as a worker job
|
||||||
|
|
@ -196,13 +133,13 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
fffbase = FanFicFareBase(options['plugin_path'])
|
fffbase = FanFicFareBase(options['plugin_path'])
|
||||||
with fffbase: # so the sys.path was modified while loading the
|
with fffbase: # so the sys.path was modified while loading the
|
||||||
# plug impl.
|
# plug impl.
|
||||||
from calibre_plugins.fanficfare_plugin.dialogs import NotGoingToDownload
|
|
||||||
from calibre_plugins.fanficfare_plugin.prefs import (
|
from calibre_plugins.fanficfare_plugin.prefs import (
|
||||||
SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
|
SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
|
||||||
UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
|
UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
|
||||||
from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
|
from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
|
||||||
from fanficfare import adapters, writers
|
from fanficfare import adapters, writers
|
||||||
from fanficfare.epubutils import get_update_data
|
from fanficfare.epubutils import get_update_data
|
||||||
|
from fanficfare.exceptions import NotGoingToDownload
|
||||||
from fanficfare.six import text_type as unicode
|
from fanficfare.six import text_type as unicode
|
||||||
|
|
||||||
from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config
|
from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config
|
||||||
|
|
@ -222,9 +159,6 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
options['fileform'],
|
options['fileform'],
|
||||||
options['personal.ini'])
|
options['personal.ini'])
|
||||||
|
|
||||||
if not options['updateepubcover'] and 'epub_for_update' in book and book['collision'] in (UPDATE, UPDATEALWAYS):
|
|
||||||
configuration.set("overrides","never_make_cover","true")
|
|
||||||
|
|
||||||
# images only for epub, html, even if the user mistakenly
|
# images only for epub, html, even if the user mistakenly
|
||||||
# turned it on else where.
|
# turned it on else where.
|
||||||
if options['fileform'] not in ("epub","html"):
|
if options['fileform'] not in ("epub","html"):
|
||||||
|
|
@ -234,18 +168,12 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
adapter.is_adult = book['is_adult']
|
adapter.is_adult = book['is_adult']
|
||||||
adapter.username = book['username']
|
adapter.username = book['username']
|
||||||
adapter.password = book['password']
|
adapter.password = book['password']
|
||||||
|
adapter.totp = book['totp']
|
||||||
adapter.setChaptersRange(book['begin'],book['end'])
|
adapter.setChaptersRange(book['begin'],book['end'])
|
||||||
|
|
||||||
## each site download job starts with a new copy of the
|
## each site download job starts with a new copy of the
|
||||||
## cookiejar and basic_cache from the FG process. They
|
## cookiejar and basic_cache from the FG process. They
|
||||||
## are not shared between different sites' BG downloads
|
## are not shared between different sites' BG downloads
|
||||||
if configuration.getConfig('use_browser_cache'):
|
|
||||||
if 'browser_cache' in options:
|
|
||||||
configuration.set_browser_cache(options['browser_cache'])
|
|
||||||
else:
|
|
||||||
options['browser_cache'] = configuration.get_browser_cache()
|
|
||||||
if 'browser_cachefile' in options:
|
|
||||||
options['browser_cache'].load_cache(options['browser_cachefile'])
|
|
||||||
if 'basic_cache' in options:
|
if 'basic_cache' in options:
|
||||||
configuration.set_basic_cache(options['basic_cache'])
|
configuration.set_basic_cache(options['basic_cache'])
|
||||||
else:
|
else:
|
||||||
|
|
@ -261,6 +189,17 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
if not story.getMetadata("series") and 'calibre_series' in book:
|
if not story.getMetadata("series") and 'calibre_series' in book:
|
||||||
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
|
adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])
|
||||||
|
|
||||||
|
# logger.debug(merge)
|
||||||
|
# logger.debug(book.get('epub_for_update','(NONE)'))
|
||||||
|
# logger.debug(options.get('mergebook','(NOMERGEBOOK)'))
|
||||||
|
|
||||||
|
# is a merge, is a pre-existing anthology, and is not a pre-existing book in anthology.
|
||||||
|
if merge and 'mergebook' in options and 'epub_for_update' not in book:
|
||||||
|
# internal for plugin anthologies to mark chapters
|
||||||
|
# (new) in new stories
|
||||||
|
story.setMetadata("newforanthology","true")
|
||||||
|
logger.debug("metadata newforanthology:%s"%story.getMetadata("newforanthology"))
|
||||||
|
|
||||||
# set PI version instead of default.
|
# set PI version instead of default.
|
||||||
if 'version' in options:
|
if 'version' in options:
|
||||||
story.setMetadata('version',options['version'])
|
story.setMetadata('version',options['version'])
|
||||||
|
|
@ -269,7 +208,6 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
|
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
|
||||||
book['publisher'] = story.getMetadata("publisher")
|
book['publisher'] = story.getMetadata("publisher")
|
||||||
book['url'] = story.getMetadata("storyUrl", removeallentities=True)
|
book['url'] = story.getMetadata("storyUrl", removeallentities=True)
|
||||||
book['tags'] = story.getSubjectTags(removeallentities=True)
|
|
||||||
book['comments'] = story.get_sanitized_description()
|
book['comments'] = story.get_sanitized_description()
|
||||||
book['series'] = story.getMetadata("series", removeallentities=True)
|
book['series'] = story.getMetadata("series", removeallentities=True)
|
||||||
|
|
||||||
|
|
@ -346,20 +284,21 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
adapter.oldchaptersmap,
|
adapter.oldchaptersmap,
|
||||||
adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9]
|
adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9]
|
||||||
|
|
||||||
# dup handling from fff_plugin needed for anthology updates.
|
# dup handling from fff_plugin needed for anthology updates & BG metadata.
|
||||||
if book['collision'] == UPDATE:
|
if book['collision'] in (UPDATE,UPDATEALWAYS):
|
||||||
if chaptercount == urlchaptercount:
|
if chaptercount == urlchaptercount and book['collision'] == UPDATE:
|
||||||
if merge:
|
if merge:
|
||||||
|
## Deliberately pass for UPDATEALWAYS merge.
|
||||||
book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
|
book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
|
||||||
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
|
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
|
||||||
if options['savemetacol'] != '':
|
if options['savemetacol'] != '':
|
||||||
book['savemetacol'] = story.dump_html_metadata()
|
book['savemetacol'] = story.dump_html_metadata()
|
||||||
book['outfile'] = book['epub_for_update'] # for anthology merge ops.
|
book['outfile'] = book['epub_for_update'] # for anthology merge ops.
|
||||||
return book
|
return book
|
||||||
else: # not merge,
|
else:
|
||||||
raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
|
raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
|
||||||
elif chaptercount > urlchaptercount:
|
elif chaptercount > urlchaptercount and not (book['collision'] == UPDATEALWAYS and adapter.getConfig('force_update_epub_always')):
|
||||||
raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
|
raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
|
||||||
elif chaptercount == 0:
|
elif chaptercount == 0:
|
||||||
raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')
|
raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')
|
||||||
|
|
||||||
|
|
@ -397,7 +336,11 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ):
|
options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ):
|
||||||
try:
|
try:
|
||||||
wordcount = get_word_count(outfile)
|
wordcount = get_word_count(outfile)
|
||||||
# logger.info("get_word_count:%s"%wordcount)
|
# logger.info("get_word_count:%s"%wordcount)
|
||||||
|
# clear cache for the rather unusual case of
|
||||||
|
# numWords affecting other previously cached
|
||||||
|
# entries.
|
||||||
|
story.clear_processed_metadata_cache()
|
||||||
story.setMetadata('numWords',wordcount)
|
story.setMetadata('numWords',wordcount)
|
||||||
writer.writeStory(outfilename=outfile, forceOverwrite=True)
|
writer.writeStory(outfilename=outfile, forceOverwrite=True)
|
||||||
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
|
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
|
||||||
|
|
@ -406,8 +349,7 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
except:
|
except:
|
||||||
logger.error("WordCount failed")
|
logger.error("WordCount failed")
|
||||||
|
|
||||||
if options['smarten_punctuation'] and options['fileform'] == "epub" \
|
if options['smarten_punctuation'] and options['fileform'] == "epub":
|
||||||
and calibre_version >= (0, 9, 39):
|
|
||||||
# for smarten punc
|
# for smarten punc
|
||||||
from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
|
from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
|
|
@ -417,12 +359,14 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
||||||
data = {'smarten_punctuation':True}
|
data = {'smarten_punctuation':True}
|
||||||
opts = ALL_OPTS.copy()
|
opts = ALL_OPTS.copy()
|
||||||
opts.update(data)
|
opts.update(data)
|
||||||
O = namedtuple('Options', ' '.join(six.iterkeys(ALL_OPTS)))
|
O = namedtuple('Options', ' '.join(ALL_OPTS.keys()))
|
||||||
opts = O(**opts)
|
opts = O(**opts)
|
||||||
|
|
||||||
log = Log(level=Log.DEBUG)
|
log = Log(level=Log.DEBUG)
|
||||||
polish({outfile:outfile}, opts, log, logger.info)
|
polish({outfile:outfile}, opts, log, logger.info)
|
||||||
|
## here to catch tags set in chapters in literotica for
|
||||||
|
## both overwrites and updates.
|
||||||
|
book['tags'] = story.getSubjectTags(removeallentities=True)
|
||||||
except NotGoingToDownload as d:
|
except NotGoingToDownload as d:
|
||||||
book['good']=False
|
book['good']=False
|
||||||
book['status']=_('Bad')
|
book['status']=_('Bad')
|
||||||
|
|
@ -448,11 +392,12 @@ def inject_cal_cols(book,story,configuration):
|
||||||
if 'calibre_columns' in book:
|
if 'calibre_columns' in book:
|
||||||
injectini = ['[injected]']
|
injectini = ['[injected]']
|
||||||
extra_valid = []
|
extra_valid = []
|
||||||
for k, v in six.iteritems(book['calibre_columns']):
|
for k in book['calibre_columns'].keys():
|
||||||
|
v = book['calibre_columns'][k]
|
||||||
story.setMetadata(k,v['val'])
|
story.setMetadata(k,v['val'])
|
||||||
injectini.append('%s_label:%s'%(k,v['label']))
|
injectini.append('%s_label:%s'%(k,v['label']))
|
||||||
extra_valid.append(k)
|
extra_valid.append(k)
|
||||||
if extra_valid: # if empty, there's nothing to add.
|
if extra_valid: # if empty, there's nothing to add.
|
||||||
injectini.append("add_to_extra_valid_entries:,"+','.join(extra_valid))
|
injectini.append("add_to_extra_valid_entries:,"+','.join(extra_valid))
|
||||||
configuration.readfp(StringIO('\n'.join(injectini)))
|
configuration.read_file(StringIO('\n'.join(injectini)))
|
||||||
#print("added:\n%s\n"%('\n'.join(injectini)))
|
#print("added:\n%s\n"%('\n'.join(injectini)))
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -3,22 +3,9 @@
|
||||||
|
|
||||||
[defaults]
|
[defaults]
|
||||||
## [defaults] section applies to all formats and sites but may be
|
## [defaults] section applies to all formats and sites but may be
|
||||||
## overridden at several levels. Example:
|
## overridden at several levels. See
|
||||||
|
## https://github.com/JimmXinu/FanFicFare/wiki/INI-File for more
|
||||||
## [defaults]
|
## details.
|
||||||
## titlepage_entries: category,genre, status
|
|
||||||
## [www.whofic.com]
|
|
||||||
## # overrides defaults.
|
|
||||||
## titlepage_entries: category,genre, status,dateUpdated,rating
|
|
||||||
## [epub]
|
|
||||||
## # overrides defaults & site section
|
|
||||||
## titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
|
|
||||||
## [www.whofic.com:epub]
|
|
||||||
## # overrides defaults, site section & format section
|
|
||||||
## titlepage_entries: category,genre, status,datePublished
|
|
||||||
## [overrides]
|
|
||||||
## # overrides all other sections
|
|
||||||
## titlepage_entries: category
|
|
||||||
|
|
||||||
## Some sites also require the user to confirm they are adult for
|
## Some sites also require the user to confirm they are adult for
|
||||||
## adult content. Uncomment by removing '#' in front of is_adult.
|
## adult content. Uncomment by removing '#' in front of is_adult.
|
||||||
|
|
@ -29,42 +16,32 @@
|
||||||
## want to make them all look the same? Strip them off, then add them
|
## want to make them all look the same? Strip them off, then add them
|
||||||
## back on with add_chapter_numbers. Don't like the way it strips
|
## back on with add_chapter_numbers. Don't like the way it strips
|
||||||
## numbers or adds them back? See chapter_title_strip_pattern and
|
## numbers or adds them back? See chapter_title_strip_pattern and
|
||||||
## chapter_title_add_pattern.
|
## chapter_title_add_pattern in defaults.ini.
|
||||||
#strip_chapter_numbers:true
|
#strip_chapter_numbers:true
|
||||||
#add_chapter_numbers:true
|
#add_chapter_numbers:true
|
||||||
|
|
||||||
## Add this to genre if there's more than one category.
|
|
||||||
#add_genre_when_multi_category: Crossover
|
|
||||||
|
|
||||||
[epub]
|
[epub]
|
||||||
## include images from img tags in the body and summary of stories.
|
## Include images from img tags in the body and summary of stories.
|
||||||
## Images will be converted to jpg for size if possible. Images work
|
## Images will be converted to jpg for size if possible. Images work
|
||||||
## in epub format only. To get mobi or other format with images,
|
## in epub format only. To get mobi or other format with images,
|
||||||
## download as epub and use Calibre to convert.
|
## download as epub and use Calibre to convert.
|
||||||
|
## true by default, uncomment and set false to not include images.
|
||||||
#include_images:true
|
#include_images:true
|
||||||
|
|
||||||
## Quality level to use when converting images to jpg. Range is 0-100,
|
## If set false, the summary will have all html stripped for safety.
|
||||||
## reasonable values likely to be in the range 70-95.
|
|
||||||
#jpg_quality: 95
|
|
||||||
|
|
||||||
## If not set, the summary will have all html stripped for safety.
|
|
||||||
## Both this and include_images must be true to get images in the
|
## Both this and include_images must be true to get images in the
|
||||||
## summary.
|
## summary.
|
||||||
|
## true by default, uncomment and set false to not keep summary html.
|
||||||
#keep_summary_html:true
|
#keep_summary_html:true
|
||||||
|
|
||||||
## If set, the first image found will be made the cover image. If
|
## If set true, and there isn't a specific cover image, the first
|
||||||
## keep_summary_html is true, any images in summary will be before any
|
## image found in the story will be made the cover image. If
|
||||||
|
## keep_summary_html is true, images in the summary will be before any
|
||||||
## in chapters.
|
## in chapters.
|
||||||
|
## true by default, uncomment and set false to turn off
|
||||||
#make_firstimage_cover:true
|
#make_firstimage_cover:true
|
||||||
|
|
||||||
## Resize images down to width, height, preserving aspect ratio.
|
|
||||||
## Nook size, with margin.
|
|
||||||
#image_max_size: 580, 725
|
|
||||||
|
|
||||||
## Change image to grayscale, if graphics library allows, to save
|
|
||||||
## space.
|
|
||||||
#grayscale_images: false
|
|
||||||
|
|
||||||
|
|
||||||
## Most common, I expect will be using this to save username/passwords
|
## Most common, I expect will be using this to save username/passwords
|
||||||
## for different sites. Here are a few examples. See defaults.ini
|
## for different sites. Here are a few examples. See defaults.ini
|
||||||
|
|
@ -76,28 +53,6 @@
|
||||||
## default is false
|
## default is false
|
||||||
#collect_series: true
|
#collect_series: true
|
||||||
|
|
||||||
[ficwad.com]
|
|
||||||
#username:YourUsername
|
|
||||||
#password:YourPassword
|
|
||||||
|
|
||||||
[www.adastrafanfic.com]
|
|
||||||
## Some sites do not require a login, but do require the user to
|
|
||||||
## confirm they are adult for adult content.
|
|
||||||
#is_adult:true
|
|
||||||
|
|
||||||
[www.twcslibrary.net]
|
|
||||||
#username:YourName
|
|
||||||
#password:yourpassword
|
|
||||||
#is_adult:true
|
|
||||||
## default is false
|
|
||||||
#collect_series: true
|
|
||||||
|
|
||||||
[www.fictionalley.org]
|
|
||||||
#is_adult:true
|
|
||||||
|
|
||||||
[www.harrypotterfanfiction.com]
|
|
||||||
#is_adult:true
|
|
||||||
|
|
||||||
[www.fimfiction.net]
|
[www.fimfiction.net]
|
||||||
#is_adult:true
|
#is_adult:true
|
||||||
#fail_on_password: false
|
#fail_on_password: false
|
||||||
|
|
@ -106,8 +61,9 @@
|
||||||
#is_adult:true
|
#is_adult:true
|
||||||
## tth is a little unusual--it doesn't require user/pass, but the site
|
## tth is a little unusual--it doesn't require user/pass, but the site
|
||||||
## keeps track of which chapters you've read and won't send another
|
## keeps track of which chapters you've read and won't send another
|
||||||
## update until it thinks you're up to date. This way, on download,
|
## update until it thinks you're up to date. If you set
|
||||||
## it thinks you're up to date.
|
## username/password, FFF will login to download. Then the site
|
||||||
|
## thinks you're up to date.
|
||||||
#username:YourName
|
#username:YourName
|
||||||
#password:yourpassword
|
#password:yourpassword
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -102,9 +102,6 @@ updatecalcover_order=[YES,YES_IF_IMG,NO]
|
||||||
gencalcover_order=[YES,YES_UNLESS_IMG,NO]
|
gencalcover_order=[YES,YES_UNLESS_IMG,NO]
|
||||||
do_wordcount_order=[YES,YES_UNLESS_SITE,NO]
|
do_wordcount_order=[YES,YES_UNLESS_SITE,NO]
|
||||||
|
|
||||||
# if don't have any settings for FanFicFarePlugin, copy from
|
|
||||||
# predecessor FanFictionDownLoaderPlugin.
|
|
||||||
FFDL_PREFS_NAMESPACE = 'FanFictionDownLoaderPlugin'
|
|
||||||
PREFS_NAMESPACE = 'FanFicFarePlugin'
|
PREFS_NAMESPACE = 'FanFicFarePlugin'
|
||||||
PREFS_KEY_SETTINGS = 'settings'
|
PREFS_KEY_SETTINGS = 'settings'
|
||||||
|
|
||||||
|
|
@ -123,12 +120,13 @@ default_prefs['reject_delete_default'] = True
|
||||||
|
|
||||||
default_prefs['updatemeta'] = True
|
default_prefs['updatemeta'] = True
|
||||||
default_prefs['bgmeta'] = False
|
default_prefs['bgmeta'] = False
|
||||||
default_prefs['updateepubcover'] = False
|
#default_prefs['updateepubcover'] = True # removed in favor of always True Oct 2022
|
||||||
default_prefs['keeptags'] = False
|
default_prefs['keeptags'] = False
|
||||||
default_prefs['suppressauthorsort'] = False
|
default_prefs['suppressauthorsort'] = False
|
||||||
default_prefs['suppresstitlesort'] = False
|
default_prefs['suppresstitlesort'] = False
|
||||||
default_prefs['authorcase'] = False
|
default_prefs['authorcase'] = False
|
||||||
default_prefs['titlecase'] = False
|
default_prefs['titlecase'] = False
|
||||||
|
default_prefs['seriescase'] = False
|
||||||
default_prefs['setanthologyseries'] = False
|
default_prefs['setanthologyseries'] = False
|
||||||
default_prefs['mark'] = False
|
default_prefs['mark'] = False
|
||||||
default_prefs['mark_success'] = True
|
default_prefs['mark_success'] = True
|
||||||
|
|
@ -146,6 +144,7 @@ default_prefs['adddialogstaysontop'] = False
|
||||||
default_prefs['lookforurlinhtml'] = False
|
default_prefs['lookforurlinhtml'] = False
|
||||||
default_prefs['checkforseriesurlid'] = True
|
default_prefs['checkforseriesurlid'] = True
|
||||||
default_prefs['auto_reject_seriesurlid'] = False
|
default_prefs['auto_reject_seriesurlid'] = False
|
||||||
|
default_prefs['mark_series_anthologies'] = False
|
||||||
default_prefs['checkforurlchange'] = True
|
default_prefs['checkforurlchange'] = True
|
||||||
default_prefs['injectseries'] = False
|
default_prefs['injectseries'] = False
|
||||||
default_prefs['matchtitleauth'] = True
|
default_prefs['matchtitleauth'] = True
|
||||||
|
|
@ -161,11 +160,12 @@ default_prefs['addtolistsonread'] = False
|
||||||
default_prefs['autounnew'] = False
|
default_prefs['autounnew'] = False
|
||||||
|
|
||||||
default_prefs['updatecalcover'] = SAVE_YES_IF_IMG
|
default_prefs['updatecalcover'] = SAVE_YES_IF_IMG
|
||||||
default_prefs['gencalcover'] = SAVE_YES
|
default_prefs['covernewonly'] = False
|
||||||
|
default_prefs['gencalcover'] = SAVE_YES_UNLESS_IMG
|
||||||
default_prefs['updatecover'] = False
|
default_prefs['updatecover'] = False
|
||||||
default_prefs['calibre_gen_cover'] = False
|
default_prefs['calibre_gen_cover'] = True
|
||||||
default_prefs['plugin_gen_cover'] = True
|
default_prefs['plugin_gen_cover'] = False
|
||||||
default_prefs['gcnewonly'] = False
|
default_prefs['gcnewonly'] = True
|
||||||
default_prefs['gc_site_settings'] = {}
|
default_prefs['gc_site_settings'] = {}
|
||||||
default_prefs['allow_gc_from_ini'] = True
|
default_prefs['allow_gc_from_ini'] = True
|
||||||
default_prefs['gc_polish_cover'] = False
|
default_prefs['gc_polish_cover'] = False
|
||||||
|
|
@ -183,6 +183,7 @@ default_prefs['allow_custcol_from_ini'] = True
|
||||||
|
|
||||||
default_prefs['std_cols_newonly'] = {}
|
default_prefs['std_cols_newonly'] = {}
|
||||||
default_prefs['set_author_url'] = True
|
default_prefs['set_author_url'] = True
|
||||||
|
default_prefs['set_series_url'] = True
|
||||||
default_prefs['includecomments'] = False
|
default_prefs['includecomments'] = False
|
||||||
default_prefs['anth_comments_newonly'] = True
|
default_prefs['anth_comments_newonly'] = True
|
||||||
|
|
||||||
|
|
@ -197,6 +198,11 @@ default_prefs['auto_reject_from_email'] = False
|
||||||
default_prefs['update_existing_only_from_email'] = False
|
default_prefs['update_existing_only_from_email'] = False
|
||||||
default_prefs['download_from_email_immediately'] = False
|
default_prefs['download_from_email_immediately'] = False
|
||||||
|
|
||||||
|
|
||||||
|
#default_prefs['single_proc_jobs'] = True # setting and code removed
|
||||||
|
default_prefs['site_split_jobs'] = True
|
||||||
|
default_prefs['reconsolidate_jobs'] = True
|
||||||
|
|
||||||
def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
|
def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
|
||||||
db.prefs.set_namespaced(PREFS_NAMESPACE,
|
db.prefs.set_namespaced(PREFS_NAMESPACE,
|
||||||
setting,
|
setting,
|
||||||
|
|
@ -211,12 +217,6 @@ def get_library_config(db,setting=PREFS_KEY_SETTINGS,def_prefs=default_prefs):
|
||||||
library_config = db.prefs.get_namespaced(PREFS_NAMESPACE,
|
library_config = db.prefs.get_namespaced(PREFS_NAMESPACE,
|
||||||
setting)
|
setting)
|
||||||
|
|
||||||
# if don't have any settings for FanFicFarePlugin, copy from
|
|
||||||
# predecessor FanFictionDownLoaderPlugin.
|
|
||||||
if library_config is None:
|
|
||||||
logger.info("Attempting to read settings from predecessor--FFDL")
|
|
||||||
library_config = db.prefs.get_namespaced(FFDL_PREFS_NAMESPACE,
|
|
||||||
setting)
|
|
||||||
if library_config is None:
|
if library_config is None:
|
||||||
# defaults.
|
# defaults.
|
||||||
logger.info("Using default settings")
|
logger.info("Using default settings")
|
||||||
|
|
|
||||||
2613
calibre-plugin/translations/ar.po
Normal file
2613
calibre-plugin/translations/ar.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2613
calibre-plugin/translations/ko.po
Normal file
2613
calibre-plugin/translations/ko.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2612
calibre-plugin/translations/mr.po
Normal file
2612
calibre-plugin/translations/mr.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2612
calibre-plugin/translations/ta.po
Normal file
2612
calibre-plugin/translations/ta.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -30,10 +30,14 @@ from .. import configurable as configurable
|
||||||
|
|
||||||
## must import each adapter here.
|
## must import each adapter here.
|
||||||
|
|
||||||
|
from . import base_adapter
|
||||||
from . import base_efiction_adapter
|
from . import base_efiction_adapter
|
||||||
from . import adapter_test1
|
from . import adapter_test1
|
||||||
|
from . import adapter_test2
|
||||||
|
from . import adapter_test3
|
||||||
|
from . import adapter_test4
|
||||||
from . import adapter_fanfictionnet
|
from . import adapter_fanfictionnet
|
||||||
from . import adapter_fictionalleyorg
|
from . import adapter_fictionalleyarchiveorg
|
||||||
from . import adapter_fictionpresscom
|
from . import adapter_fictionpresscom
|
||||||
from . import adapter_ficwadcom
|
from . import adapter_ficwadcom
|
||||||
from . import adapter_fimfictionnet
|
from . import adapter_fimfictionnet
|
||||||
|
|
@ -49,10 +53,7 @@ from . import adapter_archiveofourownorg
|
||||||
from . import adapter_ficbooknet
|
from . import adapter_ficbooknet
|
||||||
from . import adapter_midnightwhispers
|
from . import adapter_midnightwhispers
|
||||||
from . import adapter_ksarchivecom
|
from . import adapter_ksarchivecom
|
||||||
from . import adapter_archiveskyehawkecom
|
|
||||||
from . import adapter_squidgeorgpeja
|
|
||||||
from . import adapter_libraryofmoriacom
|
from . import adapter_libraryofmoriacom
|
||||||
from . import adapter_wraithbaitcom
|
|
||||||
from . import adapter_ashwindersycophanthexcom
|
from . import adapter_ashwindersycophanthexcom
|
||||||
from . import adapter_chaossycophanthexcom
|
from . import adapter_chaossycophanthexcom
|
||||||
from . import adapter_erosnsapphosycophanthexcom
|
from . import adapter_erosnsapphosycophanthexcom
|
||||||
|
|
@ -61,45 +62,27 @@ from . import adapter_occlumencysycophanthexcom
|
||||||
from . import adapter_phoenixsongnet
|
from . import adapter_phoenixsongnet
|
||||||
from . import adapter_walkingtheplankorg
|
from . import adapter_walkingtheplankorg
|
||||||
from . import adapter_dokugacom
|
from . import adapter_dokugacom
|
||||||
from . import adapter_iketernalnet
|
|
||||||
from . import adapter_storiesofardacom
|
from . import adapter_storiesofardacom
|
||||||
from . import adapter_destinysgatewaycom
|
|
||||||
from . import adapter_ncisfictioncom
|
from . import adapter_ncisfictioncom
|
||||||
from . import adapter_fanfiktionde
|
from . import adapter_fanfiktionde
|
||||||
from . import adapter_ponyfictionarchivenet
|
|
||||||
from . import adapter_themasquenet
|
from . import adapter_themasquenet
|
||||||
from . import adapter_pretendercentrecom
|
from . import adapter_pretendercentrecom
|
||||||
from . import adapter_darksolaceorg
|
from . import adapter_darksolaceorg
|
||||||
from . import adapter_finestoriescom
|
from . import adapter_storyroomcom
|
||||||
from . import adapter_hpfanficarchivecom
|
|
||||||
from . import adapter_nhamagicalworldsus
|
|
||||||
from . import adapter_hlfictionnet
|
|
||||||
from . import adapter_dracoandginnycom
|
from . import adapter_dracoandginnycom
|
||||||
from . import adapter_scarvesandcoffeenet
|
|
||||||
from . import adapter_wolverineandroguecom
|
from . import adapter_wolverineandroguecom
|
||||||
from . import adapter_merlinficdtwinscouk
|
|
||||||
from . import adapter_thehookupzonenet
|
from . import adapter_thehookupzonenet
|
||||||
from . import adapter_bloodtiesfancom
|
|
||||||
from . import adapter_qafficcom
|
|
||||||
from . import adapter_efpfanficnet
|
from . import adapter_efpfanficnet
|
||||||
from . import adapter_faeriearchivecom
|
|
||||||
from . import adapter_imagineeficcom
|
from . import adapter_imagineeficcom
|
||||||
from . import adapter_potterheadsanonymouscom
|
|
||||||
from . import adapter_storiesonlinenet
|
from . import adapter_storiesonlinenet
|
||||||
from . import adapter_trekiverseorg
|
|
||||||
from . import adapter_literotica
|
from . import adapter_literotica
|
||||||
from . import adapter_voracity2eficcom
|
from . import adapter_voracity2eficcom
|
||||||
from . import adapter_spikeluvercom
|
from . import adapter_spikeluvercom
|
||||||
from . import adapter_bloodshedversecom
|
from . import adapter_bloodshedversecom
|
||||||
from . import adapter_fanfichu
|
|
||||||
from . import adapter_fictionmaniatv
|
from . import adapter_fictionmaniatv
|
||||||
from . import adapter_themaplebookshelf
|
|
||||||
from . import adapter_sheppardweircom
|
from . import adapter_sheppardweircom
|
||||||
from . import adapter_samandjacknet
|
from . import adapter_samandjacknet
|
||||||
from . import adapter_csiforensicscom
|
|
||||||
from . import adapter_fanfictionjunkiesde
|
|
||||||
from . import adapter_tgstorytimecom
|
from . import adapter_tgstorytimecom
|
||||||
from . import adapter_itcouldhappennet
|
|
||||||
from . import adapter_forumsspacebattlescom
|
from . import adapter_forumsspacebattlescom
|
||||||
from . import adapter_forumssufficientvelocitycom
|
from . import adapter_forumssufficientvelocitycom
|
||||||
from . import adapter_forumquestionablequestingcom
|
from . import adapter_forumquestionablequestingcom
|
||||||
|
|
@ -107,8 +90,6 @@ from . import adapter_ninelivesarchivecom
|
||||||
from . import adapter_masseffect2in
|
from . import adapter_masseffect2in
|
||||||
from . import adapter_quotevcom
|
from . import adapter_quotevcom
|
||||||
from . import adapter_mcstoriescom
|
from . import adapter_mcstoriescom
|
||||||
from . import adapter_buffygilescom
|
|
||||||
from . import adapter_andromedawebcom
|
|
||||||
from . import adapter_naiceanilmenet
|
from . import adapter_naiceanilmenet
|
||||||
from . import adapter_adultfanfictionorg
|
from . import adapter_adultfanfictionorg
|
||||||
from . import adapter_fictionhuntcom
|
from . import adapter_fictionhuntcom
|
||||||
|
|
@ -118,58 +99,48 @@ from . import adapter_bdsmlibrarycom
|
||||||
from . import adapter_asexstoriescom
|
from . import adapter_asexstoriescom
|
||||||
from . import adapter_gluttonyfictioncom
|
from . import adapter_gluttonyfictioncom
|
||||||
from . import adapter_valentchambercom
|
from . import adapter_valentchambercom
|
||||||
from . import adapter_looselugscom
|
|
||||||
from . import adapter_wwwgiantessworldnet
|
from . import adapter_wwwgiantessworldnet
|
||||||
from . import adapter_lotrgficcom
|
|
||||||
from . import adapter_tomparisdormcom
|
|
||||||
from . import adapter_sugarquillnet
|
|
||||||
from . import adapter_starslibrarynet
|
from . import adapter_starslibrarynet
|
||||||
from . import adapter_fanficauthorsnet
|
from . import adapter_fanficauthorsnet
|
||||||
from . import adapter_fireflyfansnet
|
from . import adapter_fireflyfansnet
|
||||||
from . import adapter_sebklainenet
|
|
||||||
from . import adapter_shriftweborgbfa
|
|
||||||
from . import adapter_trekfanfictionnet
|
from . import adapter_trekfanfictionnet
|
||||||
from . import adapter_wuxiaworldcom
|
|
||||||
from . import adapter_wwwlushstoriescom
|
|
||||||
from . import adapter_wwwutopiastoriescom
|
from . import adapter_wwwutopiastoriescom
|
||||||
from . import adapter_sinfuldreamscomunicornfic
|
from . import adapter_sinfuldreamscomunicornfic
|
||||||
from . import adapter_sinfuldreamscomwhisperedmuse
|
|
||||||
from . import adapter_sinfuldreamscomwickedtemptation
|
from . import adapter_sinfuldreamscomwickedtemptation
|
||||||
from . import adapter_asianfanficscom
|
from . import adapter_asianfanficscom
|
||||||
from . import adapter_webnovelcom
|
|
||||||
from . import adapter_mttjustoncenet
|
from . import adapter_mttjustoncenet
|
||||||
from . import adapter_narutoficorg
|
from . import adapter_narutoficorg
|
||||||
from . import adapter_starskyhutcharchivenet
|
|
||||||
from . import adapter_swordborderlineangelcom
|
|
||||||
from . import adapter_tasteofpoisoninkubationnet
|
|
||||||
from . import adapter_thedelphicexpansecom
|
from . import adapter_thedelphicexpansecom
|
||||||
from . import adapter_wwwaneroticstorycom
|
from . import adapter_wwwaneroticstorycom
|
||||||
from . import adapter_lcfanficcom
|
from . import adapter_lcfanficcom
|
||||||
from . import adapter_noveltrovecom
|
|
||||||
from . import adapter_inkbunnynet
|
from . import adapter_inkbunnynet
|
||||||
from . import adapter_alternatehistorycom
|
from . import adapter_alternatehistorycom
|
||||||
from . import adapter_wattpadcom
|
from . import adapter_wattpadcom
|
||||||
from . import adapter_novelonlinefullcom
|
from . import adapter_novelonlinefullcom
|
||||||
from . import adapter_wwwnovelallcom
|
from . import adapter_wwwnovelallcom
|
||||||
from . import adapter_wuxiaworldco
|
|
||||||
from . import adapter_novelupdatescc
|
|
||||||
from . import adapter_harrypotterfanfictioncom
|
|
||||||
from . import adapter_hentaifoundrycom
|
from . import adapter_hentaifoundrycom
|
||||||
from . import adapter_mugglenetfanfictioncom
|
from . import adapter_mugglenetfanfictioncom
|
||||||
from . import adapter_swiorgru
|
|
||||||
from . import adapter_fanficsme
|
from . import adapter_fanficsme
|
||||||
from . import adapter_fanfictalkcom
|
from . import adapter_fanfictalkcom
|
||||||
from . import adapter_scifistoriescom
|
from . import adapter_scifistoriescom
|
||||||
from . import adapter_silmarillionwritersguildorg
|
|
||||||
from . import adapter_chireadscom
|
from . import adapter_chireadscom
|
||||||
from . import adapter_scribblehubcom
|
from . import adapter_scribblehubcom
|
||||||
from . import adapter_fictionlive
|
from . import adapter_fictionlive
|
||||||
from . import adapter_wuxiaworldsite
|
|
||||||
from . import adapter_thesietchcom
|
from . import adapter_thesietchcom
|
||||||
from . import adapter_fastnovelnet
|
|
||||||
from . import adapter_squidgeworldorg
|
from . import adapter_squidgeworldorg
|
||||||
from . import adapter_novelfull
|
from . import adapter_novelfull
|
||||||
from . import adapter_worldofxde
|
from . import adapter_psychficcom
|
||||||
|
from . import adapter_deviantartcom
|
||||||
|
from . import adapter_readonlymindcom
|
||||||
|
from . import adapter_wwwsunnydaleafterdarkcom
|
||||||
|
from . import adapter_syosetucom
|
||||||
|
from . import adapter_kakuyomujp
|
||||||
|
from . import adapter_fanfictionsfr
|
||||||
|
from . import adapter_touchfluffytail
|
||||||
|
from . import adapter_spiritfanfictioncom
|
||||||
|
from . import adapter_superlove
|
||||||
|
from . import adapter_cfaa
|
||||||
|
from . import adapter_althistorycom
|
||||||
|
|
||||||
## This bit of complexity allows adapters to be added by just adding
|
## This bit of complexity allows adapters to be added by just adding
|
||||||
## importing. It eliminates the long if/else clauses we used to need
|
## importing. It eliminates the long if/else clauses we used to need
|
||||||
|
|
@ -253,6 +224,21 @@ def get_section_url(url):
|
||||||
## return unchanged in that case.
|
## return unchanged in that case.
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
def get_url_search(url):
|
||||||
|
'''
|
||||||
|
For adapters that have story URLs that can change. This is
|
||||||
|
used for searching the Calibre library by identifiers:url for
|
||||||
|
sites (generally) that contain author or title that can
|
||||||
|
change, but also have a unique identifier that doesn't.
|
||||||
|
|
||||||
|
returns a string containing a regexp, not a compiled re object.
|
||||||
|
'''
|
||||||
|
cls = _get_class_for(url)[0]
|
||||||
|
if not cls:
|
||||||
|
## still apply common processing.
|
||||||
|
cls = base_adapter.BaseSiteAdapter
|
||||||
|
return cls.get_url_search(url)
|
||||||
|
|
||||||
def getAdapter(config,url,anyurl=False):
|
def getAdapter(config,url,anyurl=False):
|
||||||
|
|
||||||
#logger.debug("trying url:"+url)
|
#logger.debug("trying url:"+url)
|
||||||
|
|
|
||||||
|
|
@ -15,201 +15,24 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
import re
|
|
||||||
|
|
||||||
from ..htmlcleanup import stripHTML
|
from .base_otw_adapter import BaseOTWAdapter
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
self.story.setMetadata('siteabbrev','aaff')
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'www.adastrafanfic.com'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
addurl = "&warning=5"
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
# problems with some stories, but only in calibre. I suspect
|
|
||||||
# issues with different SGML parsers in python. This is a
|
|
||||||
# nasty hack, but it works.
|
|
||||||
data = data[data.index("<body"):]
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
## Title
|
|
||||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewuser.php"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
|
|
||||||
## <meta name='description' content='<p>Description</p> ...' >
|
|
||||||
## Summary, strangely, is in the content attr of a <meta name='description'> tag
|
|
||||||
## which is escaped HTML. Unfortunately, we can't use it because they don't
|
|
||||||
## escape (') chars in the desc, breakin the tag.
|
|
||||||
#meta_desc = soup.find('meta',{'name':'description'})
|
|
||||||
#metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
|
|
||||||
#self.story.setMetadata('description',stripHTML(metasoup))
|
|
||||||
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Summary' in label:
|
|
||||||
## Everything until the next span class='label'
|
|
||||||
svalue = ''
|
|
||||||
while value and 'label' not in defaultGetattr(value,'class'):
|
|
||||||
svalue += unicode(value)
|
|
||||||
value = value.nextSibling
|
|
||||||
# sometimes poorly formated desc (<p> w/o </p>) leads
|
|
||||||
# to all labels being included.
|
|
||||||
svalue=svalue[:svalue.find('<span class="label">')]
|
|
||||||
self.setDescription(url,svalue)
|
|
||||||
#self.story.setMetadata('description',stripHTML(svalue))
|
|
||||||
|
|
||||||
if 'Rated' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if 'Word count' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Categories' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
catstext = [cat.string for cat in cats]
|
|
||||||
for cat in catstext:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Characters' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
charstext = [char.string for char in chars]
|
|
||||||
for char in charstext:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Genre' in label:
|
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
|
||||||
genrestext = [genre.string for genre in genres]
|
|
||||||
self.genre = ', '.join(genrestext)
|
|
||||||
for genre in genrestext:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
if 'Warnings' in label:
|
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
|
||||||
warningstext = [warning.string for warning in warnings]
|
|
||||||
self.warning = ', '.join(warningstext)
|
|
||||||
for warning in warningstext:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
# there's a stray [ at the end.
|
|
||||||
#value = value[0:-1]
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
# problems with some stories, but only in calibre. I suspect
|
|
||||||
# issues with different SGML parsers in python. This is a
|
|
||||||
# nasty hack, but it works.
|
|
||||||
data = data[data.index("<body"):]
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
span = soup.find('div', {'id' : 'story'})
|
|
||||||
|
|
||||||
if None == span:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,span)
|
|
||||||
|
|
||||||
def getClass():
|
def getClass():
|
||||||
return AdAstraFanficComSiteAdapter
|
return AdastrafanficComAdapter
|
||||||
|
|
||||||
|
class AdastrafanficComAdapter(BaseOTWAdapter):
|
||||||
|
|
||||||
|
def __init__(self, config, url):
|
||||||
|
BaseOTWAdapter.__init__(self, config, url)
|
||||||
|
|
||||||
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
|
self.story.setMetadata('siteabbrev','aaff')
|
||||||
|
|
||||||
|
@staticmethod # must be @staticmethod, don't remove it.
|
||||||
|
def getSiteDomain():
|
||||||
|
# The site domain. Does have www here, if it uses it.
|
||||||
|
return 'www.adastrafanfic.com'
|
||||||
|
|
|
||||||
|
|
@ -57,8 +57,8 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
||||||
# normalized story URL.(checking self.zone against list
|
# normalized story URL.(checking self.zone against list
|
||||||
# removed--it was redundant w/getAcceptDomains and
|
# removed--it was redundant w/getAcceptDomains and
|
||||||
# getSiteURLPattern both)
|
# getSiteURLPattern both)
|
||||||
self._setURL('http://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
|
self._setURL('https://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
|
||||||
#self._setURL('http://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
#self._setURL('https://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
#self.story.setMetadata('siteabbrev',self.getSiteAbbrev())
|
#self.story.setMetadata('siteabbrev',self.getSiteAbbrev())
|
||||||
|
|
@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
# The date format will vary from site to site.
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||||
self.dateformat = "%Y-%m-%d"
|
self.dateformat = "%B %d, %Y"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Added because adult-fanfiction.org does send you to
|
## Added because adult-fanfiction.org does send you to
|
||||||
## www.adult-fanfiction.org when you go to it and it also moves
|
## www.adult-fanfiction.org when you go to it and it also moves
|
||||||
|
|
@ -113,79 +111,31 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(self):
|
def getSiteExampleURLs(self):
|
||||||
return ("http://anime.adult-fanfiction.org/story.php?no=123456789 "
|
return ("https://anime.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://anime2.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://anime2.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://bleach.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://bleach.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://books.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://books.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://buffy.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://buffy.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://cartoon.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://cartoon.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://celeb.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://celeb.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://comics.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://comics.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://ff.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://ff.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://games.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://games.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://hp.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://hp.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://inu.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://inu.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://lotr.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://lotr.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://manga.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://manga.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://movies.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://movies.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://naruto.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://naruto.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://ne.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://ne.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://original.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://original.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://tv.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://tv.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://xmen.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://xmen.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://ygo.adult-fanfiction.org/story.php?no=123456789 "
|
+ "https://ygo.adult-fanfiction.org/story.php?no=123456789 "
|
||||||
+ "http://yuyu.adult-fanfiction.org/story.php?no=123456789")
|
+ "https://yuyu.adult-fanfiction.org/story.php?no=123456789")
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return r'http?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
|
return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
|
||||||
|
|
||||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
|
||||||
## Login seems to be reasonably standard across eFiction sites.
|
|
||||||
#def needToLoginCheck(self, data):
|
|
||||||
##This adapter will always require a login
|
|
||||||
# return True
|
|
||||||
|
|
||||||
# <form name="login" method="post" action="">
|
|
||||||
# <div class="top">E-mail: <span id="sprytextfield1">
|
|
||||||
# <input name="email" type="text" id="email" size="20" maxlength="255" />
|
|
||||||
# <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
|
|
||||||
# <div class="top">Password: <span id="sprytextfield2">
|
|
||||||
# <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
|
|
||||||
# <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
|
|
||||||
# <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
|
|
||||||
# <input type="submit" value="Login" />
|
|
||||||
# </div>
|
|
||||||
# </form>
|
|
||||||
|
|
||||||
|
|
||||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
|
||||||
#def performLogin(self, url, soup):
|
|
||||||
# params = {}
|
|
||||||
|
|
||||||
# if self.password:
|
|
||||||
# params['email'] = self.username
|
|
||||||
# params['pass1'] = self.password
|
|
||||||
# else:
|
|
||||||
# params['email'] = self.getConfig("username")
|
|
||||||
# params['pass1'] = self.getConfig("password")
|
|
||||||
# params['submit'] = 'Login'
|
|
||||||
|
|
||||||
# # copy all hidden input tags to pick up appropriate tokens.
|
|
||||||
# for tag in soup.findAll('input',{'type':'hidden'}):
|
|
||||||
# params[tag['name']] = tag['value']
|
|
||||||
|
|
||||||
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
|
|
||||||
|
|
||||||
# d = self.post_request(url, params, usecache=False)
|
|
||||||
# d = self.post_request(url, params, usecache=False)
|
|
||||||
# soup = self.make_soup(d)
|
|
||||||
|
|
||||||
#if not (soup.find('form', {'name' : 'login'}) == None):
|
|
||||||
# logger.info("Failed to login to URL %s as %s" % (url, params['email']))
|
|
||||||
# raise exceptions.FailedToLogin(url,params['email'])
|
|
||||||
# return False
|
|
||||||
#else:
|
|
||||||
# return True
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||||
|
|
@ -193,173 +143,97 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
||||||
## You need to have your is_adult set to true to get this story
|
## You need to have your is_adult set to true to get this story
|
||||||
if not (self.is_adult or self.getConfig("is_adult")):
|
if not (self.is_adult or self.getConfig("is_adult")):
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
raise exceptions.AdultCheckRequired(self.url)
|
||||||
|
else:
|
||||||
|
d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
|
||||||
|
if "Age verified successfully" not in d:
|
||||||
|
raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))
|
||||||
|
|
||||||
url = self.url
|
url = self.url
|
||||||
logger.debug("URL: "+url)
|
logger.debug("URL: "+url)
|
||||||
|
|
||||||
data = self.get_request(url)
|
data = self.get_request(url)
|
||||||
|
# logger.debug(data)
|
||||||
|
|
||||||
if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
|
if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
|
||||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))
|
raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
|
||||||
#self.performLogin(url, soup)
|
|
||||||
|
|
||||||
|
|
||||||
## Title
|
## Title
|
||||||
## Some of the titles have a backslash on the story page, but not on the Author's page
|
## Some of the titles have a backslash on the story page, but not on the Author's page
|
||||||
## So I am removing it from the title, so it can be found on the Author's page further in the code.
|
## So I am removing it from the title, so it can be found on the Author's page further in the code.
|
||||||
## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
|
## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
|
||||||
## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
|
## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
|
||||||
a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
|
h1 = soup.find('h1')
|
||||||
self.story.setMetadata('title',stripHTML(a).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
|
# logger.debug("Title:%s"%h1)
|
||||||
|
self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters from first list only
|
||||||
chapters = soup.find('div',{'class':'dropdown-content'})
|
chapters = soup.select_one('select.chapter-select').select('option')
|
||||||
for i, chapter in enumerate(chapters.findAll('a')):
|
for chapter in chapters:
|
||||||
self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
|
self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])
|
||||||
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
# Find authorid and URL from... author url.
|
||||||
a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
|
a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
|
||||||
if a == None:
|
if a == None:
|
||||||
# I know that the original author of fanficfare wants to always have metadata,
|
# I know that the original author of fanficfare wants to always have metadata,
|
||||||
# but I posit that if the story is there, even if we can't get the metadata from the
|
# but I posit that if the story is there, even if we can't get the metadata from the
|
||||||
# author page, the story should still be able to be downloaded, which is what I've done here.
|
# author page, the story should still be able to be downloaded, which is what I've done here.
|
||||||
self.story.setMetadata('authorId','000000000')
|
self.story.setMetadata('authorId','000000000')
|
||||||
self.story.setMetadata('authorUrl','http://www.adult-fanfiction.org')
|
self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
|
||||||
self.story.setMetadata('author','Unknown')
|
self.story.setMetadata('author','Unknown')
|
||||||
logger.warning('There was no author found for the story... Metadata will not be retreived.')
|
logger.warning('There was no author found for the story... Metadata will not be retreived.')
|
||||||
self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
|
self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
|
||||||
else:
|
else:
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||||
self.story.setMetadata('authorUrl',a['href'])
|
self.story.setMetadata('authorUrl',a['href'])
|
||||||
self.story.setMetadata('author',stripHTML(a))
|
self.story.setMetadata('author',stripHTML(a))
|
||||||
|
|
||||||
##The story page does not give much Metadata, so we go to the Author's page
|
## The story page does not give much Metadata, so we go to
|
||||||
|
## the Author's page. Except it's actually a sub-req for
|
||||||
|
## list of author's stories for that subdomain
|
||||||
|
author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
|
||||||
|
self.getBaseDomain(),
|
||||||
|
self.zone,
|
||||||
|
self.story.getMetadata('authorId'))
|
||||||
|
|
||||||
##Get the first Author page to see if there are multiple pages.
|
logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
|
||||||
##AFF doesn't care if the page number is larger than the actual pages,
|
|
||||||
##it will continue to show the last page even if the variable is larger than the actual page
|
|
||||||
author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
|
|
||||||
#author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
|
|
||||||
|
|
||||||
##I'm resetting the author page to the zone for this story
|
|
||||||
self.story.setMetadata('authorUrl',author_Url)
|
|
||||||
|
|
||||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
|
||||||
adata = self.get_request(author_Url)
|
adata = self.get_request(author_Url)
|
||||||
|
|
||||||
if "The member you are looking for does not exist." in adata:
|
none_found = "No stories found in this category."
|
||||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
|
if none_found in adata:
|
||||||
#raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
|
raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))
|
||||||
|
|
||||||
asoup = self.make_soup(adata)
|
asoup = self.make_soup(adata)
|
||||||
|
# logger.debug(asoup)
|
||||||
|
|
||||||
##Getting the number of pages
|
story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
|
||||||
pages=asoup.find('div',{'class' : 'pagination'}).findAll('li')[-1].find('a')
|
# logger.debug(story_card)
|
||||||
if not pages == None:
|
|
||||||
pages = pages['href'].split('=')[-1]
|
|
||||||
else:
|
|
||||||
pages = 0
|
|
||||||
|
|
||||||
##If there is only 1 page of stories, check it to get the Metadata,
|
## Category
|
||||||
if pages == 0:
|
## I've only seen one category per story so far, but just in case:
|
||||||
a = asoup.findAll('li')
|
for cat in story_card.select('div.story-card-category'):
|
||||||
for lc2 in a:
|
# remove Category:, old code suggests Located: is also
|
||||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
|
# possible, so removing by <strong>
|
||||||
break
|
cat.find("strong").decompose()
|
||||||
## otherwise go through the pages
|
self.story.addToList('category',stripHTML(cat))
|
||||||
else:
|
|
||||||
page=1
|
|
||||||
i=0
|
|
||||||
while i == 0:
|
|
||||||
##We already have the first page, so if this is the first time through, skip getting the page
|
|
||||||
if page != 1:
|
|
||||||
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
|
|
||||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
|
||||||
adata = self.get_request(author_Url)
|
|
||||||
##This will probably never be needed, since AFF doesn't seem to care what number you put as
|
|
||||||
## the page number, it will default to the last page, even if you use 1000, for an author
|
|
||||||
## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
|
|
||||||
if "The member you are looking for does not exist." in adata:
|
|
||||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
|
|
||||||
# we look for the li element that has the story here
|
|
||||||
asoup = self.make_soup(adata)
|
|
||||||
|
|
||||||
a = asoup.findAll('li')
|
self.setDescription(url,story_card.select_one('div.story-card-description'))
|
||||||
for lc2 in a:
|
|
||||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
|
|
||||||
i=1
|
|
||||||
break
|
|
||||||
page = page + 1
|
|
||||||
if page > int(pages):
|
|
||||||
break
|
|
||||||
|
|
||||||
##Split the Metadata up into a list
|
for tag in story_card.select('span.story-tag'):
|
||||||
##We have to change the soup type to a string, then remove the newlines, and double spaces,
|
self.story.addToList('eroticatags',stripHTML(tag))
|
||||||
##then changes the <br/> to '-:-', which seperates the different elemeents.
|
|
||||||
##Then we strip the HTML elements from the string.
|
## created/updates share formatting
|
||||||
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
|
for meta in story_card.select('div.story-card-meta-item span:last-child'):
|
||||||
##They are always in the same order.
|
meta = stripHTML(meta)
|
||||||
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
|
if 'Created: ' in meta:
|
||||||
liMetadata = unicode(lc2).replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ')
|
meta = meta.replace('Created: ','')
|
||||||
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
|
self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
|
||||||
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
|
|
||||||
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
|
if 'Updated: ' in meta:
|
||||||
if i == 0:
|
meta = meta.replace('Updated: ','')
|
||||||
# The value for the title has been manipulated, so may not be the same as gotten at the start.
|
self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))
|
||||||
# I'm going to use the href from the lc2 retrieved from the author's page to determine if it is correct.
|
|
||||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
|
|
||||||
raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
|
|
||||||
elif i == 1:
|
|
||||||
##Get the description
|
|
||||||
self.setDescription(url,stripHTML(value.strip()))
|
|
||||||
else:
|
|
||||||
# the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
|
|
||||||
if 'Located :' in value:
|
|
||||||
self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip())
|
|
||||||
elif 'Category :' in value:
|
|
||||||
# Get the Category
|
|
||||||
self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip())
|
|
||||||
elif 'Content Tags :' in value:
|
|
||||||
# Get the Erotic Tags
|
|
||||||
value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
|
|
||||||
for code in re.split(r'\s',value):
|
|
||||||
self.story.addToList('eroticatags',code)
|
|
||||||
elif 'Posted :' in value:
|
|
||||||
# Get the Posted Date
|
|
||||||
value = value.replace(r'Posted :',r'').strip()
|
|
||||||
if value.startswith('008'):
|
|
||||||
# It is unknown how the 200 became 008, but I'm going to change it back here
|
|
||||||
value = value.replace('008','200')
|
|
||||||
elif value.startswith('0000'):
|
|
||||||
# Since the date is showing as 0000,
|
|
||||||
# I'm going to put the memberdate here
|
|
||||||
value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
elif 'Edited :' in value:
|
|
||||||
# Get the 'Updated' Edited date
|
|
||||||
# AFF has the time for the Updated date, and we only want the date,
|
|
||||||
# so we take the first 10 characters only
|
|
||||||
value = value.replace(r'Edited :',r'').strip()[0:10]
|
|
||||||
if value.startswith('008'):
|
|
||||||
# It is unknown how the 200 became 008, but I'm going to change it back here
|
|
||||||
value = value.replace('008','200')
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
elif value.startswith('0000') or '-00-' in value:
|
|
||||||
# Since the date is showing as 0000,
|
|
||||||
# or there is -00- in the date,
|
|
||||||
# I'm going to put the Published date here
|
|
||||||
self.story.setMetadata('dateUpdated', self.story.getMetadata('datPublished'))
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
else:
|
|
||||||
# This catches the blank elements, and the Review and Dragon Prints.
|
|
||||||
# I am not interested in these, so do nothing
|
|
||||||
zzzzzzz=0
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
# grab the text for an individual chapter.
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
|
|
@ -367,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
logger.debug('Getting chapter text from: %s' % url)
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
soup = self.make_soup(self.get_request(url))
|
||||||
chaptertag = soup.find('div',{'class' : 'pagination'}).parent.findNext('td')
|
chaptertag = soup.select_one('div.chapter-body')
|
||||||
if None == chaptertag:
|
if None == chaptertag:
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
||||||
# Change td to a div.
|
## chapter text includes a copy of story title, author,
|
||||||
chaptertag.name='div'
|
## chapter title, & eroticatags specific to the chapter. Did
|
||||||
|
## before, too.
|
||||||
|
|
||||||
return self.utf8FromSoup(url,chaptertag)
|
return self.utf8FromSoup(url,chaptertag)
|
||||||
|
|
|
||||||
40
fanficfare/adapters/adapter_althistorycom.py
Normal file
40
fanficfare/adapters/adapter_althistorycom.py
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2026 FanFicFare team
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
|
||||||
|
|
||||||
|
def getClass():
|
||||||
|
return AltHistoryComAdapter
|
||||||
|
|
||||||
|
## NOTE: This is a different site than www.alternatehistory.com.
|
||||||
|
|
||||||
|
class AltHistoryComAdapter(BaseXenForo2ForumAdapter):
|
||||||
|
|
||||||
|
def __init__(self, config, url):
|
||||||
|
BaseXenForo2ForumAdapter.__init__(self, config, url)
|
||||||
|
|
||||||
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
|
self.story.setMetadata('siteabbrev','ahc')
|
||||||
|
|
||||||
|
@staticmethod # must be @staticmethod, don't remove it.
|
||||||
|
def getSiteDomain():
|
||||||
|
# The site domain. Does have www here, if it uses it.
|
||||||
|
return 'althistory.com'
|
||||||
|
|
||||||
|
|
@ -1,280 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# ####### Not all lables are captured. they are not formtted correctly on the
|
|
||||||
# ####### webpage.
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return AndromedaWebComAdapter # XXX
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
# XXX Most sites don't have the /fiction part. Replace all to remove it usually.
|
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
|
||||||
self.story.setMetadata('siteabbrev','awc') # XXX
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%d %b %Y" # XXX
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
|
||||||
def getSiteDomain():
|
|
||||||
# The site domain. Does have www here, if it uses it.
|
|
||||||
return 'www.andromeda-web.com' # XXX
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
## Login seems to be reasonably standard across eFiction sites.
|
|
||||||
def needToLoginCheck(self, data):
|
|
||||||
if 'Registered Users Only' in data \
|
|
||||||
or 'There is no such account on our website' in data \
|
|
||||||
or "That password doesn't match the one in our database" in data:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def performLogin(self, url):
|
|
||||||
params = {}
|
|
||||||
|
|
||||||
if self.password:
|
|
||||||
params['penname'] = self.username
|
|
||||||
params['password'] = self.password
|
|
||||||
else:
|
|
||||||
params['penname'] = self.getConfig("username")
|
|
||||||
params['password'] = self.getConfig("password")
|
|
||||||
params['cookiecheck'] = '1'
|
|
||||||
params['submit'] = 'Submit'
|
|
||||||
|
|
||||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
|
||||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
|
|
||||||
d = self.post_request(loginUrl, params)
|
|
||||||
|
|
||||||
if "Member Account" not in d : #Member Account
|
|
||||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
raise exceptions.FailedToLogin(url,params['penname'])
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# Weirdly, different sites use different warning numbers.
|
|
||||||
# If the title search below fails, there's a good chance
|
|
||||||
# you need a different number. print data at that point
|
|
||||||
# and see what the 'click here to continue' url says.
|
|
||||||
addurl = "&warning=2"
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
|
|
||||||
# index=1 makes sure we see the story chapter index. Some
|
|
||||||
# sites skip that for one-chapter stories.
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if self.needToLoginCheck(data):
|
|
||||||
# need to log in for this one.
|
|
||||||
self.performLogin(url)
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
# Since the warning text can change by warning level, let's
|
|
||||||
# look for the warning pass url. ksarchive uses
|
|
||||||
# &warning= -- actually, so do other sites. Must be an
|
|
||||||
# eFiction book.
|
|
||||||
|
|
||||||
# fiction/viewstory.php?sid=1882&warning=4
|
|
||||||
# fiction/viewstory.php?sid=1654&ageconsent=ok&warning=2
|
|
||||||
#print data
|
|
||||||
m = re.search(r"'fiction/viewstory.php\?sid=10(&warning=2)'",data)
|
|
||||||
m = re.search(r"'fiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
|
||||||
if m != None:
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# We tried the default and still got a warning, so
|
|
||||||
# let's pull the warning number from the 'continue'
|
|
||||||
# link and reload data.
|
|
||||||
addurl = m.group(1)
|
|
||||||
# correct stupid & error in url.
|
|
||||||
addurl = addurl.replace("&","&")
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL 2nd try: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
else:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
# print data
|
|
||||||
|
|
||||||
|
|
||||||
pagetitle = soup.find('div',{'id':'content'})
|
|
||||||
|
|
||||||
## Title
|
|
||||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/fiction/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
|
||||||
# formating, so it's a little ugly.
|
|
||||||
|
|
||||||
# utility method
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Summary' in label:
|
|
||||||
## Everything until the next span class='label'
|
|
||||||
svalue = ""
|
|
||||||
while 'label' not in defaultGetattr(value,'class'):
|
|
||||||
svalue += unicode(value)
|
|
||||||
value = value.nextSibling
|
|
||||||
self.setDescription(url,svalue)
|
|
||||||
#self.story.setMetadata('description',stripHTML(svalue))
|
|
||||||
|
|
||||||
if 'Rated' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if 'Word count' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Categories' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
for cat in cats:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Characters' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
for char in chars:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Genre' in label:
|
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
|
||||||
for genre in genres:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
if 'Warnings' in label:
|
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
|
|
||||||
for warning in warnings:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
# there's a stray [ at the end.
|
|
||||||
#value = value[0:-1]
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('fiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
|
|
||||||
div = soup.find('div', {'class' : 'story'})
|
|
||||||
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -18,55 +18,20 @@
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
import re
|
|
||||||
import json
|
|
||||||
|
|
||||||
from ..htmlcleanup import stripHTML
|
from .base_otw_adapter import BaseOTWAdapter
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
|
def getClass():
|
||||||
return ArchiveOfOurOwnOrgAdapter
|
return ArchiveOfOurOwnOrgAdapter
|
||||||
|
|
||||||
class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
class ArchiveOfOurOwnOrgAdapter(BaseOTWAdapter):
|
||||||
|
|
||||||
def __init__(self, config, url):
|
def __init__(self, config, url):
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
BaseOTWAdapter.__init__(self, config, url)
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
self.addurl = ""
|
|
||||||
|
|
||||||
self.full_work_soup = None
|
|
||||||
self.full_work_chapters = None
|
|
||||||
self.use_full_work_soup = True
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query correct
|
|
||||||
m = re.match(self.getSiteURLPattern(),url)
|
|
||||||
if m:
|
|
||||||
self.story.setMetadata('storyId',m.group('id'))
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL('https://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
|
|
||||||
else:
|
|
||||||
raise exceptions.InvalidStoryURL(url,
|
|
||||||
self.getSiteDomain(),
|
|
||||||
self.getSiteExampleURLs())
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
self.story.setMetadata('siteabbrev','ao3')
|
self.story.setMetadata('siteabbrev','ao3')
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%Y-%b-%d"
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
@staticmethod # must be @staticmethod, don't remove it.
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
# The site domain. Does have www here, if it uses it.
|
# The site domain. Does have www here, if it uses it.
|
||||||
|
|
@ -84,528 +49,21 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
||||||
return ['archiveofourown.org',
|
return ['archiveofourown.org',
|
||||||
'archiveofourown.com',
|
'archiveofourown.com',
|
||||||
'archiveofourown.net',
|
'archiveofourown.net',
|
||||||
|
'archiveofourown.gay',
|
||||||
'download.archiveofourown.org',
|
'download.archiveofourown.org',
|
||||||
'download.archiveofourown.com',
|
'download.archiveofourown.com',
|
||||||
'download.archiveofourown.net',
|
'download.archiveofourown.net',
|
||||||
'ao3.org',
|
'ao3.org',
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
def mod_url_request(self, url):
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "https://"+cls.getSiteDomain()+"/works/123456 https://"+cls.getSiteDomain()+"/collections/Some_Archive/works/123456 https://"+cls.getSiteDomain()+"/works/123456/chapters/78901"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
# https://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
|
|
||||||
# Discard leading zeros from story ID numbers--AO3 doesn't use them in it's own chapter URLs.
|
|
||||||
# logger.debug(r"https?://" + r"|".join([x.replace('.','\.') for x in self.getAcceptDomains()]) + r"(/collections/[^/]+)?/works/0*(?P<id>\d+)")
|
|
||||||
return r"https?://(" + r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()]) + r")(/collections/[^/]+)?/works/0*(?P<id>\d+)"
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_section_url(cls,url):
|
|
||||||
## minimal URL used for section names in INI and reject list
|
|
||||||
## for comparison
|
|
||||||
# logger.debug("pre--url:%s"%url)
|
|
||||||
## https://archiveofourown.org/works/19334905/chapters/71697933
|
|
||||||
url = re.sub(r'^(.*/works/\d+).*$',r'\1',url)
|
|
||||||
# logger.debug("post-url:%s"%url)
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
## Login
|
def mod_url_request(self, url):
|
||||||
def needToLoginCheck(self, data):
|
## add / to *not* replace media.archiveofourown.org
|
||||||
if 'This work is only available to registered users of the Archive.' in data \
|
if self.getConfig("use_archive_transformativeworks_org",False):
|
||||||
or "The password or user name you entered doesn't match our records" in data:
|
return url.replace("/archiveofourown.org","/archive.transformativeworks.org")
|
||||||
return True
|
elif self.getConfig("use_archiveofourown_gay",False):
|
||||||
|
return url.replace("/archiveofourown.org","/archiveofourown.gay")
|
||||||
else:
|
else:
|
||||||
return False
|
return url
|
||||||
|
|
||||||
def performLogin(self, url, data):
|
|
||||||
|
|
||||||
params = {}
|
|
||||||
if self.password:
|
|
||||||
params['user[login]'] = self.username
|
|
||||||
params['user[password]'] = self.password
|
|
||||||
else:
|
|
||||||
params['user[login]'] = self.getConfig("username")
|
|
||||||
params['user[password]'] = self.getConfig("password")
|
|
||||||
params['user[remember_me]'] = '1'
|
|
||||||
params['commit'] = 'Log in'
|
|
||||||
params['utf8'] = u'\x2713' # utf8 *is* required now. hex code works better than actual character for some reason. u'✓'
|
|
||||||
|
|
||||||
# authenticity_token now comes from a completely separate json call.
|
|
||||||
token_json = json.loads(self.get_request('https://' + self.getSiteDomain() + "/token_dispenser.json"))
|
|
||||||
params['authenticity_token'] = token_json['token']
|
|
||||||
|
|
||||||
loginUrl = 'https://' + self.getSiteDomain() + '/users/login'
|
|
||||||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
|
||||||
params['user[login]']))
|
|
||||||
|
|
||||||
d = self.post_request(loginUrl, params)
|
|
||||||
|
|
||||||
if 'href="/users/logout"' not in d :
|
|
||||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
|
||||||
params['user[login]']))
|
|
||||||
raise exceptions.FailedToLogin(url,params['user[login]'])
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
self.addurl = "?view_adult=true"
|
|
||||||
else:
|
|
||||||
self.addurl=""
|
|
||||||
|
|
||||||
metaurl = self.url+self.addurl
|
|
||||||
url = self.url+'/navigate'+self.addurl
|
|
||||||
logger.info("url: "+url)
|
|
||||||
logger.info("metaurl: "+metaurl)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
meta = self.get_request(metaurl)
|
|
||||||
|
|
||||||
if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta:
|
|
||||||
if self.addurl:
|
|
||||||
## "?view_adult=true" doesn't work on base story
|
|
||||||
## URL anymore, which means we have to
|
|
||||||
metasoup = self.make_soup(meta)
|
|
||||||
a = metasoup.find('a',text='Proceed')
|
|
||||||
metaurl = 'https://'+self.host+a['href']
|
|
||||||
meta = self.get_request(metaurl)
|
|
||||||
else:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if "Sorry, we couldn't find the work you were looking for." in data:
|
|
||||||
raise exceptions.StoryDoesNotExist(self.url)
|
|
||||||
|
|
||||||
# need to log in for this one, or always_login.
|
|
||||||
if self.needToLoginCheck(data) or \
|
|
||||||
( self.getConfig("always_login") and 'href="/users/logout"' not in data ):
|
|
||||||
self.performLogin(url,data)
|
|
||||||
data = self.get_request(url,usecache=False)
|
|
||||||
meta = self.get_request(metaurl,usecache=False)
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
for tag in soup.findAll('div',id='admin-banner'):
|
|
||||||
tag.extract()
|
|
||||||
metasoup = self.make_soup(meta)
|
|
||||||
for tag in metasoup.findAll('div',id='admin-banner'):
|
|
||||||
tag.extract()
|
|
||||||
|
|
||||||
|
|
||||||
## Title
|
|
||||||
a = soup.find('a', href=re.compile(r"/works/\d+$"))
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
if self.getConfig("always_login"):
|
|
||||||
# deliberately using always_login instead of checking for
|
|
||||||
# actual login so we don't have a case where these show up
|
|
||||||
# for a user only when they get user-restricted stories.
|
|
||||||
try:
|
|
||||||
# is bookmarked if has update /bookmarks/ form --
|
|
||||||
# create bookmark form uses different url
|
|
||||||
self.story.setMetadata('bookmarked',
|
|
||||||
None != metasoup.find('form',action=re.compile(r'^/bookmarks/')))
|
|
||||||
self.story.extendList('bookmarktags',
|
|
||||||
metasoup.find('input',id='bookmark_tag_string')['value'].split(', '))
|
|
||||||
self.story.setMetadata('bookmarkprivate',
|
|
||||||
metasoup.find('input',id='bookmark_private').has_attr('checked'))
|
|
||||||
self.story.setMetadata('bookmarkrec',
|
|
||||||
metasoup.find('input',id='bookmark_rec').has_attr('checked'))
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
self.story.setMetadata('bookmarksummary',
|
|
||||||
stripHTML(metasoup.find('textarea',id='bookmark_notes')))
|
|
||||||
|
|
||||||
if metasoup.find('img',alt='(Restricted)'):
|
|
||||||
self.story.setMetadata('restricted','Restricted')
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/.+"))
|
|
||||||
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
|
|
||||||
self.story.setMetadata('author','Anonymous')
|
|
||||||
self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
|
|
||||||
self.story.setMetadata('authorId','0')
|
|
||||||
else:
|
|
||||||
for a in alist:
|
|
||||||
self.story.addToList('authorId',a['href'].split('/')[-1])
|
|
||||||
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
|
||||||
self.story.addToList('author',a.text)
|
|
||||||
|
|
||||||
byline = metasoup.find('h3',{'class':'byline'})
|
|
||||||
if byline:
|
|
||||||
self.story.setMetadata('byline',stripHTML(byline))
|
|
||||||
|
|
||||||
# byline:
|
|
||||||
# <h3 class="byline heading">
|
|
||||||
# Hope Roy [archived by <a href="/users/ssa_archivist/pseuds/ssa_archivist" rel="author">ssa_archivist</a>]
|
|
||||||
# </h3>
|
|
||||||
# stripped:"Hope Roy [archived by ssa_archivist]"
|
|
||||||
m = re.match(r'(?P<author>.*) \[archived by ?(?P<archivist>.*)\]',stripHTML(byline))
|
|
||||||
if( m and
|
|
||||||
len(alist) == 1 and
|
|
||||||
self.getConfig('use_archived_author') ):
|
|
||||||
self.story.setMetadata('author',m.group('author'))
|
|
||||||
|
|
||||||
newestChapter = None
|
|
||||||
self.newestChapterNum = None # save for comparing during update.
|
|
||||||
# Scan all chapters to find the oldest and newest, on AO3 it's
|
|
||||||
# possible for authors to insert new chapters out-of-order or
|
|
||||||
# change the dates of earlier ones by editing them--That WILL
|
|
||||||
# break epub update.
|
|
||||||
# Find the chapters:
|
|
||||||
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
|
|
||||||
self.story.setMetadata('numChapters',len(chapters))
|
|
||||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
|
||||||
if len(chapters)==1:
|
|
||||||
self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+chapters[0]['href'])
|
|
||||||
else:
|
|
||||||
for index, chapter in enumerate(chapters):
|
|
||||||
# strip just in case there's tags, like <i> in chapter titles.
|
|
||||||
# (2013-09-21)
|
|
||||||
date = stripHTML(chapter.findNext('span'))[1:-1]
|
|
||||||
chapterDate = makeDate(date,self.dateformat)
|
|
||||||
self.add_chapter(chapter,'https://'+self.host+chapter['href'],
|
|
||||||
{'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d")))})
|
|
||||||
if newestChapter == None or chapterDate > newestChapter:
|
|
||||||
newestChapter = chapterDate
|
|
||||||
self.newestChapterNum = index
|
|
||||||
|
|
||||||
a = metasoup.find('blockquote',{'class':'userstuff'})
|
|
||||||
if a != None:
|
|
||||||
a.name='div' # Change blockquote to div.
|
|
||||||
self.setDescription(url,a)
|
|
||||||
#self.story.setMetadata('description',a.text)
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"rating tags"})
|
|
||||||
if a != None:
|
|
||||||
self.story.setMetadata('rating',stripHTML(a.text))
|
|
||||||
|
|
||||||
d = metasoup.find('dd',{'class':"language"})
|
|
||||||
if d != None:
|
|
||||||
self.story.setMetadata('language',stripHTML(d.text))
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"fandom tags"})
|
|
||||||
if a != None:
|
|
||||||
fandoms = a.findAll('a',{'class':"tag"})
|
|
||||||
for fandom in fandoms:
|
|
||||||
self.story.addToList('fandoms',fandom.string)
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"warning tags"})
|
|
||||||
if a != None:
|
|
||||||
warnings = a.findAll('a',{'class':"tag"})
|
|
||||||
for warning in warnings:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"freeform tags"})
|
|
||||||
if a != None:
|
|
||||||
genres = a.findAll('a',{'class':"tag"})
|
|
||||||
for genre in genres:
|
|
||||||
self.story.addToList('freeformtags',genre.string)
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"category tags"})
|
|
||||||
if a != None:
|
|
||||||
genres = a.findAll('a',{'class':"tag"})
|
|
||||||
for genre in genres:
|
|
||||||
if genre != "Gen":
|
|
||||||
self.story.addToList('ao3categories',genre.string)
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"character tags"})
|
|
||||||
if a != None:
|
|
||||||
chars = a.findAll('a',{'class':"tag"})
|
|
||||||
for char in chars:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"relationship tags"})
|
|
||||||
if a != None:
|
|
||||||
ships = a.findAll('a',{'class':"tag"})
|
|
||||||
for ship in ships:
|
|
||||||
self.story.addToList('ships',ship.string)
|
|
||||||
|
|
||||||
a = metasoup.find('dd',{'class':"collections"})
|
|
||||||
if a != None:
|
|
||||||
collections = a.findAll('a')
|
|
||||||
for collection in collections:
|
|
||||||
self.story.addToList('collections',collection.string)
|
|
||||||
|
|
||||||
stats = metasoup.find('dl',{'class':'stats'})
|
|
||||||
dt = stats.findAll('dt')
|
|
||||||
dd = stats.findAll('dd')
|
|
||||||
for x in range(0,len(dt)):
|
|
||||||
label = dt[x].text
|
|
||||||
value = dd[x].text
|
|
||||||
|
|
||||||
if 'Words:' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Comments:' in label:
|
|
||||||
self.story.setMetadata('comments', value)
|
|
||||||
|
|
||||||
if 'Kudos:' in label:
|
|
||||||
self.story.setMetadata('kudos', value)
|
|
||||||
|
|
||||||
if 'Hits:' in label:
|
|
||||||
self.story.setMetadata('hits', value)
|
|
||||||
|
|
||||||
if 'Bookmarks:' in label:
|
|
||||||
self.story.setMetadata('bookmarks', value)
|
|
||||||
|
|
||||||
if 'Chapters:' in label:
|
|
||||||
self.story.setMetadata('chapterslashtotal', value)
|
|
||||||
if value.split('/')[0] == value.split('/')[1]:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
|
|
||||||
# Find Series name from series URL.
|
|
||||||
ddseries = metasoup.find('dd',{'class':"series"})
|
|
||||||
|
|
||||||
if ddseries:
|
|
||||||
for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))):
|
|
||||||
series_name = stripHTML(a)
|
|
||||||
series_url = 'https://'+self.host+a['href']
|
|
||||||
series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"
|
|
||||||
self.story.setMetadata('series%02d'%i,"%s [%s]"%(series_name,series_index))
|
|
||||||
self.story.setMetadata('series%02dUrl'%i,series_url)
|
|
||||||
if i == 0:
|
|
||||||
self.setSeries(series_name, series_index)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
|
|
||||||
def hookForUpdates(self,chaptercount):
|
|
||||||
if self.newestChapterNum and self.oldchapters and len(self.oldchapters) > self.newestChapterNum:
|
|
||||||
logger.info("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1))
|
|
||||||
self.oldchapters = self.oldchapters[:self.newestChapterNum]
|
|
||||||
return len(self.oldchapters)
|
|
||||||
|
|
||||||
## Normalize chapter URLs because a) site has changed from http to
|
|
||||||
## https and b) in case of title change. That way updates to
|
|
||||||
## existing stories don't re-download all chapters.
|
|
||||||
def normalize_chapterurl(self,url):
|
|
||||||
url = re.sub(r"https?://("+self.getSiteDomain()+r"/works/\d+/chapters/\d+)(\?view_adult=true)?$",
|
|
||||||
r"https://\1",url)
|
|
||||||
return url
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterTextNum(self, url, index):
|
|
||||||
## FYI: Chapter urls used to include ?view_adult=true in each
|
|
||||||
## one. With cookiejar being passed now, that's not
|
|
||||||
## necessary. However, there is a corner case with plugin--If
|
|
||||||
## a user-required story is attempted after gathering metadata
|
|
||||||
## for one that needs adult, but not user AND the user doesn't
|
|
||||||
## enter a valid user, the is_adult cookie from before can be
|
|
||||||
## lost.
|
|
||||||
logger.debug('Getting chapter text for: %s index: %s' % (url,index))
|
|
||||||
|
|
||||||
save_chapter_soup = self.make_soup('<div class="story"></div>')
|
|
||||||
## use the div because the full soup will also have <html><body>.
|
|
||||||
## need save_chapter_soup for .new_tag()
|
|
||||||
save_chapter=save_chapter_soup.find('div')
|
|
||||||
|
|
||||||
whole_dl_soup = chapter_dl_soup = None
|
|
||||||
|
|
||||||
if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
|
|
||||||
logger.debug("USE view_full_work")
|
|
||||||
## Assumed view_adult=true was cookied during metadata
|
|
||||||
if not self.full_work_soup:
|
|
||||||
self.full_work_soup = self.make_soup(self.get_request(self.url+"?view_full_work=true"+self.addurl.replace('?','&')))
|
|
||||||
## AO3 has had several cases now where chapter numbers
|
|
||||||
## are missing, breaking the link between
|
|
||||||
## <div id=chapter-##> and Chapter ##.
|
|
||||||
## But they should all still be there and in the right
|
|
||||||
## order, so array[index]
|
|
||||||
self.full_work_chapters = self.full_work_soup.find_all('div',{'id':re.compile(r'chapter-\d+')})
|
|
||||||
if len(self.full_work_chapters) != self.num_chapters():
|
|
||||||
## sanity check just in case.
|
|
||||||
self.use_full_work_soup = False
|
|
||||||
self.full_work_soup = None
|
|
||||||
logger.warning("chapter count in view_full_work(%s) disagrees with num of chapters(%s)--ending use_view_full_work"%(len(self.full_work_chapters),self.num_chapters()))
|
|
||||||
whole_dl_soup = self.full_work_soup
|
|
||||||
|
|
||||||
if whole_dl_soup:
|
|
||||||
chapter_dl_soup = self.full_work_chapters[index]
|
|
||||||
else:
|
|
||||||
whole_dl_soup = chapter_dl_soup = self.make_soup(self.get_request(url+self.addurl))
|
|
||||||
if None == chapter_dl_soup:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
exclude_notes=self.getConfigList('exclude_notes')
|
|
||||||
|
|
||||||
def append_tag(elem,tag,string=None,classes=None):
|
|
||||||
'''bs4 requires tags be added separately.'''
|
|
||||||
new_tag = save_chapter_soup.new_tag(tag)
|
|
||||||
if string:
|
|
||||||
new_tag.string=string
|
|
||||||
if classes:
|
|
||||||
new_tag['class']=[classes]
|
|
||||||
elem.append(new_tag)
|
|
||||||
return new_tag
|
|
||||||
|
|
||||||
## These are the over-all work's 'Notes at the beginning'.
|
|
||||||
## They only appear on the first chapter in individual chapter
|
|
||||||
## pages and before chapter-1 div. Appending removes
|
|
||||||
## headnotes from whole_dl_soup, so be sure to only do it on
|
|
||||||
## the first chapter.
|
|
||||||
head_notes_div = append_tag(save_chapter,'div',classes="fff_chapter_notes fff_head_notes")
|
|
||||||
if 'authorheadnotes' not in exclude_notes and index == 0:
|
|
||||||
headnotes = whole_dl_soup.find('div', {'class' : "preface group"}).find('div', {'class' : "notes module"})
|
|
||||||
if headnotes != None:
|
|
||||||
## Also include ul class='associations'.
|
|
||||||
ulassoc = headnotes.find('ul', {'class' : "associations"})
|
|
||||||
headnotes = headnotes.find('blockquote', {'class' : "userstuff"})
|
|
||||||
if headnotes != None or ulassoc != None:
|
|
||||||
append_tag(head_notes_div,'b',"Author's Note:")
|
|
||||||
if ulassoc != None:
|
|
||||||
# fix relative links--all examples so far have been.
|
|
||||||
for alink in ulassoc.find_all('a'):
|
|
||||||
if 'http' not in alink['href']:
|
|
||||||
alink['href']='https://' + self.getSiteDomain() + alink['href']
|
|
||||||
head_notes_div.append(ulassoc)
|
|
||||||
if headnotes != None:
|
|
||||||
head_notes_div.append(headnotes)
|
|
||||||
|
|
||||||
## Can appear on every chapter
|
|
||||||
if 'chaptersummary' not in exclude_notes:
|
|
||||||
chapsumm = chapter_dl_soup.find('div', {'id' : "summary"})
|
|
||||||
if chapsumm != None:
|
|
||||||
chapsumm = chapsumm.find('blockquote')
|
|
||||||
append_tag(head_notes_div,'b',"Summary for the Chapter:")
|
|
||||||
head_notes_div.append(chapsumm)
|
|
||||||
|
|
||||||
## Can appear on every chapter
|
|
||||||
if 'chapterheadnotes' not in exclude_notes:
|
|
||||||
chapnotes = chapter_dl_soup.find('div', {'id' : "notes"})
|
|
||||||
if chapnotes != None:
|
|
||||||
chapnotes = chapnotes.find('blockquote')
|
|
||||||
if chapnotes != None:
|
|
||||||
append_tag(head_notes_div,'b',"Notes for the Chapter:")
|
|
||||||
head_notes_div.append(chapnotes)
|
|
||||||
|
|
||||||
text = chapter_dl_soup.find('div', {'class' : "userstuff module"})
|
|
||||||
chtext = text.find('h3', {'class' : "landmark heading"})
|
|
||||||
if chtext:
|
|
||||||
chtext.extract()
|
|
||||||
save_chapter.append(text)
|
|
||||||
|
|
||||||
foot_notes_div = append_tag(save_chapter,'div',classes="fff_chapter_notes fff_foot_notes")
|
|
||||||
## Can appear on every chapter
|
|
||||||
if 'chapterfootnotes' not in exclude_notes:
|
|
||||||
chapfoot = chapter_dl_soup.find('div', {'class' : "end notes module", 'role' : "complementary"})
|
|
||||||
if chapfoot != None:
|
|
||||||
chapfoot = chapfoot.find('blockquote')
|
|
||||||
append_tag(foot_notes_div,'b',"Notes for the Chapter:")
|
|
||||||
foot_notes_div.append(chapfoot)
|
|
||||||
|
|
||||||
skip_on_update_tags = []
|
|
||||||
## These are the over-all work's 'Notes at the end'.
|
|
||||||
## They only appear on the last chapter in individual chapter
|
|
||||||
## pages and after chapter-# div. Appending removes
|
|
||||||
## headnotes from whole_dl_soup, so be sure to only do it on
|
|
||||||
## the last chapter.
|
|
||||||
if 'authorfootnotes' not in exclude_notes and index+1 == self.num_chapters():
|
|
||||||
footnotes = whole_dl_soup.find('div', {'id' : "work_endnotes"})
|
|
||||||
if footnotes != None:
|
|
||||||
footnotes = footnotes.find('blockquote')
|
|
||||||
if footnotes:
|
|
||||||
b = append_tag(foot_notes_div,'b',"Author's Note:")
|
|
||||||
skip_on_update_tags.append(b)
|
|
||||||
skip_on_update_tags.append(footnotes)
|
|
||||||
foot_notes_div.append(footnotes)
|
|
||||||
|
|
||||||
## It looks like 'Inspired by' links now all appear in the ul
|
|
||||||
## class=associations tag in authorheadnotes. This code is
|
|
||||||
## left in case I'm wrong and there are still stories with div
|
|
||||||
## id=children inspired links at the end.
|
|
||||||
if 'inspiredlinks' not in exclude_notes and index+1 == self.num_chapters():
|
|
||||||
inspiredlinks = whole_dl_soup.find('div', {'id' : "children"})
|
|
||||||
if inspiredlinks != None:
|
|
||||||
if inspiredlinks:
|
|
||||||
inspiredlinks.find('h3').name='b' # don't want a big h3 at the end.
|
|
||||||
# fix relative links--all examples so far have been.
|
|
||||||
for alink in inspiredlinks.find_all('a'):
|
|
||||||
if 'http' not in alink['href']:
|
|
||||||
alink['href']='https://' + self.getSiteDomain() + alink['href']
|
|
||||||
skip_on_update_tags.append(inspiredlinks)
|
|
||||||
foot_notes_div.append(inspiredlinks)
|
|
||||||
|
|
||||||
## remove empty head/food notes div(s)
|
|
||||||
if not head_notes_div.find(True):
|
|
||||||
head_notes_div.extract()
|
|
||||||
if not foot_notes_div.find(True):
|
|
||||||
foot_notes_div.extract()
|
|
||||||
## AO3 story end notes end up in the 'last' chapter, but if
|
|
||||||
## updated, then there's a new 'last' chapter. This option
|
|
||||||
## applies the 'skip_on_ffdl_update' class to those tags which
|
|
||||||
## means they will be removed during epub reading for update.
|
|
||||||
## Results: only the last chapter will have end notes.
|
|
||||||
## Side-effect: An 'Update Always' that doesn't add a new
|
|
||||||
## lasts chapter will remove the end notes.
|
|
||||||
if self.getConfig("remove_authorfootnotes_on_update"):
|
|
||||||
for skip_tag in skip_on_update_tags:
|
|
||||||
if skip_tag.has_attr('class'):
|
|
||||||
skip_tag['class'].append('skip_on_ffdl_update')
|
|
||||||
else:
|
|
||||||
skip_tag['class']=['skip_on_ffdl_update']
|
|
||||||
# logger.debug(skip_tag)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,save_chapter)
|
|
||||||
|
|
||||||
def before_get_urls_from_page(self,url,normalize):
|
|
||||||
# special stuff to log into archiveofourown.org, if possible.
|
|
||||||
# Unlike most that show the links to 'adult' stories, but protect
|
|
||||||
# them, AO3 doesn't even show them if not logged in. Only works
|
|
||||||
# with saved user/pass--not going to prompt for list.
|
|
||||||
if self.getConfig("username"):
|
|
||||||
if self.getConfig("is_adult"):
|
|
||||||
if '?' in url:
|
|
||||||
addurl = "&view_adult=true"
|
|
||||||
else:
|
|
||||||
addurl = "?view_adult=true"
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
# just to get an authenticity_token.
|
|
||||||
data = self.get_request(url+addurl)
|
|
||||||
# login the session.
|
|
||||||
self.performLogin(url,data)
|
|
||||||
# get the list page with logged in session.
|
|
||||||
|
|
||||||
def get_series_from_page(self,url,data,normalize=False):
|
|
||||||
'''
|
|
||||||
This method is to make it easier for adapters to detect a
|
|
||||||
series URL, pick out the series metadata and list of storyUrls
|
|
||||||
to return without needing to override get_urls_from_page
|
|
||||||
entirely.
|
|
||||||
'''
|
|
||||||
## easiest way to get all the weird URL possibilities and stay
|
|
||||||
## up to date with future changes.
|
|
||||||
m = re.match(self.getSiteURLPattern().replace('/works/','/series/'),url)
|
|
||||||
if m:
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
retval = {}
|
|
||||||
retval['urllist']=[ 'https://'+self.host+a['href'] for a in soup.select('h4.heading a:first-child') ]
|
|
||||||
retval['name']=stripHTML(soup.select_one("h2.heading"))
|
|
||||||
desc=soup.select_one("div.wrapper dd blockquote.userstuff")
|
|
||||||
if desc:
|
|
||||||
desc.name='div' # change blockquote to div to match stories.
|
|
||||||
retval['desc']=desc
|
|
||||||
stats=stripHTML(soup.select_one("dl.series dl.stats"))
|
|
||||||
if 'Complete:Yes' in stats:
|
|
||||||
retval['status'] = "Completed"
|
|
||||||
elif 'Complete:No' in stats:
|
|
||||||
retval['status'] = "In-Progress"
|
|
||||||
return retval
|
|
||||||
## return dict with at least {'urllist':['storyUrl','storyUrl',...]}
|
|
||||||
## optionally 'name' and 'desc'?
|
|
||||||
return {}
|
|
||||||
|
|
|
||||||
|
|
@ -1,174 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
|
|
||||||
def getClass():
    """Adapter-discovery entry point: return this module's adapter class."""
    return ArchiveSkyeHawkeComAdapter
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
    """Adapter for archive.skyehawke.com, an eFiction-style story archive.

    Modernized to the BeautifulSoup4 snake_case API (find_all,
    replace_with, find_next, next_sibling/previous_sibling) for
    consistency with the rest of the adapters, and to avoid shadowing
    the ``sum`` builtin.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult = False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId', self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/story.php?no=' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'ash')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'archive.skyehawke.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['archive.skyehawke.com', 'www.skyehawke.com']

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://archive.skyehawke.com/story.php?no=1234 http://www.skyehawke.com/archive/story.php?no=1234 http://skyehawke.com/archive/story.php?no=1234"

    def getSiteURLPattern(self):
        return r"https?://(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Populate story metadata and the chapter list from the story page.

        Also fetches the author's page, which is the only place the
        site lists completion status and characters.
        """
        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)
        soup = self.make_soup(data)

        ## Title -- the quoted part of the page header span.
        a = soup.find('div', {'class': "story border"}).find('span', {'class': 'left'})
        title = stripHTML(a).split('"')[1]
        self.story.setMetadata('title', title)

        # Find authorid and URL from... author url.
        author = a.find('a')
        self.story.setMetadata('authorId', author['href'].split('=')[1])
        self.story.setMetadata('authorUrl', 'http://'+self.host+'/'+author['href'])
        self.story.setMetadata('author', author.string)

        authorSoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

        # The first <option> in the chapter <select> is not a chapter; skip it.
        chapter_options = soup.find('select', {'name': 'chapter'}).find_all('option')
        for ch in chapter_options[1:]:
            self.add_chapter(ch, ch['value'])

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        box = soup.find('div', {'class': "container borderridge"})
        # renamed from `sum` to avoid shadowing the builtin.
        summary = box.find('span').text
        self.setDescription(url, summary)

        boxes = soup.find_all('div', {'class': "container bordersolid"})
        for box in boxes:
            if box.find('b') is not None and box.find('b').text == "History and Story Information":
                # Labels/values alternate around <b> tags; match on the
                # surrounding text to decide what each <b> holds.
                for b in box.find_all('b'):
                    if "words" in b.next_sibling:
                        self.story.setMetadata('numWords', b.text)
                    if "archived" in b.previous_sibling:
                        self.story.setMetadata('datePublished', makeDate(stripHTML(b.text), self.dateformat))
                    if "updated" in b.previous_sibling:
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(b.text), self.dateformat))
                    if "fandom" in b.next_sibling:
                        self.story.addToList('category', b.text)

                # Turn <br>s into 'split' markers so the genre line can
                # be cut out of the flattened text.
                for br in box.find_all('br'):
                    br.replace_with('split')
                genre = box.text.split("Genre:")[1].split("split")[0]
                if "Unspecified" not in genre:
                    self.story.addToList('genre', genre)

            if box.find('span') is not None and box.find('span').text == "WARNING":
                rating = box.find_all('span')[1]
                rating.find('br').replace_with('split')
                rating = rating.text.replace("This story is rated", '').split('split')[0]
                self.story.setMetadata('rating', rating)
                logger.debug(self.story.getMetadata('rating'))

                warnings = box.find('ol')
                if warnings is not None:
                    warnings = warnings.text.replace(']', '').replace('[', '').split(' ')
                    for warning in warnings:
                        self.story.addToList('warnings', warning)

        # Status and characters only appear on the author's page; find
        # the entry for this story and stop at the first match.
        for asoup in authorSoup.find_all('div', {'class': "story bordersolid"}):
            if asoup.find('a')['href'] == 'story.php?no='+self.story.getMetadata('storyId'):
                if '[ Completed ]' in asoup.text:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')
                chars = asoup.find_next('div').text.split('Characters')[1].split(']')[0]
                for char in chars.split(','):
                    if "None" not in char:
                        self.story.addToList('characters', char)
                break

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'class': "chapter bordersolid"}).find_next('div').find_next('div')

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url, div)
|
|
||||||
|
|
@ -79,7 +79,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
||||||
data1 = self.get_request(self.url)
|
data1 = self.get_request(self.url)
|
||||||
soup1 = self.make_soup(data1)
|
soup1 = self.make_soup(data1)
|
||||||
#strip comments from soup
|
#strip comments from soup
|
||||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||||
|
|
||||||
if 'Page Not Found.' in data1:
|
if 'Page Not Found.' in data1:
|
||||||
raise exceptions.StoryDoesNotExist(self.url)
|
raise exceptions.StoryDoesNotExist(self.url)
|
||||||
|
|
@ -92,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('title', title.string)
|
self.story.setMetadata('title', title.string)
|
||||||
|
|
||||||
# Author
|
# Author
|
||||||
author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a')
|
author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
|
||||||
authorurl = author['href']
|
authorurl = author['href']
|
||||||
self.story.setMetadata('author', author.string)
|
self.story.setMetadata('author', author.string)
|
||||||
self.story.setMetadata('authorUrl', authorurl)
|
self.story.setMetadata('authorUrl', authorurl)
|
||||||
|
|
@ -112,7 +112,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
||||||
### add it before the rest of the pages, if any
|
### add it before the rest of the pages, if any
|
||||||
self.add_chapter('1', self.url)
|
self.add_chapter('1', self.url)
|
||||||
|
|
||||||
chapterTable = soup1.find('div',{'class':'pages'}).findAll('a')
|
chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')
|
||||||
|
|
||||||
if chapterTable is not None:
|
if chapterTable is not None:
|
||||||
# Multi-chapter story
|
# Multi-chapter story
|
||||||
|
|
@ -124,7 +124,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
||||||
self.add_chapter(chapterTitle, chapterUrl)
|
self.add_chapter(chapterTitle, chapterUrl)
|
||||||
|
|
||||||
|
|
||||||
rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
|
rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
|
||||||
self.story.setMetadata('rating',rated)
|
self.story.setMetadata('rating',rated)
|
||||||
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))
|
self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
# normalized story URL.
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
self.story.setMetadata('siteabbrev','asph')
|
self.story.setMetadata('siteabbrev','asph')
|
||||||
|
|
@ -64,10 +64,10 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(cls):
|
def getSiteExampleURLs(cls):
|
||||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||||
|
|
||||||
## Login seems to be reasonably standard across eFiction sites.
|
## Login seems to be reasonably standard across eFiction sites.
|
||||||
def needToLoginCheck(self, data):
|
def needToLoginCheck(self, data):
|
||||||
|
|
@ -92,7 +92,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
params['intent'] = ''
|
params['intent'] = ''
|
||||||
params['submit'] = 'Submit'
|
params['submit'] = 'Submit'
|
||||||
|
|
||||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
|
loginUrl = 'https://' + self.getSiteDomain() + '/user.php'
|
||||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||||
params['penname']))
|
params['penname']))
|
||||||
|
|
||||||
|
|
@ -130,20 +130,20 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
# Find authorid and URL from... author url.
|
# Find authorid and URL from... author url.
|
||||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||||
self.story.setMetadata('author',a.string)
|
self.story.setMetadata('author',a.string)
|
||||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# in case link points somewhere other than the first chapter
|
# in case link points somewhere other than the first chapter
|
||||||
a = soup.findAll('option')[1]['value']
|
a = soup.find_all('option')[1]['value']
|
||||||
self.story.setMetadata('storyId',a.split('=',)[1])
|
self.story.setMetadata('storyId',a.split('=',)[1])
|
||||||
url = 'http://'+self.host+'/'+a
|
url = 'https://'+self.host+'/'+a
|
||||||
soup = self.make_soup(self.get_request(url))
|
soup = self.make_soup(self.get_request(url))
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
|
for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
|
||||||
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||||
if a != None:
|
if a != None:
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
self.story.setMetadata('title',stripHTML(a))
|
||||||
|
|
@ -151,13 +151,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
|
chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
|
||||||
if len(chapters) == 0:
|
if len(chapters) == 0:
|
||||||
self.add_chapter(self.story.getMetadata('title'),url)
|
self.add_chapter(self.story.getMetadata('title'),url)
|
||||||
else:
|
else:
|
||||||
for chapter in chapters:
|
for chapter in chapters:
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
|
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
# eFiction sites don't help us out a lot with their meta data
|
||||||
|
|
@ -170,7 +170,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
except:
|
except:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
cats = info.findAll('a',href=re.compile('categories.php'))
|
cats = info.find_all('a',href=re.compile('categories.php'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
|
|
@ -188,7 +188,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
## <td><span class="sb"><b>Published:</b> 04/08/2007</td>
|
## <td><span class="sb"><b>Published:</b> 04/08/2007</td>
|
||||||
|
|
||||||
## one story had <b>Updated...</b> in the description. Restrict to sub-table
|
## one story had <b>Updated...</b> in the description. Restrict to sub-table
|
||||||
labels = info.find('table').findAll('b')
|
labels = info.find('table').find_all('b')
|
||||||
for labelspan in labels:
|
for labelspan in labels:
|
||||||
value = labelspan.nextSibling
|
value = labelspan.nextSibling
|
||||||
label = stripHTML(labelspan)
|
label = stripHTML(labelspan)
|
||||||
|
|
|
||||||
|
|
@ -111,11 +111,17 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||||
url = self.url
|
url = self.url
|
||||||
logger.info("url: "+url)
|
logger.info("url: "+url)
|
||||||
data = self.get_request(url)
|
soup = None
|
||||||
|
try:
|
||||||
|
data = self.get_request(url)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
except exceptions.HTTPErrorFFF as e:
|
||||||
|
if e.status_code != 404:
|
||||||
|
raise
|
||||||
|
data = self.decode_data(e.data)
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
# logger.debug(data)
|
||||||
|
if not soup or self.loginNeededCheck(data):
|
||||||
if self.loginNeededCheck(data):
|
|
||||||
# always login if not already to avoid lots of headaches
|
# always login if not already to avoid lots of headaches
|
||||||
self.performLogin(url,data)
|
self.performLogin(url,data)
|
||||||
# refresh website after logging in
|
# refresh website after logging in
|
||||||
|
|
@ -140,8 +146,8 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
# Find authorid and URL from... author url.
|
||||||
mainmeta = soup.find('footer', {'class': 'main-meta'})
|
mainmeta = soup.find('footer', {'class': 'main-meta'})
|
||||||
alist = mainmeta.find('span', text='Author(s)')
|
alist = mainmeta.find('span', string='Author(s)')
|
||||||
alist = alist.parent.findAll('a', href=re.compile(r"/profile/view/\d+"))
|
alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
|
||||||
for a in alist:
|
for a in alist:
|
||||||
self.story.addToList('authorId',a['href'].split('/')[-1])
|
self.story.addToList('authorId',a['href'].split('/')[-1])
|
||||||
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
||||||
|
|
@ -153,10 +159,10 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||||
chapters=soup.find('select',{'name':'chapter-nav'})
|
chapters=soup.find('select',{'name':'chapter-nav'})
|
||||||
hrefattr=None
|
hrefattr=None
|
||||||
if chapters:
|
if chapters:
|
||||||
chapters=chapters.findAll('option')
|
chapters=chapters.find_all('option')
|
||||||
hrefattr='value'
|
hrefattr='value'
|
||||||
else: # didn't find <select name='chapter-nav', look for alternative
|
else: # didn't find <select name='chapter-nav', look for alternative
|
||||||
chapters=soup.find('div',{'class':'widget--chapters'}).findAll('a')
|
chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
|
||||||
hrefattr='href'
|
hrefattr='href'
|
||||||
for index, chapter in enumerate(chapters):
|
for index, chapter in enumerate(chapters):
|
||||||
if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
|
if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
|
||||||
|
|
@ -165,9 +171,9 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
|
||||||
# find timestamp
|
# find timestamp
|
||||||
a = soup.find('span', text='Updated')
|
a = soup.find('span', string='Updated')
|
||||||
if a == None:
|
if a == None:
|
||||||
a = soup.find('span', text='Published') # use published date if work was never updated
|
a = soup.find('span', string='Published') # use published date if work was never updated
|
||||||
a = a.parent.find('time')
|
a = a.parent.find('time')
|
||||||
chapterDate = makeDate(a['datetime'],self.dateformat)
|
chapterDate = makeDate(a['datetime'],self.dateformat)
|
||||||
if newestChapter == None or chapterDate > newestChapter:
|
if newestChapter == None or chapterDate > newestChapter:
|
||||||
|
|
@ -175,7 +181,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||||
self.newestChapterNum = index
|
self.newestChapterNum = index
|
||||||
|
|
||||||
# story status
|
# story status
|
||||||
a = mainmeta.find('span', text='Completed')
|
a = mainmeta.find('span', string='Completed')
|
||||||
if a:
|
if a:
|
||||||
self.story.setMetadata('status', 'Completed')
|
self.story.setMetadata('status', 'Completed')
|
||||||
else:
|
else:
|
||||||
|
|
@ -194,37 +200,37 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||||
self.setDescription(url,a)
|
self.setDescription(url,a)
|
||||||
|
|
||||||
# story tags
|
# story tags
|
||||||
a = mainmeta.find('span',text='Tags')
|
a = mainmeta.find('span',string='Tags')
|
||||||
if a:
|
if a:
|
||||||
tags = a.parent.findAll('a')
|
tags = a.parent.find_all('a')
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
self.story.addToList('tags', tag.text)
|
self.story.addToList('tags', tag.text)
|
||||||
|
|
||||||
# story tags
|
# story tags
|
||||||
a = mainmeta.find('span',text='Characters')
|
a = mainmeta.find('span',string='Characters')
|
||||||
if a:
|
if a:
|
||||||
self.story.addToList('characters', a.nextSibling)
|
self.story.addToList('characters', a.nextSibling)
|
||||||
|
|
||||||
# published on
|
# published on
|
||||||
a = soup.find('span', text='Published')
|
a = soup.find('span', string='Published')
|
||||||
a = a.parent.find('time')
|
a = a.parent.find('time')
|
||||||
self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))
|
self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))
|
||||||
|
|
||||||
# updated on
|
# updated on
|
||||||
a = soup.find('span', text='Updated')
|
a = soup.find('span', string='Updated')
|
||||||
if a:
|
if a:
|
||||||
a = a.parent.find('time')
|
a = a.parent.find('time')
|
||||||
self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))
|
self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))
|
||||||
|
|
||||||
# word count
|
# word count
|
||||||
a = soup.find('span', text='Total Word Count')
|
a = soup.find('span', string='Total Word Count')
|
||||||
if a:
|
if a:
|
||||||
a = a.find_next('span')
|
a = a.find_next('span')
|
||||||
self.story.setMetadata('numWords', int(a.text.split()[0]))
|
self.story.setMetadata('numWords', int(a.text.split()[0]))
|
||||||
|
|
||||||
# upvote, subs, and views
|
# upvote, subs, and views
|
||||||
a = soup.find('div',{'class':'title-meta'})
|
a = soup.find('div',{'class':'title-meta'})
|
||||||
spans = a.findAll('span', recursive=False)
|
spans = a.find_all('span', recursive=False)
|
||||||
self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
|
self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
|
||||||
self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
|
self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
|
||||||
if len(spans) > 2: # views can be private
|
if len(spans) > 2: # views can be private
|
||||||
|
|
@ -246,24 +252,39 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
data = self.get_request(url)
|
data = self.get_request(url)
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
|
# logger.debug(data)
|
||||||
|
|
||||||
try:
|
ageform = soup.select_one('form[action="/account/toggle_age"]')
|
||||||
# <script>var postApi = "https://www.asianfanfics.com/api/chapters/4791923/chapter_46d32e413d1a702a26f7637eabbfb6f3.json";</script>
|
# logger.debug(ageform)
|
||||||
jsonlink = soup.find('script',string=re.compile(r'/api/chapters/[0-9]+/chapter_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks
|
if ageform and (self.is_adult or self.getConfig("is_adult")):
|
||||||
chap_json = json.loads(self.get_request(jsonlink))
|
params = {}
|
||||||
content = self.make_soup(chap_json['post']).find('body') # BS4 adds <html><body> if not present.
|
params['is_of_age']=ageform.select_one('input#is_of_age')['value']
|
||||||
content.name='div' # change body to a div.
|
params['current_url']=ageform.select_one('input#current_url')['value']
|
||||||
if self.getConfig('inject_chapter_title'):
|
params['csrf_aff_token']=ageform.select_one('input[name="csrf_aff_token"]')['value']
|
||||||
# the dumbest workaround ever for the abbreviated chapter titles from before
|
loginUrl = 'https://' + self.getSiteDomain() + '/account/mark_over_18'
|
||||||
logger.debug("Injecting full-length chapter title")
|
logger.info("Will now toggle age to URL (%s)" % (loginUrl))
|
||||||
newTitle = soup.find('h1', {'id' : 'chapter-title'}).text
|
# logger.debug(params)
|
||||||
newTitle = self.make_soup('<h3>%s</h3>' % (newTitle)).find('body') # BS4 adds <html><body> if not present.
|
data = self.post_request(loginUrl, params)
|
||||||
newTitle.name='div' # change body to a div.
|
soup = self.make_soup(data)
|
||||||
newTitle.append(content)
|
# logger.debug(data)
|
||||||
return self.utf8FromSoup(url,newTitle)
|
|
||||||
else:
|
content = soup.find('div', {'id': 'user-submitted-body'})
|
||||||
return self.utf8FromSoup(url,content)
|
|
||||||
except Exception as e:
|
if self.getConfig('inject_chapter_image'):
|
||||||
logger.debug("json lookup failed, going on with HTML chapter")
|
logger.debug("Injecting chapter image")
|
||||||
content = soup.find('div', {'id': 'user-submitted-body'})
|
imgdiv = soup.select_one('div#bodyText div.bot-spacer')
|
||||||
return self.utf8FromSoup(url,content)
|
if imgdiv:
|
||||||
|
content.insert(0, "\n")
|
||||||
|
content.insert(0, imgdiv)
|
||||||
|
content.insert(0, "\n")
|
||||||
|
|
||||||
|
if self.getConfig('inject_chapter_title'):
|
||||||
|
logger.debug("Injecting full-length chapter title")
|
||||||
|
title = soup.find('h1', {'id' : 'chapter-title'}).text
|
||||||
|
newTitle = soup.new_tag('h3')
|
||||||
|
newTitle.string = title
|
||||||
|
content.insert(0, "\n")
|
||||||
|
content.insert(0, newTitle)
|
||||||
|
content.insert(0, "\n")
|
||||||
|
|
||||||
|
return self.utf8FromSoup(url,content)
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
# The update date is with the chapter links... so we will update it here as well
|
# The update date is with the chapter links... so we will update it here as well
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
|
||||||
value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
|
value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
|
||||||
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
|
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
|
||||||
self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])
|
self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])
|
||||||
|
|
@ -134,11 +134,11 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# Get the MetaData
|
# Get the MetaData
|
||||||
# Erotia Tags
|
# Erotia Tags
|
||||||
tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode'))
|
tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
self.story.addToList('eroticatags',tag.text)
|
self.story.addToList('eroticatags',tag.text)
|
||||||
|
|
||||||
for td in soup.findAll('td'):
|
for td in soup.find_all('td'):
|
||||||
if len(td.text)>0:
|
if len(td.text)>0:
|
||||||
if 'Added on:' in td.text and '<table' not in unicode(td):
|
if 'Added on:' in td.text and '<table' not in unicode(td):
|
||||||
value = td.text.replace('Added on:','').strip()
|
value = td.text.replace('Added on:','').strip()
|
||||||
|
|
@ -169,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
||||||
|
|
||||||
#strip comments from soup
|
#strip comments from soup
|
||||||
[comment.extract() for comment in chaptertag.findAll(text=lambda text:isinstance(text, Comment))]
|
[comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]
|
||||||
|
|
||||||
# BDSM Library basically wraps it's own html around the document,
|
# BDSM Library basically wraps it's own html around the document,
|
||||||
# so we will be removing the script, title and meta content from the
|
# so we will be removing the script, title and meta content from the
|
||||||
# storyblock
|
# storyblock
|
||||||
for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'):
|
for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
|
||||||
tag.extract()
|
tag.extract()
|
||||||
|
|
||||||
for tag in chaptertag.findAll('o:smarttagtype'):
|
for tag in chaptertag.find_all('o:smarttagtype'):
|
||||||
tag.name = 'span'
|
tag.name = 'span'
|
||||||
|
|
||||||
## I'm going to take the attributes off all of the tags
|
## I'm going to take the attributes off all of the tags
|
||||||
## because they usually refer to the style that we removed above.
|
## because they usually refer to the style that we removed above.
|
||||||
for tag in chaptertag.findAll(True):
|
for tag in chaptertag.find_all(True):
|
||||||
tag.attrs = None
|
tag.attrs = None
|
||||||
|
|
||||||
return self.utf8FromSoup(url,chaptertag)
|
return self.utf8FromSoup(url,chaptertag)
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
summary_div = list_box.find('div', {'class': 'list_summary'})
|
summary_div = list_box.find('div', {'class': 'list_summary'})
|
||||||
if not self.getConfig('keep_summary_html'):
|
if not self.getConfig('keep_summary_html'):
|
||||||
summary = ''.join(summary_div(text=True))
|
summary = ''.join(summary_div(string=True))
|
||||||
else:
|
else:
|
||||||
summary = self.utf8FromSoup(author_url, summary_div)
|
summary = self.utf8FromSoup(author_url, summary_div)
|
||||||
|
|
||||||
|
|
@ -157,9 +157,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
self.story.addToList('warnings', warning)
|
self.story.addToList('warnings', warning)
|
||||||
|
|
||||||
elif key == 'Chapters':
|
|
||||||
self.story.setMetadata('numChapters', int(value))
|
|
||||||
|
|
||||||
elif key == 'Words':
|
elif key == 'Words':
|
||||||
# Apparently only numChapters need to be an integer for
|
# Apparently only numChapters need to be an integer for
|
||||||
# some strange reason. Remove possible ',' characters as to
|
# some strange reason. Remove possible ',' characters as to
|
||||||
|
|
@ -174,7 +171,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
||||||
# ugly %p(am/pm) hack moved into makeDate so other sites can use it.
|
# ugly %p(am/pm) hack moved into makeDate so other sites can use it.
|
||||||
self.story.setMetadata('dateUpdated', date)
|
self.story.setMetadata('dateUpdated', date)
|
||||||
|
|
||||||
if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
|
if self.story.getMetadataRaw('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
raise exceptions.AdultCheckRequired(self.url)
|
||||||
|
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
|
|
|
||||||
|
|
@ -1,310 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from bs4.element import Tag
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
# By virtue of being recent and requiring both is_adult and user/pass,
|
|
||||||
# adapter_fanficcastletvnet.py is the best choice for learning to
|
|
||||||
# write adapters--especially for sites that use the eFiction system.
|
|
||||||
# Most sites that have ".../viewstory.php?sid=123" in the story URL
|
|
||||||
# are eFiction.
|
|
||||||
|
|
||||||
# For non-eFiction sites, it can be considerably more complex, but
|
|
||||||
# this is still a good starting point.
|
|
||||||
|
|
||||||
# In general an 'adapter' needs to do these five things:
|
|
||||||
|
|
||||||
# - 'Register' correctly with the downloader
|
|
||||||
# - Site Login (if needed)
|
|
||||||
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
|
|
||||||
# - Grab the chapter list
|
|
||||||
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
|
|
||||||
# - Grab the chapter texts
|
|
||||||
|
|
||||||
# Search for XXX comments--that's where things are most likely to need changing.
|
|
||||||
|
|
||||||
# This function is called by the downloader in all adapter_*.py files
|
|
||||||
# in this dir to register the adapter class. So it needs to be
|
|
||||||
# updated to reflect the class below it. That, plus getSiteDomain()
|
|
||||||
# take care of 'Registering'.
|
|
||||||
def getClass():
|
|
||||||
return BloodTiesFansComAdapter # XXX
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
|
||||||
self.story.setMetadata('siteabbrev','btf') # XXX
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%d %b %Y" # XXX
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
|
||||||
def getSiteDomain():
|
|
||||||
# The site domain. Does have www here, if it uses it.
|
|
||||||
return 'bloodties-fans.com' # XXX
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
## Login seems to be reasonably standard across eFiction sites.
|
|
||||||
def needToLoginCheck(self, data):
|
|
||||||
if 'Registered Users Only' in data \
|
|
||||||
or 'There is no such account on our website' in data \
|
|
||||||
or "That password doesn't match the one in our database" in data:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def performLogin(self, url):
|
|
||||||
params = {}
|
|
||||||
|
|
||||||
if self.password:
|
|
||||||
params['penname'] = self.username
|
|
||||||
params['password'] = self.password
|
|
||||||
else:
|
|
||||||
params['penname'] = self.getConfig("username")
|
|
||||||
params['password'] = self.getConfig("password")
|
|
||||||
params['cookiecheck'] = '1'
|
|
||||||
params['submit'] = 'Submit'
|
|
||||||
|
|
||||||
loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
|
|
||||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
|
|
||||||
d = self.post_request(loginUrl, params)
|
|
||||||
|
|
||||||
if "Member Account" not in d : #Member Account
|
|
||||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
raise exceptions.FailedToLogin(url,params['penname'])
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# Weirdly, different sites use different warning numbers.
|
|
||||||
# If the title search below fails, there's a good chance
|
|
||||||
# you need a different number. print data at that point
|
|
||||||
# and see what the 'click here to continue' url says.
|
|
||||||
|
|
||||||
# Furthermore, there's a couple sites now with more than
|
|
||||||
# one warning level for different ratings. And they're
|
|
||||||
# fussy about it. midnightwhispers has three: 4, 2 & 1.
|
|
||||||
# we'll try 1 first.
|
|
||||||
addurl = "&ageconsent=ok&warning=4" # XXX
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
|
|
||||||
# index=1 makes sure we see the story chapter index. Some
|
|
||||||
# sites skip that for one-chapter stories.
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
# The actual text that is used to announce you need to be an
|
|
||||||
# adult varies from site to site. Again, print data before
|
|
||||||
# the title search to troubleshoot.
|
|
||||||
|
|
||||||
# Since the warning text can change by warning level, let's
|
|
||||||
# look for the warning pass url. nfacommunity uses
|
|
||||||
# &warning= -- actually, so do other sites. Must be an
|
|
||||||
# eFiction book.
|
|
||||||
|
|
||||||
# viewstory.php?sid=561&warning=4
|
|
||||||
# viewstory.php?sid=561&warning=1
|
|
||||||
# viewstory.php?sid=561&warning=2
|
|
||||||
#print data
|
|
||||||
#m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data)
|
|
||||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
|
||||||
if m != None:
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# We tried the default and still got a warning, so
|
|
||||||
# let's pull the warning number from the 'continue'
|
|
||||||
# link and reload data.
|
|
||||||
addurl = m.group(1)
|
|
||||||
# correct stupid & error in url.
|
|
||||||
addurl = addurl.replace("&","&")
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL 2nd try: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
else:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
# print data
|
|
||||||
|
|
||||||
|
|
||||||
## Title
|
|
||||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/fiction/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/fiction/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
|
||||||
# formating, so it's a little ugly.
|
|
||||||
|
|
||||||
# utility method
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
listbox = soup.find('div',{'class':'listbox'})
|
|
||||||
# <strong>Rating:</strong> M<br /> etc
|
|
||||||
labels = listbox.findAll('strong')
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Summary' in label:
|
|
||||||
## Everything until the next strong tag.
|
|
||||||
svalue = ""
|
|
||||||
while not isinstance(value,Tag) or value.name != 'strong':
|
|
||||||
svalue += unicode(value)
|
|
||||||
value = value.nextSibling
|
|
||||||
self.setDescription(url,svalue)
|
|
||||||
#self.story.setMetadata('description',stripHTML(svalue))
|
|
||||||
|
|
||||||
if 'Rating' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if 'Words' in label:
|
|
||||||
value=re.sub(r"\|",r"",value)
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Categories' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
catstext = [cat.string for cat in cats]
|
|
||||||
for cat in catstext:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Characters' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
charstext = [char.string for char in chars]
|
|
||||||
for char in charstext:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
value=re.sub(r"\|",r"",value)
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
value=re.sub(r"\|",r"",value)
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
# moved outside because they changed *most*, but not *all* labels to <strong>
|
|
||||||
ships = listbox.findAll('a',href=re.compile(r'browse.php.type=class&(amp;)?type_id=2')) # crappy html: & vs & in url.
|
|
||||||
shipstext = [ship.string for ship in ships]
|
|
||||||
for ship in shipstext:
|
|
||||||
self.story.addToList('ships',ship.string)
|
|
||||||
|
|
||||||
genres = listbox.findAll('a',href=re.compile(r'browse.php\?type=class&(amp;)?type_id=1')) # crappy html: & vs & in url.
|
|
||||||
genrestext = [genre.string for genre in genres]
|
|
||||||
for genre in genrestext:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'http://'+self.host+'/fiction/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'story'})
|
|
||||||
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -1,279 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return BuffyGilesComAdapter
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class BuffyGilesComAdapter(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
# XXX Most sites don't have the /efiction part. Replace all to remove it usually.
|
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
|
||||||
self.story.setMetadata('siteabbrev','bufg')
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%d/%m/%y"
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
|
||||||
def getSiteDomain():
|
|
||||||
# The site domain. Does have www here, if it uses it.
|
|
||||||
return 'buffygiles.velocitygrass.com'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/efiction/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
## Login seems to be reasonably standard across eFiction sites.
|
|
||||||
def needToLoginCheck(self, data):
|
|
||||||
if 'Registered Users Only' in data \
|
|
||||||
or 'There is no such account on our website' in data \
|
|
||||||
or "That password doesn't match the one in our database" in data:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def performLogin(self, url):
|
|
||||||
params = {}
|
|
||||||
|
|
||||||
if self.password:
|
|
||||||
params['penname'] = self.username
|
|
||||||
params['password'] = self.password
|
|
||||||
else:
|
|
||||||
params['penname'] = self.getConfig("username")
|
|
||||||
params['password'] = self.getConfig("password")
|
|
||||||
params['cookiecheck'] = '1'
|
|
||||||
params['submit'] = 'Submit'
|
|
||||||
|
|
||||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
|
||||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
|
|
||||||
d = self.post_request(loginUrl, params)
|
|
||||||
|
|
||||||
if "Member Account" not in d : #Member Account
|
|
||||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
raise exceptions.FailedToLogin(url,params['penname'])
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# Weirdly, different sites use different warning numbers.
|
|
||||||
# If the title search below fails, there's a good chance
|
|
||||||
# you need a different number. print data at that point
|
|
||||||
# and see what the 'click here to continue' url says.
|
|
||||||
addurl = "&warning=5"
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
|
|
||||||
# index=1 makes sure we see the story chapter index. Some
|
|
||||||
# sites skip that for one-chapter stories.
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if self.needToLoginCheck(data):
|
|
||||||
# need to log in for this one.
|
|
||||||
self.performLogin(url)
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
# Since the warning text can change by warning level, let's
|
|
||||||
# look for the warning pass url. ksarchive uses
|
|
||||||
# &warning= -- actually, so do other sites. Must be an
|
|
||||||
# eFiction book.
|
|
||||||
|
|
||||||
# efiction/viewstory.php?sid=1882&warning=4
|
|
||||||
# efiction/viewstory.php?sid=1654&ageconsent=ok&warning=5
|
|
||||||
#print data
|
|
||||||
m = re.search(r"'efiction/viewstory.php\?sid=542(&warning=5)'",data)
|
|
||||||
m = re.search(r"'efiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
|
||||||
if m != None:
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# We tried the default and still got a warning, so
|
|
||||||
# let's pull the warning number from the 'continue'
|
|
||||||
# link and reload data.
|
|
||||||
addurl = m.group(1)
|
|
||||||
# correct stupid & error in url.
|
|
||||||
addurl = addurl.replace("&","&")
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL 2nd try: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
else:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
# print data
|
|
||||||
|
|
||||||
|
|
||||||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
|
||||||
|
|
||||||
## Title
|
|
||||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/efiction/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
|
||||||
# formating, so it's a little ugly.
|
|
||||||
|
|
||||||
# utility method
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Summary' in label:
|
|
||||||
## Everything until the next span class='label'
|
|
||||||
svalue = ""
|
|
||||||
while 'label' not in defaultGetattr(value,'class'):
|
|
||||||
svalue += unicode(value)
|
|
||||||
value = value.nextSibling
|
|
||||||
self.setDescription(url,svalue)
|
|
||||||
#self.story.setMetadata('description',stripHTML(svalue))
|
|
||||||
|
|
||||||
if 'Rated' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if 'Word count' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Categories' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
for cat in cats:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Characters' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
for char in chars:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Genre' in label:
|
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
|
||||||
for genre in genres:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
if 'Warnings' in label:
|
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
|
|
||||||
for warning in warnings:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
# there's a stray [ at the end.
|
|
||||||
#value = value[0:-1]
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"efiction/viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^efiction/viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('efiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'story'})
|
|
||||||
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
# Copyright 2024 FanFicFare team
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
|
@ -15,26 +15,24 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from .base_efiction_adapter import BaseEfictionAdapter
|
import logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
from .base_otw_adapter import BaseOTWAdapter
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class NHAMagicalWorldsUsAdapter(BaseEfictionAdapter):
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'nha.magical-worlds.us'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteAbbrev(self):
|
|
||||||
return 'nha'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getDateFormat(self):
|
|
||||||
return "%d/%m/%y"
|
|
||||||
|
|
||||||
def getClass():
|
def getClass():
|
||||||
return NHAMagicalWorldsUsAdapter
|
return CFAAAdapter
|
||||||
|
|
||||||
|
class CFAAAdapter(BaseOTWAdapter):
|
||||||
|
|
||||||
|
def __init__(self, config, url):
|
||||||
|
BaseOTWAdapter.__init__(self, config, url)
|
||||||
|
|
||||||
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
|
self.story.setMetadata('siteabbrev','cfaa')
|
||||||
|
|
||||||
|
@staticmethod # must be @staticmethod, don't remove it.
|
||||||
|
def getSiteDomain():
|
||||||
|
# The site domain. Does have www here, if it uses it.
|
||||||
|
return 'www.cfaarchive.org'
|
||||||
|
|
@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('rating', rating)
|
self.story.setMetadata('rating', rating)
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||||
|
|
||||||
|
|
@ -134,7 +134,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||||
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
labels = soup.find_all('span',{'class':'label'})
|
||||||
|
|
||||||
value = labels[0].previousSibling
|
value = labels[0].previousSibling
|
||||||
svalue = ""
|
svalue = ""
|
||||||
|
|
@ -154,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('numWords', value.split(' -')[0])
|
self.story.setMetadata('numWords', value.split(' -')[0])
|
||||||
|
|
||||||
if 'Categories' in label:
|
if 'Categories' in label:
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
if 'Characters' in label:
|
if 'Characters' in label:
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||||
for char in chars:
|
for char in chars:
|
||||||
self.story.addToList('characters',char.string)
|
self.story.addToList('characters',char.string)
|
||||||
|
|
||||||
if 'Genre' in label:
|
if 'Genre' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||||
for genre in genres:
|
for genre in genres:
|
||||||
self.story.addToList('genre',genre.string)
|
self.story.addToList('genre',genre.string)
|
||||||
|
|
||||||
if 'Warnings' in label:
|
if 'Warnings' in label:
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||||
for warning in warnings:
|
for warning in warnings:
|
||||||
self.story.addToList('warnings',warning.string)
|
self.story.addToList('warnings',warning.string)
|
||||||
|
|
||||||
|
|
@ -194,7 +194,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
series_url = 'http://'+self.host+'/'+a['href']
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||||
|
|
|
||||||
|
|
@ -88,8 +88,8 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
|
||||||
intro = stripHTML(info.select_one('.inform-inform-txt').span)
|
intro = stripHTML(info.select_one('.inform-inform-txt').span)
|
||||||
self.setDescription(self.url, intro)
|
self.setDescription(self.url, intro)
|
||||||
|
|
||||||
for content in soup.findAll('div', {'id': 'content'}):
|
for content in soup.find_all('div', {'id': 'content'}):
|
||||||
for a in content.findAll('a'):
|
for a in content.find_all('a'):
|
||||||
self.add_chapter(a.get_text(), a['href'])
|
self.add_chapter(a.get_text(), a['href'])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
# normalized story URL.
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
self.story.setMetadata('siteabbrev','chosen2')
|
self.story.setMetadata('siteabbrev','chosen2')
|
||||||
|
|
@ -65,10 +65,10 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(cls):
|
def getSiteExampleURLs(cls):
|
||||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
return r"https?"+re.escape("://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||||
def extractChapterUrlsAndMetadata(self):
|
def extractChapterUrlsAndMetadata(self):
|
||||||
|
|
@ -98,7 +98,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
||||||
## Title
|
## Title
|
||||||
## Some stories have a banner that has it's own a tag before the actual text title...
|
## Some stories have a banner that has it's own a tag before the actual text title...
|
||||||
## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
|
## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
|
||||||
a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
|
a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
self.story.setMetadata('title',stripHTML(a))
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
# Find authorid and URL from... author url.
|
||||||
|
|
@ -106,14 +106,14 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
||||||
# so I'm checking the pagetitle div for this as well
|
# so I'm checking the pagetitle div for this as well
|
||||||
a = soup.find('div',{'id':'pagetitle'}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
a = soup.find('div',{'id':'pagetitle'}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||||
self.story.setMetadata('author',a.string)
|
self.story.setMetadata('author',a.string)
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
#self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
|
#self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
|
||||||
self.add_chapter(chapter,'http://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
|
self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
# eFiction sites don't help us out a lot with their meta data
|
||||||
|
|
@ -127,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
labels = soup.find_all('span',{'class':'label'})
|
||||||
for labelspan in labels:
|
for labelspan in labels:
|
||||||
val = labelspan.nextSibling
|
val = labelspan.nextSibling
|
||||||
value = unicode('')
|
value = unicode('')
|
||||||
|
|
@ -149,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('numWords', stripHTML(value))
|
self.story.setMetadata('numWords', stripHTML(value))
|
||||||
|
|
||||||
if 'Categories' in label:
|
if 'Categories' in label:
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
if 'Characters' in label:
|
if 'Characters' in label:
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||||
for char in chars:
|
for char in chars:
|
||||||
self.story.addToList('characters',char.string)
|
self.story.addToList('characters',char.string)
|
||||||
|
|
||||||
if 'Genre' in label:
|
if 'Genre' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
|
||||||
for genre in genres:
|
for genre in genres:
|
||||||
self.story.addToList('genre',genre.string)
|
self.story.addToList('genre',genre.string)
|
||||||
|
|
||||||
if 'Pairing' in label:
|
if 'Pairing' in label:
|
||||||
ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
|
ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
|
||||||
for ship in ships:
|
for ship in ships:
|
||||||
self.story.addToList('ships',ship.string)
|
self.story.addToList('ships',ship.string)
|
||||||
|
|
||||||
if 'Warnings' in label:
|
if 'Warnings' in label:
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
||||||
for warning in warnings:
|
for warning in warnings:
|
||||||
self.story.addToList('warnings',warning.string)
|
self.story.addToList('warnings',warning.string)
|
||||||
|
|
||||||
|
|
@ -192,16 +192,16 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
||||||
# Find Series name from series URL.
|
# Find Series name from series URL.
|
||||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||||
series_name = a.string
|
series_name = a.string
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
series_url = 'https://'+self.host+'/'+a['href']
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
# skip 'report this' and 'TOC' links
|
# this site has several links to each story.
|
||||||
if 'contact.php' not in a['href'] and 'index' not in a['href']:
|
if a.text == 'Latest Chapter':
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
|
||||||
self.setSeries(series_name, i)
|
self.setSeries(series_name, i)
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
self.story.setMetadata('seriesUrl',series_url)
|
||||||
break
|
break
|
||||||
|
|
|
||||||
|
|
@ -1,220 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return CSIForensicsComAdapter
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class CSIForensicsComAdapter(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
|
||||||
self.story.setMetadata('siteabbrev','csiforensics')
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%d %b %Y"
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
|
||||||
def getSiteDomain():
|
|
||||||
# The site domain. Does have www here, if it uses it.
|
|
||||||
return 'csi-forensics.com'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# Weirdly, different sites use different warning numbers.
|
|
||||||
# If the title search below fails, there's a good chance
|
|
||||||
# you need a different number. print data at that point
|
|
||||||
# and see what the 'click here to continue' url says.
|
|
||||||
addurl = "&ageconsent=ok&warning=5&skin=elegantcsi"
|
|
||||||
else:
|
|
||||||
addurl="&skin=elegantcsi"
|
|
||||||
|
|
||||||
# index=1 makes sure we see the story chapter index. Some
|
|
||||||
# sites skip that for one-chapter stories.
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
# The actual text that is used to announce you need to be an
|
|
||||||
# adult varies from site to site. Again, print data before
|
|
||||||
# the title search to troubleshoot.
|
|
||||||
if "This story is rated NC-17, and therefore is not suitable for minors. If you are below the age required to view such material in your locality, please return from whence you came." in data: # XXX
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
# print data
|
|
||||||
|
|
||||||
|
|
||||||
## Title
|
|
||||||
|
|
||||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
|
||||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
self.story.setMetadata('title',a.string)
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Rating
|
|
||||||
rate = stripHTML(soup.find('div',{'id':'pagetitle'}))
|
|
||||||
rate = rate[rate.rindex('[')+1:rate.rindex(']')]
|
|
||||||
self.story.setMetadata('rating', rate)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
|
||||||
# formating, so it's a little ugly.
|
|
||||||
|
|
||||||
# utility method
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
smalldiv = soup.find('div', {'class' : 'small'})
|
|
||||||
|
|
||||||
|
|
||||||
chars = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
for char in chars:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
metatext = stripHTML(smalldiv)
|
|
||||||
|
|
||||||
if 'Completed: Yes' in metatext:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
word=soup.find(text=re.compile("Word count:")).split(':')
|
|
||||||
self.story.setMetadata('numWords', word[1])
|
|
||||||
|
|
||||||
cats = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
for cat in cats:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
warnings = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=class(&)type_id=2(&)classid=\d+'))
|
|
||||||
for warning in warnings:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
date=soup.find('div',{'class' : 'bottom'})
|
|
||||||
pd=date.find(text=re.compile("Published:")).string.split(': ')
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(pd[1].split(' U')[0]), self.dateformat))
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(pd[2]), self.dateformat))
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
|
||||||
pub=0
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Genres' in label:
|
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
|
||||||
for genre in genres:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
if 'Warnings' in label:
|
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
|
||||||
for warning in warnings:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'https://'+self.host+'/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
smalldiv.extract()
|
|
||||||
|
|
||||||
# Summary
|
|
||||||
summary = soup.find('div', {'class' : 'content'})
|
|
||||||
self.setDescription(url,summary)
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'story'})
|
|
||||||
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
256
fanficfare/adapters/adapter_deviantartcom.py
Normal file
256
fanficfare/adapters/adapter_deviantartcom.py
Normal file
|
|
@ -0,0 +1,256 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2021 FanFicFare team
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
# py2 vs py3 transition
|
||||||
|
from ..six.moves.urllib.parse import urlparse
|
||||||
|
|
||||||
|
from .base_adapter import BaseSiteAdapter, makeDate
|
||||||
|
from fanficfare.htmlcleanup import stripHTML
|
||||||
|
from .. import exceptions as exceptions
|
||||||
|
from fanficfare.dateutils import parse_relative_date_string
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def getClass():
|
||||||
|
return DeviantArtComSiteAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class DeviantArtComSiteAdapter(BaseSiteAdapter):
|
||||||
|
def __init__(self, config, url):
|
||||||
|
BaseSiteAdapter.__init__(self, config, url)
|
||||||
|
self.story.setMetadata('siteabbrev', 'dac')
|
||||||
|
|
||||||
|
self.username = 'NoneGiven'
|
||||||
|
self.password = ''
|
||||||
|
self.is_adult = False
|
||||||
|
|
||||||
|
match = re.match(self.getSiteURLPattern(), url)
|
||||||
|
if not match:
|
||||||
|
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())
|
||||||
|
|
||||||
|
story_id = match.group('id')
|
||||||
|
author = match.group('author')
|
||||||
|
self.story.setMetadata('author', author)
|
||||||
|
self.story.setMetadata('authorId', author)
|
||||||
|
self.story.setMetadata('authorUrl', 'https://www.deviantart.com/' + author)
|
||||||
|
self._setURL(url)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getSiteDomain():
|
||||||
|
return 'www.deviantart.com'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getAcceptDomains(cls):
|
||||||
|
return ['www.deviantart.com']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getProtocol(self):
|
||||||
|
return 'https'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getSiteExampleURLs(cls):
|
||||||
|
return 'https://%s/<author>/art/<work-name>' % cls.getSiteDomain()
|
||||||
|
|
||||||
|
def getSiteURLPattern(self):
|
||||||
|
return r'https?://www\.deviantart\.com/(?P<author>[^/]+)/art/(?P<id>[^/]+)/?'
|
||||||
|
|
||||||
|
def performLogin(self, url):
|
||||||
|
if self.username and self.username != 'NoneGiven':
|
||||||
|
username = self.username
|
||||||
|
else:
|
||||||
|
username = self.getConfig('username')
|
||||||
|
|
||||||
|
# logger.debug("\n\nusername:(%s)\n\n"%username)
|
||||||
|
if not username:
|
||||||
|
logger.info("Login Required for URL %s" % url)
|
||||||
|
raise exceptions.FailedToLogin(url,username)
|
||||||
|
|
||||||
|
data = self.get_request_raw('https://www.deviantart.com/users/login', referer=url, usecache=False)
|
||||||
|
data = self.decode_data(data)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
params = {
|
||||||
|
'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
|
||||||
|
'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
|
||||||
|
'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
|
||||||
|
'challenge': soup.find('input', {'name': 'challenge'})['value'],
|
||||||
|
'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
|
||||||
|
'remember': 'on',
|
||||||
|
'username': username
|
||||||
|
}
|
||||||
|
|
||||||
|
loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/step2'
|
||||||
|
logger.debug('Will now login to deviantARt as (%s)' % username)
|
||||||
|
|
||||||
|
result = self.post_request(loginUrl, params, usecache=False)
|
||||||
|
soup = self.make_soup(result)
|
||||||
|
if not soup.find('input', {'name': 'lu_token2'}):
|
||||||
|
logger.info("Login Failed for URL %s (no lu_token2 found)" % url)
|
||||||
|
raise exceptions.FailedToLogin(url,username)
|
||||||
|
|
||||||
|
params = {
|
||||||
|
'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
|
||||||
|
'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
|
||||||
|
'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
|
||||||
|
'challenge': soup.find('input', {'name': 'challenge'})['value'],
|
||||||
|
'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
|
||||||
|
'lu_token2': soup.find('input', {'name': 'lu_token2'})['value'],
|
||||||
|
'remember': 'on',
|
||||||
|
'username': ''
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.password:
|
||||||
|
params['password'] = self.password
|
||||||
|
else:
|
||||||
|
params['password'] = self.getConfig('password')
|
||||||
|
|
||||||
|
# logger.debug("\n\nparams['password']:(%s)\n\n"%params['password'])
|
||||||
|
loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/signin'
|
||||||
|
logger.debug('Will now send password to deviantARt')
|
||||||
|
|
||||||
|
result = self.post_request(loginUrl, params, usecache=False)
|
||||||
|
|
||||||
|
if 'Log In | DeviantArt' in result:
|
||||||
|
logger.error('Failed to login to deviantArt as %s' % username)
|
||||||
|
raise exceptions.FailedToLogin('https://www.deviantart.com', username)
|
||||||
|
else:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def requiresLogin(self, data):
|
||||||
|
return '</a> has limited the viewing of this artwork to members of the DeviantArt community only' in data
|
||||||
|
|
||||||
|
def isLoggedIn(self, data):
|
||||||
|
return '<form id="logout-form" action="https://www.deviantart.com/users/logout" method="POST">' in data
|
||||||
|
|
||||||
|
def isWatchersOnly(self, data):
|
||||||
|
return '>Watchers-Only Deviation<' in data
|
||||||
|
|
||||||
|
def requiresMatureContentEnabled(self, data):
|
||||||
|
return (
|
||||||
|
'>This content is intended for mature audiences<' in data
|
||||||
|
or '>This deviation is intended for mature audiences<' in data
|
||||||
|
or '>This filter hides content that may be inappropriate for some viewers<' in data
|
||||||
|
or '>May contain sensitive content<' in data
|
||||||
|
or '>Log in to view<' in data
|
||||||
|
or '>This deviation has been labeled as containing themes not suitable for all deviants.<' in data
|
||||||
|
)
|
||||||
|
|
||||||
|
def extractChapterUrlsAndMetadata(self):
|
||||||
|
logger.debug('URL: %s', self.url)
|
||||||
|
|
||||||
|
data = self.get_request(self.url)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
## story can require login outright, or it can show up as
|
||||||
|
## watchers-only or mature-enabled without the same 'requires
|
||||||
|
## login' strings.
|
||||||
|
if self.requiresLogin(data) or ( not self.isLoggedIn(data) and
|
||||||
|
(self.isWatchersOnly(data) or
|
||||||
|
self.requiresMatureContentEnabled(data)) ):
|
||||||
|
if self.performLogin(self.url):
|
||||||
|
data = self.get_request(self.url, usecache=False)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
## Check watchers only and mature enabled again, separately,
|
||||||
|
## after login because they can still apply after login.
|
||||||
|
if self.isWatchersOnly(data):
|
||||||
|
raise exceptions.FailedToDownload(
|
||||||
|
'Deviation is only available for watchers.' +
|
||||||
|
'You must watch this author before you can download it.'
|
||||||
|
)
|
||||||
|
if self.requiresMatureContentEnabled(data):
|
||||||
|
raise exceptions.FailedToDownload(
|
||||||
|
'Deviation is set as mature, you must go into your account ' +
|
||||||
|
'and enable showing of mature content.'
|
||||||
|
)
|
||||||
|
|
||||||
|
appurl = soup.select_one('meta[property="og:url"]')['content']
|
||||||
|
if appurl:
|
||||||
|
story_id = urlparse(appurl).path.lstrip('/')
|
||||||
|
else:
|
||||||
|
logger.debug("Looking for JS story id")
|
||||||
|
## after login, this is only found in a JS block. Dunno why.
|
||||||
|
## F875A309-B0DB-860E-5079-790D0FBE5668
|
||||||
|
match = re.match(r'\\"deviationUuid\\":\\"(?P<id>[A-Z0-9-]+)\\",',data)
|
||||||
|
if match:
|
||||||
|
story_id = match.group('id')
|
||||||
|
else:
|
||||||
|
raise exceptions.FailedToDownload('Failed to find Story ID.')
|
||||||
|
self.story.setMetadata('storyId', story_id)
|
||||||
|
|
||||||
|
title = soup.select_one('h1').get_text()
|
||||||
|
self.story.setMetadata('title', stripHTML(title))
|
||||||
|
|
||||||
|
## dA has no concept of status
|
||||||
|
# self.story.setMetadata('status', 'Completed')
|
||||||
|
|
||||||
|
pubdate = soup.select_one('time').get_text()
|
||||||
|
|
||||||
|
# Maybe do this better, but this works
|
||||||
|
try:
|
||||||
|
self.story.setMetadata('datePublished', makeDate(pubdate, '%b %d, %Y'))
|
||||||
|
except:
|
||||||
|
self.story.setMetadata('datePublished', parse_relative_date_string(pubdate))
|
||||||
|
|
||||||
|
# do description here if appropriate
|
||||||
|
|
||||||
|
story_tags = soup.select('a[href^="https://www.deviantart.com/tag"] span')
|
||||||
|
if story_tags is not None:
|
||||||
|
for tag in story_tags:
|
||||||
|
self.story.addToList('genre', tag.get_text())
|
||||||
|
|
||||||
|
self.add_chapter(title, self.url)
|
||||||
|
|
||||||
|
def getChapterText(self, url):
|
||||||
|
logger.debug('Getting chapter text from: %s', url)
|
||||||
|
data = self.get_request(url)
|
||||||
|
# logger.debug(data)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
# remove comments section to avoid false matches
|
||||||
|
comments = soup.select_one('[data-hook=comments_thread]')
|
||||||
|
if comments:
|
||||||
|
comments.decompose()
|
||||||
|
# previous search not always found in some stories.
|
||||||
|
# <div id="comments"></div> inside the real containing
|
||||||
|
# div seems more common
|
||||||
|
commentsdiv = soup.select_one('div#comments')
|
||||||
|
if commentsdiv:
|
||||||
|
commentsdiv.parent.decompose()
|
||||||
|
|
||||||
|
# three different 'content' tags to look for.
|
||||||
|
# This is the current in Oct 2024
|
||||||
|
content = soup.select_one('[data-editor-viewer="1"]')
|
||||||
|
|
||||||
|
if content is None:
|
||||||
|
# older story? I can't find any of this style in Oct2024
|
||||||
|
content = soup.select_one('[data-id="rich-content-viewer"]')
|
||||||
|
|
||||||
|
if content is None:
|
||||||
|
# olderer story, but used by some older (2018) posts
|
||||||
|
content = soup.select_one('.legacy-journal')
|
||||||
|
|
||||||
|
if content is None:
|
||||||
|
raise exceptions.FailedToDownload(
|
||||||
|
'Could not find story text. Please open a bug with the URL %s' % self.url
|
||||||
|
)
|
||||||
|
|
||||||
|
return self.utf8FromSoup(url, content)
|
||||||
|
|
@ -95,7 +95,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
||||||
params['Submit'] = 'Submit'
|
params['Submit'] = 'Submit'
|
||||||
|
|
||||||
# copy all hidden input tags to pick up appropriate tokens.
|
# copy all hidden input tags to pick up appropriate tokens.
|
||||||
for tag in soup.findAll('input',{'type':'hidden'}):
|
for tag in soup.find_all('input',{'type':'hidden'}):
|
||||||
params[tag['name']] = tag['value']
|
params[tag['name']] = tag['value']
|
||||||
|
|
||||||
loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
|
loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
|
||||||
|
|
@ -153,7 +153,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
self.story.setMetadata('title',stripHTML(a))
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
chapters = soup.find('select').findAll('option')
|
chapters = soup.find('select').find_all('option')
|
||||||
if len(chapters)==1:
|
if len(chapters)==1:
|
||||||
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
|
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
|
||||||
else:
|
else:
|
||||||
|
|
@ -168,7 +168,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
||||||
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
|
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
|
||||||
|
|
||||||
#grab the rest of the metadata from the author's page
|
#grab the rest of the metadata from the author's page
|
||||||
for div in asoup.findAll('div'):
|
for div in asoup.find_all('div'):
|
||||||
nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
|
nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
|
||||||
if nav != None:
|
if nav != None:
|
||||||
break
|
break
|
||||||
|
|
@ -208,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
else:
|
else:
|
||||||
asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
|
asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
|
||||||
for div in asoup.findAll('div'):
|
for div in asoup.find_all('div'):
|
||||||
nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
|
nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
|
||||||
if nav != None:
|
if nav != None:
|
||||||
break
|
break
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('author',a.string)
|
self.story.setMetadata('author',a.string)
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||||
|
|
||||||
|
|
@ -181,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
self.setDescription(url,content.find('blockquote'))
|
self.setDescription(url,content.find('blockquote'))
|
||||||
|
|
||||||
for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
|
for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
|
||||||
self.story.addToList('genre',genre.string)
|
self.story.addToList('genre',genre.string)
|
||||||
|
|
||||||
for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
|
for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
|
||||||
self.story.addToList('warnings',warning.string)
|
self.story.addToList('warnings',warning.string)
|
||||||
|
|
||||||
labels = content.findAll('b')
|
labels = content.find_all('b')
|
||||||
|
|
||||||
for labelspan in labels:
|
for labelspan in labels:
|
||||||
value = labelspan.nextSibling
|
value = labelspan.nextSibling
|
||||||
|
|
@ -208,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('rating', value)
|
self.story.setMetadata('rating', value)
|
||||||
|
|
||||||
if 'Categories' in label:
|
if 'Categories' in label:
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
if 'Characters' in label:
|
if 'Characters' in label:
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||||
for char in chars:
|
for char in chars:
|
||||||
self.story.addToList('characters',char.string)
|
self.story.addToList('characters',char.string)
|
||||||
|
|
||||||
if 'Genre' in label:
|
if 'Genre' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||||
for genre in genres:
|
for genre in genres:
|
||||||
self.story.addToList('genre',genre.string)
|
self.story.addToList('genre',genre.string)
|
||||||
|
|
||||||
if 'Warnings' in label:
|
if 'Warnings' in label:
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||||
for warning in warnings:
|
for warning in warnings:
|
||||||
self.story.addToList('warnings',warning.string)
|
self.story.addToList('warnings',warning.string)
|
||||||
|
|
||||||
|
|
@ -247,7 +247,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
# skip 'report this' and 'TOC' links
|
# skip 'report this' and 'TOC' links
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
||||||
# no selector found, so it's a one-chapter story.
|
# no selector found, so it's a one-chapter story.
|
||||||
self.add_chapter(self.story.getMetadata('title'),url)
|
self.add_chapter(self.story.getMetadata('title'),url)
|
||||||
else:
|
else:
|
||||||
allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')})
|
allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
|
||||||
for o in allOptions:
|
for o in allOptions:
|
||||||
url = u'https://%s/%s' % ( self.getSiteDomain(),
|
url = u'https://%s/%s' % ( self.getSiteDomain(),
|
||||||
o['value'])
|
o['value'])
|
||||||
|
|
@ -170,14 +170,14 @@ class EFPFanFicNet(BaseSiteAdapter):
|
||||||
if authsoup != None:
|
if authsoup != None:
|
||||||
# last author link with offset should be the 'next' link.
|
# last author link with offset should be the 'next' link.
|
||||||
authurl = u'https://%s/%s' % ( self.getSiteDomain(),
|
authurl = u'https://%s/%s' % ( self.getSiteDomain(),
|
||||||
authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
|
authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
|
||||||
|
|
||||||
# Need author page for most of the metadata.
|
# Need author page for most of the metadata.
|
||||||
logger.debug("fetching author page: (%s)"%authurl)
|
logger.debug("fetching author page: (%s)"%authurl)
|
||||||
authsoup = self.make_soup(self.get_request(authurl))
|
authsoup = self.make_soup(self.get_request(authurl))
|
||||||
#print("authsoup:%s"%authsoup)
|
#print("authsoup:%s"%authsoup)
|
||||||
|
|
||||||
storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
|
storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
|
||||||
for storya in storyas:
|
for storya in storyas:
|
||||||
#print("======storya:%s"%storya)
|
#print("======storya:%s"%storya)
|
||||||
storyblock = storya.findParent('div',{'class':'storybloc'})
|
storyblock = storya.findParent('div',{'class':'storybloc'})
|
||||||
|
|
@ -194,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
||||||
# Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
|
# Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
|
||||||
# Categoria: <a href="categories.php?catid=1&parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&a=">3</a> recensioni</div>
|
# Categoria: <a href="categories.php?catid=1&parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&a=">3</a> recensioni</div>
|
||||||
|
|
||||||
cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
|
|
@ -262,7 +262,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
|
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
|
||||||
|
|
@ -288,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||||
|
|
||||||
# remove any header and 'o:p' tags.
|
# remove any header and 'o:p' tags.
|
||||||
for tag in div.findAll("head") + div.findAll("o:p"):
|
for tag in div.find_all("head") + div.find_all("o:p"):
|
||||||
tag.extract()
|
tag.extract()
|
||||||
|
|
||||||
# change any html and body tags to div.
|
# change any html and body tags to div.
|
||||||
for tag in div.findAll("html") + div.findAll("body"):
|
for tag in div.find_all("html") + div.find_all("body"):
|
||||||
tag.name='div'
|
tag.name='div'
|
||||||
|
|
||||||
# remove extra bogus doctype.
|
# remove extra bogus doctype.
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('rating', rating)
|
self.story.setMetadata('rating', rating)
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||||
|
|
||||||
|
|
@ -144,7 +144,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||||
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
labels = soup.find_all('span',{'class':'label'})
|
||||||
|
|
||||||
value = labels[0].previousSibling
|
value = labels[0].previousSibling
|
||||||
svalue = ""
|
svalue = ""
|
||||||
|
|
@ -164,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('numWords', value.split(' -')[0])
|
self.story.setMetadata('numWords', value.split(' -')[0])
|
||||||
|
|
||||||
if 'Categories' in label:
|
if 'Categories' in label:
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
if 'Characters' in label:
|
if 'Characters' in label:
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||||
for char in chars:
|
for char in chars:
|
||||||
self.story.addToList('characters',char.string)
|
self.story.addToList('characters',char.string)
|
||||||
|
|
||||||
if 'Genre' in label:
|
if 'Genre' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||||
for genre in genres:
|
for genre in genres:
|
||||||
self.story.addToList('genre',genre.string)
|
self.story.addToList('genre',genre.string)
|
||||||
|
|
||||||
if 'Warnings' in label:
|
if 'Warnings' in label:
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||||
for warning in warnings:
|
for warning in warnings:
|
||||||
self.story.addToList('warnings',warning.string)
|
self.story.addToList('warnings',warning.string)
|
||||||
|
|
||||||
|
|
@ -204,7 +204,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
series_url = 'http://'+self.host+'/'+a['href']
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
# skip 'report this' and 'TOC' links
|
# skip 'report this' and 'TOC' links
|
||||||
|
|
|
||||||
|
|
@ -1,218 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return FaerieArchiveComAdapter
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class FaerieArchiveComAdapter(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
|
||||||
self.story.setMetadata('siteabbrev','fae')
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%B %d, %Y"
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
|
||||||
def getSiteDomain():
|
|
||||||
# The site domain. Does have www here, if it uses it.
|
|
||||||
return 'faerie-archive.com'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getAcceptDomains(cls):
|
|
||||||
# for backward compatibility
|
|
||||||
return ['efiction.esteliel.de',cls.getSiteDomain()]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getConfigSections(cls):
|
|
||||||
"Only needs to be overriden if has additional ini sections."
|
|
||||||
# for backward compatibility
|
|
||||||
return ['efiction.esteliel.de',cls.getSiteDomain()]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
siter = "("+"|".join([re.escape(x) for x in self.getAcceptDomains()])+")"
|
|
||||||
return re.escape("http://")+siter+re.escape("/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
# index=1 makes sure we see the story chapter index. Some
|
|
||||||
# sites skip that for one-chapter stories.
|
|
||||||
url = self.url+'&index=1'
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
|
|
||||||
## Title and author
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
# print data
|
|
||||||
|
|
||||||
|
|
||||||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
|
||||||
## Title
|
|
||||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
|
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
|
||||||
# formating, so it's a little ugly.
|
|
||||||
|
|
||||||
# utility method
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
list = soup.find('div', {'class':'listbox'})
|
|
||||||
labelspan=list.find('span',{'class':'label'})
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
|
||||||
for genre in genres:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
labels = list.findAll('b')
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Summary' in label:
|
|
||||||
## Everything until the next span class='label'
|
|
||||||
svalue = ""
|
|
||||||
while 'Rating' not in unicode(value):
|
|
||||||
svalue += unicode(value)
|
|
||||||
value = value.nextSibling
|
|
||||||
self.setDescription(url,svalue)
|
|
||||||
#self.story.setMetadata('description',stripHTML(svalue))
|
|
||||||
|
|
||||||
if 'Rating' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if 'Words' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Category' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
for cat in cats:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Characters' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
for char in chars:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
# there's a stray [ at the end.
|
|
||||||
#value = value[0:-1]
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
try:
|
|
||||||
if list.find('a', href=re.compile(r"series.php")) != None:
|
|
||||||
for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")):
|
|
||||||
# Find Series name from series URL.
|
|
||||||
series_url = 'http://'+self.host+'/'+series['href']
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
name=seriessoup.find('div', {'id' : 'pagetitle'})
|
|
||||||
name.find('a').extract()
|
|
||||||
self.setSeries(name.text.split(' by[')[0], i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
i=0
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
if i == 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'story'})
|
|
||||||
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -53,6 +53,9 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
#Setting the 'Zone' for each "Site"
|
#Setting the 'Zone' for each "Site"
|
||||||
self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')
|
self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')
|
||||||
|
|
||||||
|
# site change .nsns to -nsns
|
||||||
|
self.zone = self.zone.replace('.nsns','-nsns')
|
||||||
|
|
||||||
# normalized story URL.
|
# normalized story URL.
|
||||||
self._setURL('https://{0}.{1}/{2}/'.format(
|
self._setURL('https://{0}.{1}/{2}/'.format(
|
||||||
self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
|
self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
|
||||||
|
|
@ -79,7 +82,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
@classmethod
|
@classmethod
|
||||||
def getAcceptDomains(cls):
|
def getAcceptDomains(cls):
|
||||||
|
|
||||||
|
# need both .nsns(old) and -nsns(new) because it's a domain
|
||||||
|
# change, not just URL change.
|
||||||
return ['aaran-st-vines.nsns.fanficauthors.net',
|
return ['aaran-st-vines.nsns.fanficauthors.net',
|
||||||
|
'aaran-st-vines-nsns.fanficauthors.net',
|
||||||
'abraxan.fanficauthors.net',
|
'abraxan.fanficauthors.net',
|
||||||
'bobmin.fanficauthors.net',
|
'bobmin.fanficauthors.net',
|
||||||
'canoncansodoff.fanficauthors.net',
|
'canoncansodoff.fanficauthors.net',
|
||||||
|
|
@ -95,9 +101,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
'jeconais.fanficauthors.net',
|
'jeconais.fanficauthors.net',
|
||||||
'kinsfire.fanficauthors.net',
|
'kinsfire.fanficauthors.net',
|
||||||
'kokopelli.nsns.fanficauthors.net',
|
'kokopelli.nsns.fanficauthors.net',
|
||||||
|
'kokopelli-nsns.fanficauthors.net',
|
||||||
'ladya.nsns.fanficauthors.net',
|
'ladya.nsns.fanficauthors.net',
|
||||||
|
'ladya-nsns.fanficauthors.net',
|
||||||
'lorddwar.fanficauthors.net',
|
'lorddwar.fanficauthors.net',
|
||||||
'mrintel.nsns.fanficauthors.net',
|
'mrintel.nsns.fanficauthors.net',
|
||||||
|
'mrintel-nsns.fanficauthors.net',
|
||||||
'musings-of-apathy.fanficauthors.net',
|
'musings-of-apathy.fanficauthors.net',
|
||||||
'ruskbyte.fanficauthors.net',
|
'ruskbyte.fanficauthors.net',
|
||||||
'seelvor.fanficauthors.net',
|
'seelvor.fanficauthors.net',
|
||||||
|
|
@ -108,7 +117,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
################################################################################################
|
################################################################################################
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(self):
|
def getSiteExampleURLs(self):
|
||||||
return ("https://aaran-st-vines.nsns.fanficauthors.net/A_Story_Name/ "
|
return ("https://aaran-st-vines-nsns.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://abraxan.fanficauthors.net/A_Story_Name/ "
|
+ "https://abraxan.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://bobmin.fanficauthors.net/A_Story_Name/ "
|
+ "https://bobmin.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "
|
+ "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "
|
||||||
|
|
@ -123,10 +132,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
+ "https://jbern.fanficauthors.net/A_Story_Name/ "
|
+ "https://jbern.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://jeconais.fanficauthors.net/A_Story_Name/ "
|
+ "https://jeconais.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://kinsfire.fanficauthors.net/A_Story_Name/ "
|
+ "https://kinsfire.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://kokopelli.nsns.fanficauthors.net/A_Story_Name/ "
|
+ "https://kokopelli-nsns.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://ladya.nsns.fanficauthors.net/A_Story_Name/ "
|
+ "https://ladya-nsns.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://lorddwar.fanficauthors.net/A_Story_Name/ "
|
+ "https://lorddwar.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://mrintel.nsns.fanficauthors.net/A_Story_Name/ "
|
+ "https://mrintel-nsns.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
|
+ "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
|
+ "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
|
||||||
+ "https://seelvor.fanficauthors.net/A_Story_Name/ "
|
+ "https://seelvor.fanficauthors.net/A_Story_Name/ "
|
||||||
|
|
@ -136,8 +145,16 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
################################################################################################
|
################################################################################################
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
|
## .nsns kept here to match both . and -
|
||||||
return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
|
return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_section_url(cls,url):
|
||||||
|
## only changing .nsns to -nsns and only when part of the
|
||||||
|
## domain.
|
||||||
|
url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
|
||||||
|
return url
|
||||||
|
|
||||||
################################################################################################
|
################################################################################################
|
||||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||||
|
|
||||||
|
|
@ -163,7 +180,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
# The published and update dates are with the chapter links...
|
# The published and update dates are with the chapter links...
|
||||||
# so we have to get them from there.
|
# so we have to get them from there.
|
||||||
chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata(
|
chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
|
||||||
'storyId')+'/([a-zA-Z0-9_]+)/'))
|
'storyId')+'/([a-zA-Z0-9_]+)/'))
|
||||||
|
|
||||||
# Here we are getting the published date. It is the date the first chapter was "updated"
|
# Here we are getting the published date. It is the date the first chapter was "updated"
|
||||||
|
|
@ -202,7 +219,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
## Raising AdultCheckRequired after collecting chapters gives
|
## Raising AdultCheckRequired after collecting chapters gives
|
||||||
## a double chapter list. So does genre, but it de-dups
|
## a double chapter list. So does genre, but it de-dups
|
||||||
## automatically.
|
## automatically.
|
||||||
if( self.story.getMetadata('rating') == 'Mature'
|
if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only']
|
||||||
and not (self.is_adult or self.getConfig("is_adult")) ):
|
and not (self.is_adult or self.getConfig("is_adult")) ):
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
raise exceptions.AdultCheckRequired(self.url)
|
||||||
|
|
||||||
|
|
@ -226,7 +243,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
# grab the text for an individual chapter.
|
# grab the text for an individual chapter.
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
logger.debug('Getting chapter text from: %s' % url)
|
||||||
if( self.story.getMetadata('rating') == 'Mature' and
|
if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only'] and
|
||||||
(self.is_adult or self.getConfig("is_adult")) ):
|
(self.is_adult or self.getConfig("is_adult")) ):
|
||||||
addurl = "?bypass=1"
|
addurl = "?bypass=1"
|
||||||
else:
|
else:
|
||||||
|
|
@ -241,8 +258,8 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
||||||
"Error downloading Chapter: '{0}'! Missing required element!".format(url))
|
"Error downloading Chapter: '{0}'! Missing required element!".format(url))
|
||||||
|
|
||||||
#Now, there are a lot of extranious tags within the story division.. so we will remove them.
|
#Now, there are a lot of extranious tags within the story division.. so we will remove them.
|
||||||
for tag in story.findAll('ul',{'class':'pager'}) + story.findAll(
|
for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
|
||||||
'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}):
|
'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
|
||||||
tag.extract()
|
tag.extract()
|
||||||
|
|
||||||
return self.utf8FromSoup(url,story)
|
return self.utf8FromSoup(url,story)
|
||||||
|
|
|
||||||
|
|
@ -1,187 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import re
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import ensure_text
|
|
||||||
from ..six.moves.urllib import parse as urlparse
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
from .. import exceptions
|
|
||||||
|
|
||||||
|
|
||||||
_SOURCE_CODE_ENCODING = 'utf-8'
|
|
||||||
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return FanficHuAdapter
|
|
||||||
|
|
||||||
|
|
||||||
def _get_query_data(url):
|
|
||||||
components = urlparse.urlparse(url)
|
|
||||||
query_data = urlparse.parse_qs(components.query)
|
|
||||||
return dict((key, data[0]) for key, data in query_data.items())
|
|
||||||
|
|
||||||
|
|
||||||
class FanficHuAdapter(BaseSiteAdapter):
|
|
||||||
SITE_ABBREVIATION = 'ffh'
|
|
||||||
SITE_DOMAIN = 'fanfic.hu'
|
|
||||||
SITE_LANGUAGE = 'Hungarian'
|
|
||||||
|
|
||||||
BASE_URL = 'https://' + SITE_DOMAIN + '/merengo/'
|
|
||||||
VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%s'
|
|
||||||
|
|
||||||
DATE_FORMAT = '%m/%d/%Y'
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
query_data = urlparse.parse_qs(self.parsedUrl.query)
|
|
||||||
story_id = query_data['sid'][0]
|
|
||||||
|
|
||||||
self.story.setMetadata('storyId', story_id)
|
|
||||||
self._setURL(self.VIEW_STORY_URL_TEMPLATE % story_id)
|
|
||||||
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
|
|
||||||
self.story.setMetadata('language', self.SITE_LANGUAGE)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return FanficHuAdapter.SITE_DOMAIN
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return cls.VIEW_STORY_URL_TEMPLATE % 1234
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'
|
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
soup = self.make_soup(self.get_request(self.url + '&i=1'))
|
|
||||||
|
|
||||||
if ensure_text(soup.title.string).strip(u' :') == u'írta':
|
|
||||||
raise exceptions.StoryDoesNotExist(self.url)
|
|
||||||
|
|
||||||
chapter_options = soup.find('form', action='viewstory.php').select('option')
|
|
||||||
# Remove redundant "Fejezetek" option
|
|
||||||
chapter_options.pop(0)
|
|
||||||
|
|
||||||
# If there is still more than one entry remove chapter overview entry
|
|
||||||
if len(chapter_options) > 1:
|
|
||||||
chapter_options.pop(0)
|
|
||||||
|
|
||||||
for option in chapter_options:
|
|
||||||
url = urlparse.urljoin(self.url, option['value'])
|
|
||||||
self.add_chapter(option.string, url)
|
|
||||||
|
|
||||||
author_url = urlparse.urljoin(self.BASE_URL, soup.find('a', href=lambda href: href and href.startswith('viewuser.php?uid='))['href'])
|
|
||||||
soup = self.make_soup(self.get_request(author_url))
|
|
||||||
|
|
||||||
story_id = self.story.getMetadata('storyId')
|
|
||||||
for table in soup('table', {'class': 'mainnav'}):
|
|
||||||
title_anchor = table.find('span', {'class': 'storytitle'}).a
|
|
||||||
href = title_anchor['href']
|
|
||||||
if href.startswith('javascript:'):
|
|
||||||
href = href.rsplit(' ', 1)[1].strip("'")
|
|
||||||
query_data = _get_query_data(href)
|
|
||||||
|
|
||||||
if query_data['sid'] == story_id:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# This should never happen, the story must be found on the author's
|
|
||||||
# page.
|
|
||||||
raise exceptions.FailedToDownload(self.url)
|
|
||||||
|
|
||||||
self.story.setMetadata('title', title_anchor.string)
|
|
||||||
|
|
||||||
rows = table('tr')
|
|
||||||
|
|
||||||
anchors = rows[0].div('a')
|
|
||||||
author_anchor = anchors[1]
|
|
||||||
query_data = _get_query_data(author_anchor['href'])
|
|
||||||
self.story.setMetadata('author', author_anchor.string)
|
|
||||||
self.story.setMetadata('authorId', query_data['uid'])
|
|
||||||
self.story.setMetadata('authorUrl', urlparse.urljoin(self.BASE_URL, author_anchor['href']))
|
|
||||||
self.story.setMetadata('reviews', anchors[3].string)
|
|
||||||
|
|
||||||
if self.getConfig('keep_summary_html'):
|
|
||||||
self.story.setMetadata('description', self.utf8FromSoup(author_url, rows[1].td))
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('description', ''.join(rows[1].td(text=True)))
|
|
||||||
|
|
||||||
for row in rows[3:]:
|
|
||||||
index = 0
|
|
||||||
cells = row('td')
|
|
||||||
|
|
||||||
while index < len(cells):
|
|
||||||
cell = cells[index]
|
|
||||||
key = ensure_text(cell.b.string).strip(u':')
|
|
||||||
try:
|
|
||||||
value = ensure_text(cells[index+1].string)
|
|
||||||
except:
|
|
||||||
value = None
|
|
||||||
|
|
||||||
if key == u'Kategória':
|
|
||||||
for anchor in cells[index+1]('a'):
|
|
||||||
self.story.addToList('category', anchor.string)
|
|
||||||
|
|
||||||
elif key == u'Szereplõk':
|
|
||||||
if cells[index+1].string:
|
|
||||||
for name in cells[index+1].string.split(', '):
|
|
||||||
self.story.addToList('character', name)
|
|
||||||
|
|
||||||
elif key == u'Korhatár':
|
|
||||||
if value != 'nem korhatáros':
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
elif key == u'Figyelmeztetések':
|
|
||||||
for b_tag in cells[index+1]('b'):
|
|
||||||
self.story.addToList('warnings', b_tag.string)
|
|
||||||
|
|
||||||
elif key == u'Jellemzõk':
|
|
||||||
for genre in cells[index+1].string.split(', '):
|
|
||||||
self.story.addToList('genre', genre)
|
|
||||||
|
|
||||||
elif key == u'Fejezetek':
|
|
||||||
self.story.setMetadata('numChapters', int(value))
|
|
||||||
|
|
||||||
elif key == u'Megjelenés':
|
|
||||||
self.story.setMetadata('datePublished', makeDate(value, self.DATE_FORMAT))
|
|
||||||
|
|
||||||
elif key == u'Frissítés':
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(value, self.DATE_FORMAT))
|
|
||||||
|
|
||||||
elif key == u'Szavak':
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
elif key == u'Befejezett':
|
|
||||||
self.story.setMetadata('status', 'Completed' if value == 'Nem' else 'In-Progress')
|
|
||||||
|
|
||||||
index += 2
|
|
||||||
|
|
||||||
if self.story.getMetadata('rating') == '18':
|
|
||||||
if not (self.is_adult or self.getConfig('is_adult')):
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
def getChapterText(self, url):
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
story_cell = soup.find('form', action='viewstory.php').parent.parent
|
|
||||||
|
|
||||||
for div in story_cell('div'):
|
|
||||||
div.extract()
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url, story_cell)
|
|
||||||
|
|
@ -134,7 +134,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
||||||
## restrict meta searches to header.
|
## restrict meta searches to header.
|
||||||
fichead = soup.find('div',class_='FicHead')
|
fichead = soup.find('div',class_='FicHead')
|
||||||
def get_meta_content(title):
|
def get_meta_content(title):
|
||||||
val_label = fichead.find('div',string=title+u':')
|
val_label = fichead.find('div',string=re.compile(u'^'+title+u':'))
|
||||||
if val_label:
|
if val_label:
|
||||||
return val_label.find_next('div')
|
return val_label.find_next('div')
|
||||||
|
|
||||||
|
|
@ -150,7 +150,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))
|
self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))
|
||||||
|
|
||||||
## Need to login for any rating higher than General.
|
## Need to login for any rating higher than General.
|
||||||
if self.story.getMetadata('rating') != 'General' and self.needToLoginCheck(data):
|
if self.story.getMetadataRaw('rating') != 'General' and self.needToLoginCheck(data):
|
||||||
self.performLogin(url)
|
self.performLogin(url)
|
||||||
# reload after login.
|
# reload after login.
|
||||||
data = self.get_request(url,usecache=False)
|
data = self.get_request(url,usecache=False)
|
||||||
|
|
@ -168,7 +168,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('title',stripHTML(h))
|
self.story.setMetadata('title',stripHTML(h))
|
||||||
|
|
||||||
## author(s):
|
## author(s):
|
||||||
content = get_meta_content(u'Автор')
|
content = get_meta_content(u'Авторы?')
|
||||||
if content:
|
if content:
|
||||||
alist = content.find_all('a', class_='user')
|
alist = content.find_all('a', class_='user')
|
||||||
for a in alist:
|
for a in alist:
|
||||||
|
|
@ -181,12 +181,8 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('authorUrl','https://'+self.host)
|
self.story.setMetadata('authorUrl','https://'+self.host)
|
||||||
self.story.setMetadata('authorId','0')
|
self.story.setMetadata('authorId','0')
|
||||||
|
|
||||||
# translator(s)
|
# translator(s) in different strings
|
||||||
content = get_meta_content(u'Переводчик')
|
content = get_meta_content(u'Переводчикк?и?')
|
||||||
if not content:
|
|
||||||
# Переводчик vs Переводчи is 'Translator' vs 'TranslatorS'
|
|
||||||
content = get_meta_content(u'Переводчи')
|
|
||||||
logger.debug(content)
|
|
||||||
if content:
|
if content:
|
||||||
for a in content.find_all('a', class_='user'):
|
for a in content.find_all('a', class_='user'):
|
||||||
self.story.addToList('translatorsId',a['href'].split('/user')[-1])
|
self.story.addToList('translatorsId',a['href'].split('/user')[-1])
|
||||||
|
|
@ -241,7 +237,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
## size block, only saving word count.
|
## size block, only saving word count.
|
||||||
content = get_meta_content(u'Размер')
|
content = get_meta_content(u'Размер')
|
||||||
words = stripHTML(content.find_all('li')[1])
|
words = stripHTML(content.find('a'))
|
||||||
words = re.sub(r'[^0-9]','',words) # only keep numbers
|
words = re.sub(r'[^0-9]','',words) # only keep numbers
|
||||||
self.story.setMetadata('numWords',words)
|
self.story.setMetadata('numWords',words)
|
||||||
|
|
||||||
|
|
@ -301,6 +297,10 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
||||||
# grab the text for an individual chapter.
|
# grab the text for an individual chapter.
|
||||||
def getChapterTextNum(self, url, index):
|
def getChapterTextNum(self, url, index):
|
||||||
logger.debug('Getting chapter text for: %s index: %s' % (url,index))
|
logger.debug('Getting chapter text for: %s index: %s' % (url,index))
|
||||||
|
m = re.match(r'.*&chapter=(\d+).*',url)
|
||||||
|
if m:
|
||||||
|
index=m.group(1)
|
||||||
|
logger.debug("Using index(%s) from &chapter="%index)
|
||||||
|
|
||||||
chapter_div = None
|
chapter_div = None
|
||||||
if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
|
if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
|
||||||
|
|
|
||||||
|
|
@ -44,9 +44,8 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
# get storyId from url--url validation guarantees query is only sid=1234
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
# normalized story URL.
|
||||||
self._setURL('https://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
self.story.setMetadata('siteabbrev','ahpfftc')
|
self.story.setMetadata('siteabbrev','ahpfftc')
|
||||||
|
|
@ -57,24 +56,24 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getAcceptDomains(cls):
|
def getAcceptDomains(cls):
|
||||||
return [cls.getSiteDomain(),'archive.hpfanfictalk.com']
|
return [cls.getSiteDomain(),'archive.hpfanfictalk.com','fanfictalk.com']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getConfigSections(cls):
|
def getConfigSections(cls):
|
||||||
"Only needs to be overriden if has additional ini sections."
|
"Only needs to be overriden if has additional ini sections."
|
||||||
return [cls.getConfigSection(),'archive.hpfanfictalk.com']
|
return [cls.getConfigSection(),'archive.hpfanfictalk.com','fanfictalk.com']
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
@staticmethod # must be @stgetAcceptDomainsaticmethod, don't remove it.
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
# The site domain. Does have www here, if it uses it.
|
# The site domain. Does have www here, if it uses it.
|
||||||
return 'fanfictalk.com'
|
return 'archive.fanfictalk.com'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(cls):
|
def getSiteExampleURLs(cls):
|
||||||
return "https://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234"
|
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return r"https?://(archive\.hp)?"+re.escape(self.getSiteDomain())+r"(/archive)?/viewstory\.php\?sid=\d+$"
|
return r"https?://("+r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()])+r")(/archive)?/viewstory\.php\?sid=\d+$"
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||||
def extractChapterUrlsAndMetadata(self):
|
def extractChapterUrlsAndMetadata(self):
|
||||||
|
|
@ -118,7 +117,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'https://'+self.host+'/archive/'+chapter['href'])
|
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
|
||||||
|
|
||||||
# categories
|
# categories
|
||||||
for a in soup.select("div#sort a"):
|
for a in soup.select("div#sort a"):
|
||||||
|
|
@ -171,14 +170,14 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
||||||
# Site allows stories to be in several series at once. FFF
|
# Site allows stories to be in several series at once. FFF
|
||||||
# isn't thrilled with that, we have series00, series01, etc.
|
# isn't thrilled with that, we have series00, series01, etc.
|
||||||
# Example:
|
# Example:
|
||||||
# https://fanfictalk.com/archive/viewstory.php?sid=483
|
# https://archive.fanfictalk.com/viewstory.php?sid=483
|
||||||
|
|
||||||
if self.getConfig("collect_series"):
|
if self.getConfig("collect_series"):
|
||||||
seriesspan = soup.find('span',label='Series')
|
seriesspan = soup.find('span',label='Series')
|
||||||
for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))):
|
for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))):
|
||||||
# logger.debug(seriesa)
|
# logger.debug(seriesa)
|
||||||
series_name = stripHTML(seriesa)
|
series_name = stripHTML(seriesa)
|
||||||
series_url = 'https://'+self.host+'/archive/'+seriesa['href']
|
series_url = 'https://'+self.host+'/'+seriesa['href']
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||||
|
|
@ -205,9 +204,17 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
||||||
# grab the text for an individual chapter.
|
# grab the text for an individual chapter.
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
if self.is_adult or self.getConfig("is_adult"):
|
||||||
|
# Weirdly, different sites use different warning numbers.
|
||||||
|
# If the title search below fails, there's a good chance
|
||||||
|
# you need a different number. print data at that point
|
||||||
|
# and see what the 'click here to continue' url says.
|
||||||
|
addurl = "&ageconsent=ok&warning=3"
|
||||||
|
else:
|
||||||
|
addurl=""
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
logger.debug('Getting chapter text from: %s' % (url+addurl))
|
||||||
|
soup = self.make_soup(self.get_request(url+addurl))
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'story'})
|
div = soup.find('div', {'id' : 'story'})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,274 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
# By virtue of being recent and requiring both is_adult and user/pass,
|
|
||||||
# adapter_fanficcastletvnet.py is the best choice for learning to
|
|
||||||
# write adapters--especially for sites that use the eFiction system.
|
|
||||||
# Most sites that have ".../viewstory.php?sid=123" in the story URL
|
|
||||||
# are eFiction.
|
|
||||||
|
|
||||||
# For non-eFiction sites, it can be considerably more complex, but
|
|
||||||
# this is still a good starting point.
|
|
||||||
|
|
||||||
# In general an 'adapter' needs to do these five things:
|
|
||||||
|
|
||||||
# - 'Register' correctly with the downloader
|
|
||||||
# - Site Login (if needed)
|
|
||||||
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
|
|
||||||
# - Grab the chapter list
|
|
||||||
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
|
|
||||||
# - Grab the chapter texts
|
|
||||||
|
|
||||||
# Search for XXX comments--that's where things are most likely to need changing.
|
|
||||||
|
|
||||||
# This function is called by the downloader in all adapter_*.py files
|
|
||||||
# in this dir to register the adapter class. So it needs to be
|
|
||||||
# updated to reflect the class below it. That, plus getSiteDomain()
|
|
||||||
# take care of 'Registering'.
|
|
||||||
def getClass():
|
|
||||||
return FanfictionJunkiesDeAdapter # XXX
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
|
||||||
self.story.setMetadata('siteabbrev','ffjde') # XXX
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%d/%m/%y" # XXX
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
|
||||||
def getSiteDomain():
|
|
||||||
# The site domain. Does have www here, if it uses it.
|
|
||||||
return 'fanfiction-junkies.de' # XXX
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/efiction/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
## Login seems to be reasonably standard across eFiction sites.
|
|
||||||
def needToLoginCheck(self, data):
|
|
||||||
if 'Registered Users Only' in data \
|
|
||||||
or 'There is no such account on our website' in data \
|
|
||||||
or "That password doesn't match the one in our database" in data:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def performLogin(self, url):
|
|
||||||
params = {}
|
|
||||||
|
|
||||||
if self.password:
|
|
||||||
params['penname'] = self.username
|
|
||||||
params['password'] = self.password
|
|
||||||
else:
|
|
||||||
params['penname'] = self.getConfig("username")
|
|
||||||
params['password'] = self.getConfig("password")
|
|
||||||
params['cookiecheck'] = '1'
|
|
||||||
params['submit'] = 'Submit'
|
|
||||||
|
|
||||||
loginUrl = 'http://' + self.getSiteDomain() + '/efiction/user.php?action=login'
|
|
||||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
|
|
||||||
d = self.post_request(loginUrl, params)
|
|
||||||
|
|
||||||
if "Member Account" not in d : #Member Account
|
|
||||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
raise exceptions.FailedToLogin(url,params['penname'])
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# Weirdly, different sites use different warning numbers.
|
|
||||||
# If the title search below fails, there's a good chance
|
|
||||||
# you need a different number. print data at that point
|
|
||||||
# and see what the 'click here to continue' url says.
|
|
||||||
addurl = "&ageconsent=ok&warning=1" # XXX
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
|
|
||||||
# index=1 makes sure we see the story chapter index. Some
|
|
||||||
# sites skip that for one-chapter stories.
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if self.needToLoginCheck(data):
|
|
||||||
# need to log in for this one.
|
|
||||||
self.performLogin(url)
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
# The actual text that is used to announce you need to be an
|
|
||||||
# adult varies from site to site. Again, print data before
|
|
||||||
# the title search to troubleshoot.
|
|
||||||
if "For adults only " in data: # XXX
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
# print data
|
|
||||||
|
|
||||||
|
|
||||||
pagetitle = soup.find('h4')
|
|
||||||
## Title
|
|
||||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
self.story.setMetadata('title',a.string)
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/efiction/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Reviews
|
|
||||||
reviewdata = soup.find('div', {'id' : 'sort'})
|
|
||||||
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
|
|
||||||
self.story.setMetadata('reviews',stripHTML(a))
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/efiction/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
|
||||||
# formating, so it's a little ugly.
|
|
||||||
|
|
||||||
# utility method
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
list = soup.find('div', {'class':'listbox'})
|
|
||||||
|
|
||||||
|
|
||||||
labels = list.findAll('b')
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Zusammenfassung' in label:
|
|
||||||
self.setDescription(url,value)
|
|
||||||
|
|
||||||
if 'Eingestuft' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if u'Wörter' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Kategorie' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
for cat in cats:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Charaktere' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
for char in chars:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Abgeschlossen' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if u'Veröffentlicht' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Aktualisiert' in label:
|
|
||||||
# there's a stray [ at the end.
|
|
||||||
#value = value[0:-1]
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'http://'+self.host+'/efiction/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'story'})
|
|
||||||
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -35,7 +35,7 @@ ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy",
|
||||||
"Mystery", "Parody", "Poetry", "Romance", "Sci-Fi", "Spiritual",
|
"Mystery", "Parody", "Poetry", "Romance", "Sci-Fi", "Spiritual",
|
||||||
"Supernatural", "Suspense", "Tragedy", "Western"]
|
"Supernatural", "Suspense", "Tragedy", "Western"]
|
||||||
|
|
||||||
ffnetpluscategories=["+Anima", "Rosario + Vampire", "Blood+",
|
ffnetpluscategories=["+Anima", "Alex + Ada", "Rosario + Vampire", "Blood+",
|
||||||
"+C: Sword and Cornett", "Norn9 - ノルン+ノネット",
|
"+C: Sword and Cornett", "Norn9 - ノルン+ノネット",
|
||||||
"Haré+Guu/ジャングルはいつもハレのちグゥ", "Lost+Brain",
|
"Haré+Guu/ジャングルはいつもハレのちグゥ", "Lost+Brain",
|
||||||
"Wicked + The Divine", "Alex + Ada", "RE: Alistair++",
|
"Wicked + The Divine", "Alex + Ada", "RE: Alistair++",
|
||||||
|
|
@ -93,15 +93,47 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
# logger.debug("post-url:%s"%url)
|
# logger.debug("post-url:%s"%url)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_url_search(cls,url):
|
||||||
|
regexp = super(getClass(), cls).get_url_search(url)
|
||||||
|
regexp = re.sub(r"^(?P<keep>.*net/s/\d+/\d+/)(?P<urltitle>[^\$]*)?",
|
||||||
|
r"\g<keep>(.*)",regexp)
|
||||||
|
logger.debug(regexp)
|
||||||
|
return regexp
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return self._get_site_url_pattern()
|
return self._get_site_url_pattern()
|
||||||
|
|
||||||
## not actually putting urltitle on multi-chapters below, but
|
## normalized chapter URLs DO contain the story title now, but
|
||||||
## one-shots will have it, so this is still useful. normalized
|
## normalized to current urltitle in case of title changes.
|
||||||
## chapter URLs do NOT contain the story title.
|
|
||||||
def normalize_chapterurl(self,url):
|
def normalize_chapterurl(self,url):
|
||||||
return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
|
return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
|
||||||
r"https://www.\g<keep>",url)
|
r"https://www.\g<keep>",url)+self.urltitle
|
||||||
|
|
||||||
|
def get_request(self,url,usecache=True):
|
||||||
|
## use super version if not set or isn't a chapter URL with a
|
||||||
|
## title.
|
||||||
|
if( not self.getConfig("try_shortened_title_urls") or
|
||||||
|
not re.match(r"https?://www\.fanfiction\.net/s/\d+/\d+/(?P<title>[^/]+)$", url) ):
|
||||||
|
return super(getClass(), self).get_request(url,usecache)
|
||||||
|
|
||||||
|
## kludgey way to attempt more than one URL variant by
|
||||||
|
## removing title one letter at a time. Note that network and
|
||||||
|
## open_pages_in_browser retries still happen first.
|
||||||
|
titlelen = len(url.split('/')[-1])
|
||||||
|
maxcut = min([4,titlelen])
|
||||||
|
j = 0
|
||||||
|
while j < maxcut: # should actually leave loop either by
|
||||||
|
# return or exception raise.
|
||||||
|
try:
|
||||||
|
useurl = url
|
||||||
|
if j: # j==0, full URL, then remove letters.
|
||||||
|
useurl = url[:-j]
|
||||||
|
return super(getClass(), self).get_request(useurl,usecache)
|
||||||
|
except exceptions.HTTPErrorFFF as fffe:
|
||||||
|
if j >= maxcut or 'Page not found or expired' not in unicode(fffe):
|
||||||
|
raise
|
||||||
|
j = j+1
|
||||||
|
|
||||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||||
|
|
||||||
|
|
@ -130,18 +162,18 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
canonicalurl = soup.select_one('link[rel=canonical]')['href']
|
canonicalurl = soup.select_one('link[rel=canonical]')['href']
|
||||||
self.set_story_idurl(canonicalurl)
|
self.set_story_idurl(canonicalurl)
|
||||||
|
|
||||||
|
## ffnet used to have a tendency to send out update notices in
|
||||||
|
## email before all their servers were showing the update on
|
||||||
|
## the first chapter. It generates another server request and
|
||||||
|
## doesn't seem to be needed lately, so now default it to off.
|
||||||
|
try:
|
||||||
|
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
|
||||||
|
# get chapter part of url.
|
||||||
|
except:
|
||||||
|
chapcount = 1
|
||||||
|
have_later_meta = False
|
||||||
if self.getConfig('check_next_chapter'):
|
if self.getConfig('check_next_chapter'):
|
||||||
try:
|
try:
|
||||||
## ffnet used to have a tendency to send out update
|
|
||||||
## notices in email before all their servers were
|
|
||||||
## showing the update on the first chapter. It
|
|
||||||
## generates another server request and doesn't seem
|
|
||||||
## to be needed lately, so now default it to off.
|
|
||||||
try:
|
|
||||||
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
|
|
||||||
# get chapter part of url.
|
|
||||||
except:
|
|
||||||
chapcount = 1
|
|
||||||
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
||||||
self.story.getMetadata('storyId'),
|
self.story.getMetadata('storyId'),
|
||||||
chapcount+1,
|
chapcount+1,
|
||||||
|
|
@ -152,9 +184,20 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
and "This request takes too long to process, it is timed out by the server." not in newdata:
|
and "This request takes too long to process, it is timed out by the server." not in newdata:
|
||||||
logger.debug('=======Found newer chapter: %s' % tryurl)
|
logger.debug('=======Found newer chapter: %s' % tryurl)
|
||||||
soup = self.make_soup(newdata)
|
soup = self.make_soup(newdata)
|
||||||
|
have_later_meta = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Caught exception in check_next_chapter URL: %s Exception %s."%(unicode(tryurl),unicode(e)))
|
logger.warning("Caught exception in check_next_chapter URL: %s Exception %s."%(unicode(tryurl),unicode(e)))
|
||||||
|
|
||||||
|
if self.getConfig('meta_from_last_chapter') and not have_later_meta and chapcount > 1:
|
||||||
|
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
||||||
|
self.story.getMetadata('storyId'),
|
||||||
|
chapcount,
|
||||||
|
self.urltitle)
|
||||||
|
logger.debug('=Trying last chapter for meta_from_last_chapter: %s' % tryurl)
|
||||||
|
newdata = self.get_request(tryurl)
|
||||||
|
soup = self.make_soup(newdata)
|
||||||
|
have_later_meta = True
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
# Find authorid and URL from... author url.
|
||||||
a = soup.find('a', href=re.compile(r"^/u/\d+"))
|
a = soup.find('a', href=re.compile(r"^/u/\d+"))
|
||||||
self.story.setMetadata('authorId',a['href'].split('/')[2])
|
self.story.setMetadata('authorId',a['href'].split('/')[2])
|
||||||
|
|
@ -169,7 +212,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
## For 1, use the second link.
|
## For 1, use the second link.
|
||||||
## For 2, fetch the crossover page and pull the two categories from there.
|
## For 2, fetch the crossover page and pull the two categories from there.
|
||||||
pre_links = soup.find('div',{'id':'pre_story_links'})
|
pre_links = soup.find('div',{'id':'pre_story_links'})
|
||||||
categories = pre_links.findAll('a',{'class':'xcontrast_txt'})
|
categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
|
||||||
#print("xcontrast_txt a:%s"%categories)
|
#print("xcontrast_txt a:%s"%categories)
|
||||||
if len(categories) > 1:
|
if len(categories) > 1:
|
||||||
# Strangely, the ones with *two* links are the
|
# Strangely, the ones with *two* links are the
|
||||||
|
|
@ -208,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
|
||||||
grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
|
grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
|
||||||
# for b in grayspan.findAll('button'):
|
# for b in grayspan.find_all('button'):
|
||||||
# b.extract()
|
# b.extract()
|
||||||
metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
|
metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
|
||||||
#logger.debug("metatext:(%s)"%metatext)
|
#logger.debug("metatext:(%s)"%metatext)
|
||||||
|
|
@ -247,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
|
# Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
|
||||||
# Published: <span data-xutime='1384358726'>8m ago</span>
|
# Published: <span data-xutime='1384358726'>8m ago</span>
|
||||||
dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
|
dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
|
||||||
if len(dates) > 1 :
|
if len(dates) > 1 :
|
||||||
# updated get set to the same as published upstream if not found.
|
# updated get set to the same as published upstream if not found.
|
||||||
self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
|
self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
|
||||||
|
|
@ -298,15 +341,14 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
img = soup.select_one('img.lazy.cimage')
|
img = soup.select_one('img.lazy.cimage')
|
||||||
cover_url=img['data-original']
|
cover_url=img['data-original']
|
||||||
except:
|
except:
|
||||||
img = soup.select_one('img.cimage:not(.lazy)')
|
## Nov 2023 - src is always "/static/images/d_60_90.jpg" now
|
||||||
if img:
|
## Only take cover if there's data-original
|
||||||
cover_url=img['src']
|
## Primary motivator is to prevent unneeded author page hits.
|
||||||
## Nov 19, 2020, ffnet lazy cover images returning 0 byte
|
pass
|
||||||
## files.
|
logger.debug("cover_url:%s"%cover_url)
|
||||||
# logger.debug("cover_url:%s"%cover_url)
|
|
||||||
|
|
||||||
authimg_url = ""
|
authimg_url = ""
|
||||||
if cover_url and self.getConfig('skip_author_cover'):
|
if cover_url and self.getConfig('skip_author_cover') and self.getConfig('include_images'):
|
||||||
try:
|
try:
|
||||||
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||||
try:
|
try:
|
||||||
|
|
@ -353,31 +395,37 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
# no selector found, so it's a one-chapter story.
|
# no selector found, so it's a one-chapter story.
|
||||||
self.add_chapter(self.story.getMetadata('title'),url)
|
self.add_chapter(self.story.getMetadata('title'),url)
|
||||||
else:
|
else:
|
||||||
allOptions = select.findAll('option')
|
allOptions = select.find_all('option')
|
||||||
for o in allOptions:
|
for o in allOptions:
|
||||||
url = u'https://%s/s/%s/%s/' % ( self.getSiteDomain(),
|
## title URL will be put back on chapter URL during
|
||||||
self.story.getMetadata('storyId'),
|
## normalize_chapterurl() anyway, but also here for
|
||||||
o['value'])
|
## clarity
|
||||||
|
url = u'https://%s/s/%s/%s/%s' % ( self.getSiteDomain(),
|
||||||
|
self.story.getMetadata('storyId'),
|
||||||
|
o['value'],
|
||||||
|
self.urltitle)
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
title = u"%s" % o
|
title = u"%s" % o
|
||||||
title = re.sub(r'<[^>]+>','',title)
|
title = re.sub(r'<[^>]+>','',title)
|
||||||
self.add_chapter(title,url)
|
self.add_chapter(title,url)
|
||||||
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
logger.debug('Getting chapter text from: %s' % (url))
|
||||||
|
|
||||||
## AND explicitly put title URL back on chapter URL for fetch
|
## title URL was put back on chapter URL during
|
||||||
## *only*--normalized chapter URL does NOT have urltitle
|
## normalize_chapterurl()
|
||||||
data = self.get_request(url+self.urltitle)
|
data = self.get_request(url)
|
||||||
|
|
||||||
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
|
if "Please email this error message in full to <a href='mailto:" in data:
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
|
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
## remove inline ads -- only seen with flaresolverr
|
||||||
|
for adtag in soup.select("div.google-auto-placed"):
|
||||||
|
adtag.decompose()
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'storytextp'})
|
div = soup.find('div', {'id' : 'storytextp'})
|
||||||
|
|
||||||
if None == div:
|
if None == div:
|
||||||
|
|
|
||||||
157
fanficfare/adapters/adapter_fanfictionsfr.py
Normal file
157
fanficfare/adapters/adapter_fanfictionsfr.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2024 FanFicFare team
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
# py2 vs py3 transition
|
||||||
|
|
||||||
|
from .base_adapter import BaseSiteAdapter, makeDate
|
||||||
|
from fanficfare.htmlcleanup import stripHTML
|
||||||
|
from .. import exceptions as exceptions
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def getClass():
|
||||||
|
return FanfictionsFrSiteAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class FanfictionsFrSiteAdapter(BaseSiteAdapter):
|
||||||
|
def __init__(self, config, url):
|
||||||
|
BaseSiteAdapter.__init__(self, config, url)
|
||||||
|
self.story.setMetadata('siteabbrev', 'fanfictionsfr')
|
||||||
|
self.story.setMetadata('langcode','fr')
|
||||||
|
self.story.setMetadata('language','Français')
|
||||||
|
|
||||||
|
# get storyId from url--url validation guarantees query correct
|
||||||
|
match = re.match(self.getSiteURLPattern(), url)
|
||||||
|
if not match:
|
||||||
|
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())
|
||||||
|
|
||||||
|
story_id = match.group('id')
|
||||||
|
self.story.setMetadata('storyId', story_id)
|
||||||
|
fandom_name = match.group('fandom')
|
||||||
|
|
||||||
|
self._setURL('https://%s/fanfictions/%s/%s/chapters.html' % (self.getSiteDomain(), fandom_name, story_id))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getSiteDomain():
|
||||||
|
return 'www.fanfictions.fr'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getSiteExampleURLs(cls):
|
||||||
|
return 'https://%s/fanfictions/fandom/fanfiction-id/chapters.html' % cls.getSiteDomain()
|
||||||
|
|
||||||
|
def getSiteURLPattern(self):
|
||||||
|
return r'https?://(?:www\.)?fanfictions\.fr/fanfictions/(?P<fandom>[^/]+)/(?P<id>[^/]+)(/chapters.html)?'
|
||||||
|
|
||||||
|
def extractChapterUrlsAndMetadata(self):
|
||||||
|
logger.debug('URL: %s', self.url)
|
||||||
|
|
||||||
|
data = self.get_request(self.url)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
# detect if the fanfiction is 'suspended' (chapters unavailable)
|
||||||
|
alert_div = soup.find('div', id='alertInactiveFic')
|
||||||
|
if alert_div:
|
||||||
|
raise exceptions.FailedToDownload("Failed to download the fanfiction, most likely because it is suspended.")
|
||||||
|
|
||||||
|
title_element = soup.find('h1', itemprop='name')
|
||||||
|
self.story.setMetadata('title', stripHTML(title_element))
|
||||||
|
|
||||||
|
author_div = soup.find('div', itemprop='author')
|
||||||
|
author_name = stripHTML(author_div.a)
|
||||||
|
author_id = author_div.a['href'].split('/')[-1].replace('.html', '')
|
||||||
|
|
||||||
|
self.story.setMetadata('author', author_name)
|
||||||
|
self.story.setMetadata('authorId', author_id)
|
||||||
|
|
||||||
|
published_date_element = soup.find('span', class_='date-distance')
|
||||||
|
published_date_text = published_date_element['data-date']
|
||||||
|
published_date = makeDate(published_date_text, '%Y-%m-%d %H:%M:%S')
|
||||||
|
if published_date:
|
||||||
|
self.story.setMetadata('datePublished', published_date)
|
||||||
|
|
||||||
|
status_element = soup.find('p', title="Statut de la fanfiction").find('span', class_='badge')
|
||||||
|
french_status = stripHTML(status_element)
|
||||||
|
status_translation = {
|
||||||
|
"En cours": "In-Progress",
|
||||||
|
"Terminée": "Completed",
|
||||||
|
"One-shot": "Completed",
|
||||||
|
}
|
||||||
|
self.story.setMetadata('status', status_translation.get(french_status, french_status))
|
||||||
|
|
||||||
|
genre_elements = soup.find('div', title="Format et genres").find_all('span', class_="highlightable")
|
||||||
|
self.story.extendList('genre', [ stripHTML(genre) for genre in genre_elements[1:] ])
|
||||||
|
|
||||||
|
category_elements = soup.find_all('li', class_="breadcrumb-item")
|
||||||
|
self.story.extendList('category', [ stripHTML(category) for category in category_elements[-2].find_all('a') ])
|
||||||
|
|
||||||
|
first_description = soup.find('p', itemprop='abstract')
|
||||||
|
self.setDescription(self.url, first_description)
|
||||||
|
|
||||||
|
chapter_cards = soup.find_all(class_=['card', 'chapter'])
|
||||||
|
|
||||||
|
for chapter_card in chapter_cards:
|
||||||
|
chapter_title_tag = chapter_card.find('h2')
|
||||||
|
if chapter_title_tag:
|
||||||
|
chapter_title = stripHTML(chapter_title_tag)
|
||||||
|
chapter_link = 'https://'+self.getSiteDomain()+chapter_title_tag.find('a')['href']
|
||||||
|
|
||||||
|
# Clean up the chapter title by replacing multiple spaces and newline characters with a single space
|
||||||
|
chapter_title = re.sub(r'\s+', ' ', chapter_title)
|
||||||
|
|
||||||
|
self.add_chapter(chapter_title, chapter_link)
|
||||||
|
|
||||||
|
last_chapter_div = chapter_cards[-1]
|
||||||
|
updated_date_element = last_chapter_div.find('span', class_='date-distance')
|
||||||
|
last_chapter_update_date = updated_date_element['data-date']
|
||||||
|
date = makeDate(last_chapter_update_date, '%Y-%m-%d %H:%M:%S')
|
||||||
|
if date:
|
||||||
|
self.story.setMetadata('dateUpdated', date)
|
||||||
|
|
||||||
|
|
||||||
|
def getChapterText(self, url):
|
||||||
|
logger.debug('Getting chapter text from: %s' % url)
|
||||||
|
|
||||||
|
response, redirection_url = self.get_request_redirected(url)
|
||||||
|
|
||||||
|
if "telecharger_pdf.html" in redirection_url:
|
||||||
|
with zipfile.ZipFile(io.BytesIO(response.encode('latin1'))) as z:
|
||||||
|
# Assuming there's only one text file inside the zip
|
||||||
|
file_list = z.namelist()
|
||||||
|
if len(file_list) != 1:
|
||||||
|
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Zip file should contain exactly one text file!" % url)
|
||||||
|
text_filename = file_list[0]
|
||||||
|
with z.open(text_filename) as text_file:
|
||||||
|
# Decode the text file with windows-1252 encoding
|
||||||
|
text = text_file.read().decode('windows-1252')
|
||||||
|
return text.replace("\r\n", "<br>\r\n")
|
||||||
|
else:
|
||||||
|
soup = self.make_soup(response)
|
||||||
|
|
||||||
|
div_content = soup.find('div', id='readarea')
|
||||||
|
if div_content is None:
|
||||||
|
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||||
|
|
||||||
|
return self.utf8FromSoup(url, div_content)
|
||||||
|
|
@ -119,7 +119,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
||||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.")
|
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.")
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
# print data
|
# logger.debug(data)
|
||||||
|
|
||||||
|
|
||||||
## Title
|
## Title
|
||||||
|
|
@ -134,7 +134,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('author',stripHTML(a))
|
self.story.setMetadata('author',stripHTML(a))
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.find('select').findAll('option'):
|
for chapter in soup.find('select').find_all('option'):
|
||||||
self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
|
self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
|
||||||
|
|
||||||
## title="Wörter" failed with max_zalgo:1
|
## title="Wörter" failed with max_zalgo:1
|
||||||
|
|
@ -163,29 +163,31 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
||||||
except e:
|
except e:
|
||||||
logger.debug("Failed to find native status:%s"%e)
|
logger.debug("Failed to find native status:%s"%e)
|
||||||
|
|
||||||
if head.find('span',title='Fertiggestellt'):
|
if head.find('span',title='fertiggestellt'):
|
||||||
self.story.setMetadata('status', 'Completed')
|
self.story.setMetadata('status', 'Completed')
|
||||||
elif head.find('span',title='Pausiert'):
|
elif head.find('span',title='pausiert'):
|
||||||
self.story.setMetadata('status', 'Paused')
|
self.story.setMetadata('status', 'Paused')
|
||||||
elif head.find('span',title='Abgebrochen'):
|
elif head.find('span',title='abgebrochen'):
|
||||||
self.story.setMetadata('status', 'Cancelled')
|
self.story.setMetadata('status', 'Cancelled')
|
||||||
else:
|
else:
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
self.story.setMetadata('status', 'In-Progress')
|
||||||
|
|
||||||
## Get description from own URL:
|
## Get description
|
||||||
## /?a=v&storyid=46ccbef30000616306614050&s=1
|
descdiv = soup.select_one('div#story-summary-inline div')
|
||||||
descsoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"/?a=v&storyid="+self.story.getMetadata('storyId')+"&s=1"))
|
if descdiv:
|
||||||
self.setDescription(url,stripHTML(descsoup))
|
if 'center' in descdiv['class']:
|
||||||
|
del descdiv['class']
|
||||||
|
self.setDescription(url,descdiv)
|
||||||
|
|
||||||
# #find metadata on the author's page
|
# #find metadata on the author's page
|
||||||
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
||||||
# tr=asoup.findAll('tr')
|
# tr=asoup.find_all('tr')
|
||||||
# for i in range(1,len(tr)):
|
# for i in range(1,len(tr)):
|
||||||
# a = tr[i].find('a')
|
# a = tr[i].find('a')
|
||||||
# if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
|
# if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
|
||||||
# break
|
# break
|
||||||
|
|
||||||
# td = tr[i].findAll('td')
|
# td = tr[i].find_all('td')
|
||||||
# self.story.addToList('category',stripHTML(td[2]))
|
# self.story.addToList('category',stripHTML(td[2]))
|
||||||
# self.story.setMetadata('rating', stripHTML(td[5]))
|
# self.story.setMetadata('rating', stripHTML(td[5]))
|
||||||
# self.story.setMetadata('numWords', stripHTML(td[6]))
|
# self.story.setMetadata('numWords', stripHTML(td[6]))
|
||||||
|
|
@ -202,7 +204,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
||||||
soup = self.make_soup(self.get_request(url))
|
soup = self.make_soup(self.get_request(url))
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'storytext'})
|
div = soup.find('div', {'id' : 'storytext'})
|
||||||
for a in div.findAll('script'):
|
for a in div.find_all('script'):
|
||||||
a.extract()
|
a.extract()
|
||||||
|
|
||||||
if None == div:
|
if None == div:
|
||||||
|
|
|
||||||
|
|
@ -1,134 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Copyright 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
####################################################################################################
|
|
||||||
### Adapted by Rikkit on November 7. 2017
|
|
||||||
###=================================================================================================
|
|
||||||
### Tested with Calibre
|
|
||||||
####################################################################################################
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
# py2 vs py3 transition
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
''' Initializing the class '''
|
|
||||||
return FastNovelNetAdapter
|
|
||||||
|
|
||||||
class FastNovelNetAdapter(BaseSiteAdapter):
|
|
||||||
''' Adapter for FASTNOVEL.net '''
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.story.setMetadata('siteabbrev', 'fstnvl')
|
|
||||||
|
|
||||||
self.dateformat = '%d/%m/%Y'
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query correct
|
|
||||||
match = re.match(self.getSiteURLPattern(), url)
|
|
||||||
if not match:
|
|
||||||
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())
|
|
||||||
|
|
||||||
story_id = match.group('id')
|
|
||||||
self.story.setMetadata('storyId', story_id)
|
|
||||||
self._setURL('https://%s/%s/' % (self.getSiteDomain(), story_id))
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'fastnovel.net'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "https://fastnovel.net/a-story-name-id"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
# https://fastnovel.net/ultimate-scheming-system-158/
|
|
||||||
return r"https?://fastnovel\.net/(?P<id>[^/]+)"
|
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
logger.debug('URL: %s', self.url)
|
|
||||||
|
|
||||||
data = self.get_request(self.url)
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
self.story.setMetadata('title', soup.find('h1').string)
|
|
||||||
|
|
||||||
for li in soup.select('.meta-data li'):
|
|
||||||
label = li.select_one('label')
|
|
||||||
if not label:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if label.string == "Author:":
|
|
||||||
for a in li.select('a'):
|
|
||||||
self.story.setMetadata('authorId', a["href"].split('/')[2])
|
|
||||||
self.story.setMetadata('authorUrl','https://'+self.host+a["href"])
|
|
||||||
self.story.setMetadata('author', a["title"])
|
|
||||||
|
|
||||||
if label.string == "Genre:":
|
|
||||||
for a in li.select('a'):
|
|
||||||
self.story.addToList('genre',a["title"])
|
|
||||||
|
|
||||||
if label.string == "Status:":
|
|
||||||
if li.select_one('strong').string.strip() == "Completed":
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if label.string == "Last updated:":
|
|
||||||
dateUpd = label.next_sibling.strip()
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(dateUpd), self.dateformat))
|
|
||||||
|
|
||||||
coverurl = soup.select_one('div.book-cover')["data-original"]
|
|
||||||
if coverurl != "https://fastnovel.net/images/novel/default.jpg":
|
|
||||||
self.setCoverImage(self.url, coverurl)
|
|
||||||
|
|
||||||
tags = soup.select_one('.tags')
|
|
||||||
if tags:
|
|
||||||
for a in tags.select("li.tag-item a"):
|
|
||||||
self.story.addToList('tags', a["title"])
|
|
||||||
# extract tags, because it inside description
|
|
||||||
tags.extract()
|
|
||||||
|
|
||||||
# remove title from description
|
|
||||||
soup.select_one('.film-content h3').extract()
|
|
||||||
desc = soup.select_one('.film-content').extract()
|
|
||||||
self.setDescription(self.url, desc)
|
|
||||||
|
|
||||||
for book in soup.select("#list-chapters .book"):
|
|
||||||
volume = book.select_one('.title a').string
|
|
||||||
for a in book.select(".list-chapters a.chapter"):
|
|
||||||
title = volume + " " + stripHTML(a)
|
|
||||||
self.add_chapter(title, 'https://' + self.host + a["href"])
|
|
||||||
|
|
||||||
def getChapterText(self, url):
|
|
||||||
data = self.get_request(url)
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
story = soup.select_one('#chapter-body')
|
|
||||||
if not story:
|
|
||||||
raise exceptions.FailedToDownload(
|
|
||||||
"Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url, story)
|
|
||||||
|
|
@ -15,16 +15,16 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import,unicode_literals
|
||||||
import datetime
|
# import datetime
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
import json
|
||||||
import re
|
import re
|
||||||
from .. import translit
|
# from .. import translit
|
||||||
|
|
||||||
|
|
||||||
from ..htmlcleanup import stripHTML
|
from ..htmlcleanup import stripHTML
|
||||||
from .. import exceptions as exceptions
|
from .. import exceptions# as exceptions
|
||||||
|
|
||||||
# py2 vs py3 transition
|
# py2 vs py3 transition
|
||||||
|
|
||||||
|
|
@ -58,7 +58,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
# The date format will vary from site to site.
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||||
self.dateformat = "%d %m %Y"
|
self.dateformat = u"%d %m %Y г., %H:%M"
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
@staticmethod # must be @staticmethod, don't remove it.
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
|
|
@ -67,17 +67,33 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(cls):
|
def getSiteExampleURLs(cls):
|
||||||
return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content"
|
return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82 https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82/94793742#part_content"
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"\d+"
|
return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"[\d\-a-zA-Z]+"
|
||||||
|
|
||||||
|
def performLogin(self,url,data):
|
||||||
|
params = {}
|
||||||
|
if self.password:
|
||||||
|
params['login'] = self.username
|
||||||
|
params['password'] = self.password
|
||||||
|
else:
|
||||||
|
params['login'] = self.getConfig("username")
|
||||||
|
params['password'] = self.getConfig("password")
|
||||||
|
|
||||||
|
logger.debug("Try to login in as (%s)" % params['login'])
|
||||||
|
d = self.post_request('https://' + self.getSiteDomain() + '/login_check_static',params,usecache=False)
|
||||||
|
|
||||||
|
if 'Войти используя аккаунт на сайте' in d:
|
||||||
|
raise exceptions.FailedToLogin(url,params['login'])
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||||
def extractChapterUrlsAndMetadata(self):
|
def extractChapterUrlsAndMetadata(self,get_cover=True):
|
||||||
url=self.url
|
url=self.url
|
||||||
logger.debug("URL: "+url)
|
logger.debug("URL: "+url)
|
||||||
data = self.get_request(url)
|
data = self.get_request(url)
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
adult_div = soup.find('div',id='adultCoverWarning')
|
adult_div = soup.find('div',id='adultCoverWarning')
|
||||||
|
|
@ -87,9 +103,11 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
else:
|
else:
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
raise exceptions.AdultCheckRequired(self.url)
|
||||||
|
|
||||||
|
|
||||||
## Title
|
## Title
|
||||||
a = soup.find('section',{'class':'chapter-info'}).find('h1')
|
try:
|
||||||
|
a = soup.find('section',{'class':'chapter-info'}).find('h1')
|
||||||
|
except AttributeError:
|
||||||
|
raise exceptions.FailedToDownload("Error collecting meta: %s! Missing required element!" % url)
|
||||||
# kill '+' marks if present.
|
# kill '+' marks if present.
|
||||||
sup = a.find('sup')
|
sup = a.find('sup')
|
||||||
if sup:
|
if sup:
|
||||||
|
|
@ -99,40 +117,12 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
# Find authorid and URL from... author url.
|
||||||
# assume first avatar-nickname -- there can be a second marked 'beta'.
|
# assume first avatar-nickname -- there can be a second marked 'beta'.
|
||||||
a = soup.find('a',{'class':'creator-nickname'})
|
a = soup.find('a',{'class':'creator-username'})
|
||||||
self.story.setMetadata('authorId',a.text) # Author's name is unique
|
self.story.setMetadata('authorId',a.text) # Author's name is unique
|
||||||
self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
|
self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
|
||||||
self.story.setMetadata('author',a.text)
|
self.story.setMetadata('author',a.text)
|
||||||
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
|
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
pubdate = None
|
|
||||||
chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
|
|
||||||
if chapters != None:
|
|
||||||
for chapdiv in chapters.findAll('li', {'class':'part'}):
|
|
||||||
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
|
|
||||||
churl='https://'+self.host+chapter['href']
|
|
||||||
self.add_chapter(chapter,churl)
|
|
||||||
|
|
||||||
datespan = chapdiv.find('span')
|
|
||||||
if pubdate == None and datespan:
|
|
||||||
pubdate = translit.translit(stripHTML(datespan))
|
|
||||||
update = translit.translit(stripHTML(datespan))
|
|
||||||
else:
|
|
||||||
self.add_chapter(self.story.getMetadata('title'),url)
|
|
||||||
self.story.setMetadata('numChapters',1)
|
|
||||||
pubdate=translit.translit(stripHTML(soup.find('div',{'class':'title-area'}).find('span')))
|
|
||||||
update=pubdate
|
|
||||||
|
|
||||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
|
||||||
|
|
||||||
if not ',' in pubdate:
|
|
||||||
pubdate=datetime.date.today().strftime(self.dateformat)
|
|
||||||
if not ',' in update:
|
|
||||||
update=datetime.date.today().strftime(self.dateformat)
|
|
||||||
pubdate=pubdate.split(',')[0]
|
|
||||||
update=update.split(',')[0]
|
|
||||||
|
|
||||||
fullmon = {"yanvarya":"01", u"января":"01",
|
fullmon = {"yanvarya":"01", u"января":"01",
|
||||||
"fievralya":"02", u"февраля":"02",
|
"fievralya":"02", u"февраля":"02",
|
||||||
"marta":"03", u"марта":"03",
|
"marta":"03", u"марта":"03",
|
||||||
|
|
@ -146,31 +136,50 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
"noyabrya":"11", u"ноября":"11",
|
"noyabrya":"11", u"ноября":"11",
|
||||||
"diekabrya":"12", u"декабря":"12" }
|
"diekabrya":"12", u"декабря":"12" }
|
||||||
|
|
||||||
for (name,num) in fullmon.items():
|
# Find the chapters:
|
||||||
if name in pubdate:
|
pubdate = None
|
||||||
pubdate = pubdate.replace(name,num)
|
chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
|
||||||
if name in update:
|
if chapters is not None:
|
||||||
update = update.replace(name,num)
|
for chapdiv in chapters.find_all('li', {'class':'part'}):
|
||||||
|
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
|
||||||
|
churl='https://'+self.host+chapter['href']
|
||||||
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
|
# Find the chapter dates.
|
||||||
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
|
date_str = chapdiv.find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
|
||||||
|
for month_name, month_num in fullmon.items():
|
||||||
|
date_str = date_str.replace(month_name, month_num)
|
||||||
|
chapterdate = makeDate(date_str,self.dateformat)
|
||||||
|
self.add_chapter(chapter,churl,
|
||||||
|
{'date':chapterdate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format",self.dateformat)))})
|
||||||
|
|
||||||
|
if pubdate is None and chapterdate:
|
||||||
|
pubdate = chapterdate
|
||||||
|
update = chapterdate
|
||||||
|
else:
|
||||||
|
self.add_chapter(self.story.getMetadata('title'),url)
|
||||||
|
date_str = soup.find('div', {'class' : 'part-date'}).find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
|
||||||
|
for month_name, month_num in fullmon.items():
|
||||||
|
date_str = date_str.replace(month_name, month_num)
|
||||||
|
pubdate = update = makeDate(date_str,self.dateformat)
|
||||||
|
|
||||||
|
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||||
|
|
||||||
|
self.story.setMetadata('dateUpdated', update)
|
||||||
|
self.story.setMetadata('datePublished', pubdate)
|
||||||
self.story.setMetadata('language','Russian')
|
self.story.setMetadata('language','Russian')
|
||||||
|
|
||||||
## after site change, I don't see word count anywhere.
|
dlinfo = soup.select_one('header.d-flex.flex-column.gap-12.word-break')
|
||||||
# pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
|
|
||||||
# pr='https://'+self.host+pr['href']
|
|
||||||
# pr = self.make_soup(self.get_request(pr))
|
|
||||||
# pr=pr.findAll('div', {'class' : 'part_text'})
|
|
||||||
# i=0
|
|
||||||
# for part in pr:
|
|
||||||
# i=i+len(stripHTML(part).split(' '))
|
|
||||||
# self.story.setMetadata('numWords', unicode(i))
|
|
||||||
|
|
||||||
|
series_label = dlinfo.select_one('div.description.word-break').find('strong', string='Серия:')
|
||||||
dlinfo = soup.find('div',{'class':'fanfic-main-info'})
|
logger.debug('Series: %s'%str(series_label))
|
||||||
|
if series_label:
|
||||||
|
series_div = series_label.find_next_sibling("div")
|
||||||
|
# No accurate series number as for that, additional request needs to be made
|
||||||
|
self.setSeries(stripHTML(series_div.a), 1)
|
||||||
|
self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
fandoms = dlinfo.find('div').findAll('a', href=re.compile(r'/fanfiction/\w+'))
|
fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
|
||||||
for fandom in fandoms:
|
for fandom in fandoms:
|
||||||
self.story.addToList('category',fandom.string)
|
self.story.addToList('category',fandom.string)
|
||||||
i=i+1
|
i=i+1
|
||||||
|
|
@ -179,13 +188,16 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
tags = soup.find('div',{'class':'tags'})
|
tags = soup.find('div',{'class':'tags'})
|
||||||
if tags:
|
if tags:
|
||||||
for genre in tags.findAll('a',href=re.compile(r'/tags/')):
|
for genre in tags.find_all('a',href=re.compile(r'/tags/')):
|
||||||
self.story.addToList('genre',stripHTML(genre))
|
self.story.addToList('genre',stripHTML(genre))
|
||||||
|
|
||||||
ratingdt = dlinfo.find('strong',{'class':re.compile(r'badge-rating-.*')})
|
logger.debug("category: (%s)"%self.story.getMetadata('category'))
|
||||||
self.story.setMetadata('rating', stripHTML(ratingdt.find_next('span')))
|
logger.debug("genre: (%s)"%self.story.getMetadata('genre'))
|
||||||
|
|
||||||
# meta=table.findAll('a', href=re.compile(r'/ratings/'))
|
ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
|
||||||
|
self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))
|
||||||
|
|
||||||
|
# meta=table.find_all('a', href=re.compile(r'/ratings/'))
|
||||||
# i=0
|
# i=0
|
||||||
# for m in meta:
|
# for m in meta:
|
||||||
# if i == 0:
|
# if i == 0:
|
||||||
|
|
@ -198,12 +210,17 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
# elif i == 2:
|
# elif i == 2:
|
||||||
# self.story.addToList('warnings', m.find('b').text)
|
# self.story.addToList('warnings', m.find('b').text)
|
||||||
|
|
||||||
if dlinfo.find('span', {'class':'badge-status-finished'}):
|
if dlinfo.find('div', {'class':'badge-status-finished'}):
|
||||||
self.story.setMetadata('status', 'Completed')
|
self.story.setMetadata('status', 'Completed')
|
||||||
else:
|
else:
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
self.story.setMetadata('status', 'In-Progress')
|
||||||
|
|
||||||
paircharsdt = soup.find('strong',text='Пэйринг и персонажи:')
|
try:
|
||||||
|
self.story.setMetadata('universe', stripHTML(dlinfo.find('a', href=re.compile('/fandom_universe/'))))
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
paircharsdt = soup.find('strong',string='Пэйринг и персонажи:')
|
||||||
# site keeps both ships and indiv chars in /pairings/ links.
|
# site keeps both ships and indiv chars in /pairings/ links.
|
||||||
if paircharsdt:
|
if paircharsdt:
|
||||||
for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):
|
for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):
|
||||||
|
|
@ -215,9 +232,99 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
else:
|
else:
|
||||||
self.story.addToList('characters',stripHTML(paira))
|
self.story.addToList('characters',stripHTML(paira))
|
||||||
|
|
||||||
summary=soup.find('div', {'class' : 'urlize'})
|
summary=soup.find('div', itemprop='description')
|
||||||
self.setDescription(url,summary)
|
if summary:
|
||||||
#self.story.setMetadata('description', summary.text)
|
# Fix for the text not displaying properly
|
||||||
|
summary['class'].append('part_text')
|
||||||
|
self.setDescription(url,summary)
|
||||||
|
#self.story.setMetadata('description', summary.text)
|
||||||
|
|
||||||
|
stats = soup.find('div', {'class':'hat-actions-container'})
|
||||||
|
targetdata = stats.find_all('span', {'class' : 'main-info'})
|
||||||
|
for data in targetdata:
|
||||||
|
svg_class = data.find('svg')['class'][1] if data.find('svg') else None
|
||||||
|
value = int(stripHTML(data)) if stripHTML(data).isdigit() else 0
|
||||||
|
|
||||||
|
if svg_class == 'ic_thumbs-up' and value > 0:
|
||||||
|
self.story.setMetadata('likes', value)
|
||||||
|
#logger.debug("likes: (%s)"%self.story.getMetadata('likes'))
|
||||||
|
elif svg_class == 'ic_bubble-dark' and value > 0:
|
||||||
|
self.story.setMetadata('reviews', value)
|
||||||
|
#logger.debug("reviews: (%s)"%self.story.getMetadata('reviews'))
|
||||||
|
elif svg_class == 'ic_bookmark' and value > 0:
|
||||||
|
self.story.setMetadata('numCollections', value)
|
||||||
|
logger.debug("numCollections: (%s)"%self.story.getMetadata('numCollections'))
|
||||||
|
|
||||||
|
# Grab the amount of pages and words
|
||||||
|
targetpages = soup.find('strong',string='Размер:').find_next('div')
|
||||||
|
if targetpages:
|
||||||
|
targetpages_text = re.sub(r"(?<!\,)\s| ", "", targetpages.text, flags=re.UNICODE | re.MULTILINE)
|
||||||
|
|
||||||
|
pages_raw = re.search(r'(\d+)(?:страницы|страниц)', targetpages_text, re.UNICODE)
|
||||||
|
pages = int(pages_raw.group(1))
|
||||||
|
if pages > 0:
|
||||||
|
self.story.setMetadata('pages', pages)
|
||||||
|
logger.debug("pages: (%s)"%self.story.getMetadata('pages'))
|
||||||
|
|
||||||
|
numWords_raw = re.search(r"(\d+)(?:слова|слов)", targetpages_text, re.UNICODE)
|
||||||
|
numWords = int(numWords_raw.group(1))
|
||||||
|
if numWords > 0:
|
||||||
|
self.story.setMetadata('numWords', numWords)
|
||||||
|
logger.debug("numWords: (%s)"%self.story.getMetadata('numWords'))
|
||||||
|
|
||||||
|
# Grab FBN Category
|
||||||
|
class_tag = soup.select_one('div[class^="badge-with-icon direction"]').find('span', {'class' : 'badge-text'}).text
|
||||||
|
if class_tag:
|
||||||
|
self.story.setMetadata('classification',class_tag)
|
||||||
|
#logger.debug("classification: (%s)"%self.story.getMetadata('classification'))
|
||||||
|
|
||||||
|
# Find dedication.
|
||||||
|
ded = soup.find('div', {'class' : 'js-public-beta-dedication'})
|
||||||
|
if ded:
|
||||||
|
ded['class'].append('part_text')
|
||||||
|
self.story.setMetadata('dedication',ded)
|
||||||
|
|
||||||
|
# Find author comment
|
||||||
|
comm = soup.find('div', {'class' : 'js-public-beta-author-comment'})
|
||||||
|
if comm:
|
||||||
|
comm['class'].append('part_text')
|
||||||
|
self.story.setMetadata('authorcomment',comm)
|
||||||
|
|
||||||
|
follows = stats.find('fanfic-follow-button')[':follow-count']
|
||||||
|
if int(follows) > 0:
|
||||||
|
self.story.setMetadata('follows', int(follows))
|
||||||
|
logger.debug("follows: (%s)"%self.story.getMetadata('follows'))
|
||||||
|
|
||||||
|
# Grab the amount of awards
|
||||||
|
numAwards = 0
|
||||||
|
try:
|
||||||
|
awards = soup.find('fanfic-reward-list')[':initial-fic-rewards-list']
|
||||||
|
award_list = json.loads(awards)
|
||||||
|
numAwards = int(len(award_list))
|
||||||
|
# Grab the awards, but if multiple awards have the same name, only one will be kept; only an issue with hundreds of them.
|
||||||
|
self.story.extendList('awards', [str(award['user_text']) for award in award_list])
|
||||||
|
#logger.debug("awards (%s)"%self.story.getMetadata('awards'))
|
||||||
|
except (TypeError, KeyError):
|
||||||
|
logger.debug("Could not grab the awards")
|
||||||
|
|
||||||
|
if numAwards > 0:
|
||||||
|
self.story.setMetadata('numAwards', numAwards)
|
||||||
|
logger.debug("Num Awards (%s)"%self.story.getMetadata('numAwards'))
|
||||||
|
|
||||||
|
if get_cover:
|
||||||
|
cover = soup.find('fanfic-cover', {'class':"jsVueComponent"})
|
||||||
|
if cover is not None:
|
||||||
|
self.setCoverImage(url,cover['src-original'])
|
||||||
|
|
||||||
|
def replace_formatting(self,tag):
|
||||||
|
tname = tag.name
|
||||||
|
## operating on plain text because BS4 is hard to work on
|
||||||
|
## text with.
|
||||||
|
## stripHTML() discards whitespace around other tags, like <i>
|
||||||
|
txt = tag.get_text()
|
||||||
|
txt = txt.replace("\n","<br/>")
|
||||||
|
soup = self.make_soup("<"+tname+">"+txt+"</"+tname+">")
|
||||||
|
return soup.find(tname)
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
# grab the text for an individual chapter.
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
|
|
@ -227,10 +334,60 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||||
soup = self.make_soup(self.get_request(url))
|
soup = self.make_soup(self.get_request(url))
|
||||||
|
|
||||||
chapter = soup.find('div', {'id' : 'content'})
|
chapter = soup.find('div', {'id' : 'content'})
|
||||||
if chapter == None: ## still needed?
|
if chapter is None: ## still needed?
|
||||||
chapter = soup.find('div', {'class' : 'public_beta_disabled'})
|
chapter = soup.find('div', {'class' : 'public_beta_disabled'})
|
||||||
|
|
||||||
if None == chapter:
|
if chapter is None:
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||||
|
|
||||||
|
## ficbook uses weird CSS white-space: pre-wrap; for
|
||||||
|
## paragraphing. Doesn't work with txt output
|
||||||
|
if 'part_text' in chapter['class'] and self.getConfig('replace_text_formatting'):
|
||||||
|
## copy classes, except part_text
|
||||||
|
divclasses = chapter['class']
|
||||||
|
divclasses.remove('part_text')
|
||||||
|
chapter = self.replace_formatting(chapter)
|
||||||
|
chapter['class'] = divclasses
|
||||||
|
|
||||||
|
exclude_notes=self.getConfigList('exclude_notes')
|
||||||
|
if 'headnotes' not in exclude_notes:
|
||||||
|
# Find the headnote
|
||||||
|
head_note = soup.select_one("div.part-comment-top div.js-public-beta-comment-before")
|
||||||
|
if head_note:
|
||||||
|
# Create the structure for the headnote
|
||||||
|
head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
|
||||||
|
head_b_tag = soup.new_tag('b')
|
||||||
|
head_b_tag.string = 'Примечания:'
|
||||||
|
if 'text-preline' in head_note['class'] and self.getConfig('replace_text_formatting'):
|
||||||
|
head_blockquote_tag = self.replace_formatting(head_note)
|
||||||
|
head_blockquote_tag.name = 'blockquote'
|
||||||
|
else:
|
||||||
|
head_blockquote_tag = soup.new_tag('blockquote')
|
||||||
|
head_blockquote_tag.string = stripHTML(head_note)
|
||||||
|
head_notes_div_tag.append(head_b_tag)
|
||||||
|
head_notes_div_tag.append(head_blockquote_tag)
|
||||||
|
# Prepend the headnotes to the chapter, <hr> to mimic the site
|
||||||
|
chapter.insert(0, head_notes_div_tag)
|
||||||
|
chapter.insert(1, soup.new_tag('hr'))
|
||||||
|
|
||||||
|
if 'footnotes' not in exclude_notes:
|
||||||
|
# Find the endnote
|
||||||
|
end_note = soup.select_one("div.part-comment-bottom div.js-public-beta-comment-after")
|
||||||
|
if end_note:
|
||||||
|
# Create the structure for the footnote
|
||||||
|
end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
|
||||||
|
end_b_tag = soup.new_tag('b')
|
||||||
|
end_b_tag.string = 'Примечания:'
|
||||||
|
if 'text-preline' in end_note['class'] and self.getConfig('replace_text_formatting'):
|
||||||
|
end_blockquote_tag = self.replace_formatting(end_note)
|
||||||
|
end_blockquote_tag.name = 'blockquote'
|
||||||
|
else:
|
||||||
|
end_blockquote_tag = soup.new_tag('blockquote')
|
||||||
|
end_blockquote_tag.string = stripHTML(end_note)
|
||||||
|
end_notes_div_tag.append(end_b_tag)
|
||||||
|
end_notes_div_tag.append(end_blockquote_tag)
|
||||||
|
# Append the endnotes to the chapter, <hr> to mimic the site
|
||||||
|
chapter.append(soup.new_tag('hr'))
|
||||||
|
chapter.append(end_notes_div_tag)
|
||||||
|
|
||||||
return self.utf8FromSoup(url,chapter)
|
return self.utf8FromSoup(url,chapter)
|
||||||
|
|
|
||||||
225
fanficfare/adapters/adapter_fictionalleyarchiveorg.py
Normal file
225
fanficfare/adapters/adapter_fictionalleyarchiveorg.py
Normal file
|
|
@ -0,0 +1,225 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2011 Fanficdownloader team, 2021 FanFicFare team
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
import logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
import re
|
||||||
|
from ..htmlcleanup import stripHTML
|
||||||
|
from .. import exceptions as exceptions
|
||||||
|
|
||||||
|
from .base_adapter import BaseSiteAdapter, makeDate
|
||||||
|
|
||||||
|
class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
def __init__(self, config, url):
|
||||||
|
BaseSiteAdapter.__init__(self, config, url)
|
||||||
|
self.story.setMetadata('siteabbrev','fa')
|
||||||
|
self.is_adult=False
|
||||||
|
|
||||||
|
# get storyId from url--url validation guarantees query correct
|
||||||
|
m = re.match(self.getSiteURLPattern(),url)
|
||||||
|
if m:
|
||||||
|
# normalized story URL.
|
||||||
|
url = "https://"+self.getSiteDomain()+"/authors/"+m.group('auth')+"/"+m.group('id')+".html"
|
||||||
|
self._setURL(url)
|
||||||
|
else:
|
||||||
|
raise exceptions.InvalidStoryURL(url,
|
||||||
|
self.getSiteDomain(),
|
||||||
|
self.getSiteExampleURLs())
|
||||||
|
# The date format will vary from site to site.
|
||||||
|
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||||
|
self.dateformat = "%m/%d/%Y"
|
||||||
|
|
||||||
|
def _setURL(self,url):
|
||||||
|
# logger.debug("set URL:%s"%url)
|
||||||
|
super(FictionAlleyArchiveOrgSiteAdapter, self)._setURL(url)
|
||||||
|
m = re.match(self.getSiteURLPattern(),url)
|
||||||
|
if m:
|
||||||
|
self.story.setMetadata('authorId',m.group('auth'))
|
||||||
|
self.story.setMetadata('storyId',m.group('id'))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getSiteDomain():
|
||||||
|
return 'www.fictionalley-archive.org'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getAcceptDomains(cls):
|
||||||
|
return ['www.fictionalley-archive.org',
|
||||||
|
'www.fictionalley.org']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getSiteExampleURLs(cls):
|
||||||
|
return "https://"+cls.getSiteDomain()+"/authors/drt/DA.html https://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getURLDomain(cls):
|
||||||
|
return 'https://' + cls.getSiteDomain()
|
||||||
|
|
||||||
|
def getSiteURLPattern(self):
|
||||||
|
# http://www.fictionalley-archive.org/authors/drt/DA.html
|
||||||
|
# http://www.fictionalley-archive.org/authors/drt/JOTP01a.html
|
||||||
|
return r"https?://www.fictionalley(-archive)?.org/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"
|
||||||
|
|
||||||
|
def extractChapterUrlsAndMetadata(self):
|
||||||
|
|
||||||
|
## could be either chapter list page or one-shot text page.
|
||||||
|
logger.debug("URL: "+self.url)
|
||||||
|
|
||||||
|
(data,rurl) = self.get_request_redirected(self.url)
|
||||||
|
if rurl != self.url:
|
||||||
|
self._setURL(rurl)
|
||||||
|
logger.debug("set to redirected url:%s"%self.url)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
# If chapter list page, get the first chapter to look for adult check
|
||||||
|
chapterlinklist = soup.select('h5.mb-1 > a')
|
||||||
|
# logger.debug(chapterlinklist)
|
||||||
|
|
||||||
|
if not chapterlinklist:
|
||||||
|
# no chapter list, it's either a chapter URL or a single chapter story
|
||||||
|
# <nav aria-label="Chapter Navigation">
|
||||||
|
# <a class="page-link" href="/authors/mz_xxo/HPATOTFI.html">Index</a>
|
||||||
|
storya = soup.select_one('nav[aria-label="Chapter Navigation"] a')
|
||||||
|
# logger.debug(storya)
|
||||||
|
if storya:
|
||||||
|
## multi chapter story
|
||||||
|
self._setURL(self.getURLDomain()+storya['href'])
|
||||||
|
logger.debug("Normalizing to URL: "+self.url)
|
||||||
|
# ## title's right there...
|
||||||
|
# self.story.setMetadata('title',stripHTML(storya))
|
||||||
|
data = self.get_request(self.url)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
chapterlinklist = soup.select('h5.mb-1 > a')
|
||||||
|
# logger.debug(chapterlinklist)
|
||||||
|
else:
|
||||||
|
## single chapter story.
|
||||||
|
# logger.debug("Single chapter story")
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.story.setMetadata('title',stripHTML(soup.select_one('h1')))
|
||||||
|
|
||||||
|
## authorid already set.
|
||||||
|
## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1>
|
||||||
|
authora=soup.select_one('h1 + h3 > a')
|
||||||
|
self.story.setMetadata('author',stripHTML(authora))
|
||||||
|
self.story.setMetadata('authorUrl',self.getURLDomain()+authora['href'])
|
||||||
|
|
||||||
|
if chapterlinklist:
|
||||||
|
# Find the chapters:
|
||||||
|
for chapter in chapterlinklist:
|
||||||
|
listitem = chapter.parent.parent.parent
|
||||||
|
# logger.debug(listitem)
|
||||||
|
# date
|
||||||
|
date = stripHTML(listitem.select_one('small.text-nowrap'))
|
||||||
|
chapterDate = makeDate(date,self.dateformat)
|
||||||
|
wordshits = listitem.select('span.font-weight-normal')
|
||||||
|
chap_data = {
|
||||||
|
'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d"))),
|
||||||
|
'words':stripHTML(wordshits[0]),
|
||||||
|
'hits':stripHTML(wordshits[1]),
|
||||||
|
'summary':stripHTML(listitem.select_one('p.my-2')),
|
||||||
|
}
|
||||||
|
# logger.debug(chap_data)
|
||||||
|
self.add_chapter(chapter,self.getURLDomain()+chapter['href'], chap_data)
|
||||||
|
else:
|
||||||
|
self.add_chapter(self.story.getMetadata('title'),self.url)
|
||||||
|
|
||||||
|
cardbody = soup.select_one('div.card-body')
|
||||||
|
|
||||||
|
searchs_to_meta = (
|
||||||
|
# sitetype, ffftype, islist
|
||||||
|
('Rating', 'rating', False),
|
||||||
|
('House', 'house', True),
|
||||||
|
('Character', 'characters', True),
|
||||||
|
('Genre', 'genre', True),
|
||||||
|
('Era', 'era', True),
|
||||||
|
('Spoiler', 'spoilers', True),
|
||||||
|
('Ship', 'ships', True),
|
||||||
|
)
|
||||||
|
for (sitetype,ffftype, islist) in searchs_to_meta:
|
||||||
|
# logger.debug((sitetype,ffftype, islist))
|
||||||
|
tags = cardbody.select('a[href^="/stories?Include.%s"]'%sitetype)
|
||||||
|
# logger.debug(tags)
|
||||||
|
if tags:
|
||||||
|
if islist:
|
||||||
|
self.story.extendList(ffftype, [ stripHTML(a) for a in tags ])
|
||||||
|
else:
|
||||||
|
self.story.setMetadata(ffftype, stripHTML(tags[0]))
|
||||||
|
|
||||||
|
|
||||||
|
# Published: 09/26/2003 Updated: 04/13/2004 Words: 14,268 Chapters: 5 Hits: 743
|
||||||
|
badgeinfos = cardbody.select('div.badge-info')
|
||||||
|
# logger.debug(badgeinfos)
|
||||||
|
for badge in badgeinfos:
|
||||||
|
txt = stripHTML(badge)
|
||||||
|
(key,val)=txt.split(':')
|
||||||
|
# logger.debug((key,val))
|
||||||
|
if key in ( 'Published', 'Updated'):
|
||||||
|
date = makeDate(val,self.dateformat)
|
||||||
|
self.story.setMetadata('date'+key,date)
|
||||||
|
elif key in ('Hits'):
|
||||||
|
self.story.setMetadata(key.lower(),val)
|
||||||
|
elif key == 'Words':
|
||||||
|
self.story.setMetadata('numWords',val)
|
||||||
|
|
||||||
|
summary = soup.find('dt',string='Story Summary:')
|
||||||
|
if summary:
|
||||||
|
summary = summary.find_next_sibling('dd')
|
||||||
|
summary.name='div'
|
||||||
|
self.setDescription(self.url,summary)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def getChapterText(self, url):
|
||||||
|
|
||||||
|
logger.debug('Getting chapter text from: %s' % url)
|
||||||
|
|
||||||
|
data = self.get_request(url)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
# this may be a brittle way to get the chapter text.
|
||||||
|
# Site doesn't give a lot of hints.
|
||||||
|
chaptext = soup.select_one('main#content div:not([class])')
|
||||||
|
|
||||||
|
# not sure how, but we can get html, etc tags still in some
|
||||||
|
# stories. That breaks later updates because it confuses
|
||||||
|
# epubutils.py
|
||||||
|
# Yes, this still applies to fictionalley-archive.
|
||||||
|
|
||||||
|
for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
|
||||||
|
tag.extract()
|
||||||
|
|
||||||
|
for tag in chaptext.find_all('body') + chaptext.find_all('html'):
|
||||||
|
tag.name = 'div'
|
||||||
|
|
||||||
|
if self.getConfig('include_author_notes'):
|
||||||
|
row = chaptext.find_previous_sibling('div',class_='row')
|
||||||
|
logger.debug(row)
|
||||||
|
andt = row.find('dt',string="Author's Note:")
|
||||||
|
logger.debug(andt)
|
||||||
|
if andt:
|
||||||
|
chaptext.insert(0,andt.parent.extract())
|
||||||
|
# post notes aren't as structured(?)
|
||||||
|
for div in chaptext.find_next_siblings('div',class_='row'):
|
||||||
|
chaptext.append(div.extract())
|
||||||
|
|
||||||
|
# logger.debug(chaptext)
|
||||||
|
return self.utf8FromSoup(url,chaptext)
|
||||||
|
|
||||||
|
def getClass():
|
||||||
|
return FictionAlleyArchiveOrgSiteAdapter
|
||||||
|
|
@ -1,228 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
self.story.setMetadata('siteabbrev','fa')
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query correct
|
|
||||||
m = re.match(self.getSiteURLPattern(),url)
|
|
||||||
if m:
|
|
||||||
self.story.setMetadata('authorId',m.group('auth'))
|
|
||||||
self.story.setMetadata('storyId',m.group('id'))
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL(url)
|
|
||||||
else:
|
|
||||||
raise exceptions.InvalidStoryURL(url,
|
|
||||||
self.getSiteDomain(),
|
|
||||||
self.getSiteExampleURLs())
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'www.fictionalley.org'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/authors/drt/DA.html http://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
# http://www.fictionalley.org/authors/drt/DA.html
|
|
||||||
# http://www.fictionalley.org/authors/drt/JOTP01a.html
|
|
||||||
return re.escape("http://"+self.getSiteDomain())+r"/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"
|
|
||||||
|
|
||||||
def _postFetchWithIAmOld(self,url):
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
params={'iamold':'Yes',
|
|
||||||
'action':'ageanswer'}
|
|
||||||
logger.info("Attempting to get cookie for %s" % url)
|
|
||||||
## posting on list doesn't work, but doesn't hurt, either.
|
|
||||||
data = self.post_request(url,params)
|
|
||||||
else:
|
|
||||||
data = self.get_request(url)
|
|
||||||
return data
|
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
## could be either chapter list page or one-shot text page.
|
|
||||||
url = self.url
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self._postFetchWithIAmOld(url)
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
chapterdata = data
|
|
||||||
# If chapter list page, get the first chapter to look for adult check
|
|
||||||
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
|
|
||||||
if chapterlinklist:
|
|
||||||
chapterdata = self._postFetchWithIAmOld(chapterlinklist[0]['href'])
|
|
||||||
|
|
||||||
if "Are you over seventeen years old" in chapterdata:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if not chapterlinklist:
|
|
||||||
# no chapter list, chapter URL: change to list link.
|
|
||||||
# second a tag inside div breadcrumbs
|
|
||||||
storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
|
|
||||||
self._setURL(storya['href'])
|
|
||||||
url=self.url
|
|
||||||
logger.debug("Normalizing to URL: "+url)
|
|
||||||
## title's right there...
|
|
||||||
self.story.setMetadata('title',stripHTML(storya))
|
|
||||||
data = self.get_request(url)
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
|
|
||||||
else:
|
|
||||||
## still need title from somewhere. If chapterlinklist,
|
|
||||||
## then chapterdata contains a chapter, find title the
|
|
||||||
## same way.
|
|
||||||
chapsoup = self.make_soup(chapterdata)
|
|
||||||
storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
|
|
||||||
self.story.setMetadata('title',stripHTML(storya))
|
|
||||||
del chapsoup
|
|
||||||
|
|
||||||
del chapterdata
|
|
||||||
|
|
||||||
## authorid already set.
|
|
||||||
## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1>
|
|
||||||
authora=soup.find('h1',{'class':'title'}).find('a')
|
|
||||||
self.story.setMetadata('author',authora.string)
|
|
||||||
self.story.setMetadata('authorUrl',authora['href'])
|
|
||||||
|
|
||||||
if len(chapterlinklist) == 1:
|
|
||||||
self.add_chapter(self.story.getMetadata('title'),chapterlinklist[0]['href'])
|
|
||||||
else:
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in chapterlinklist:
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,chapter['href'])
|
|
||||||
|
|
||||||
|
|
||||||
## Go scrape the rest of the metadata from the author's page.
|
|
||||||
data = self.get_request(self.story.getMetadata('authorUrl'))
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
# <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
|
|
||||||
# [Rid] The Magical Hottiez</a> by <a class = "pen_name" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/">Aafro Man Ziegod</a> </small></dt>
|
|
||||||
# <dd><small class = "storyinfo"><a href = "http://www.fictionalley.org/ratings.html" target = "_new">Rating:</a> PG-13 - Spoilers: PS/SS, CoS, PoA, GoF, QTTA, FB - 4264 hits - 5060 words<br />
|
|
||||||
# Genre: Humor, Romance - Main character(s): None - Ships: None - Era: Multiple Eras<br /></small>
|
|
||||||
# Chaos ensues after Witch Weekly, seeking to increase readers, decides to create a boyband out of five seemingly talentless wizards: Harry Potter, Draco Malfoy, Ron Weasley, Neville Longbottom, and Oliver "Toss Your Knickers Here" Wood.<br />
|
|
||||||
# <small class = "storyinfo">Published: June 3, 2002 (between Goblet of Fire and Order of Phoenix) - Updated: June 3, 2002</small>
|
|
||||||
# </dd></dl>
|
|
||||||
|
|
||||||
storya = soup.find('a',{'href':self.story.getMetadata('storyUrl')})
|
|
||||||
storydd = storya.findNext('dd')
|
|
||||||
|
|
||||||
# Rating: PG - Spoilers: None - 2525 hits - 736 words
|
|
||||||
# Genre: Humor - Main character(s): H, R - Ships: None - Era: Multiple Eras
|
|
||||||
# Harry and Ron are back at it again! They reeeeeeally don't want to be back, because they know what's awaiting them. "VH1 Goes Inside..." is back! Why? 'Cos there are soooo many more couples left to pick on.
|
|
||||||
# Published: September 25, 2004 (between Order of Phoenix and Half-Blood Prince) - Updated: September 25, 2004
|
|
||||||
|
|
||||||
## change to text and regexp find.
|
|
||||||
metastr = stripHTML(storydd).replace('\n',' ').replace('\t',' ')
|
|
||||||
|
|
||||||
m = re.match(r".*?Rating: (.+?) -.*?",metastr)
|
|
||||||
if m:
|
|
||||||
self.story.setMetadata('rating', m.group(1))
|
|
||||||
|
|
||||||
m = re.match(r".*?Genre: (.+?) -.*?",metastr)
|
|
||||||
if m:
|
|
||||||
for g in m.group(1).split(','):
|
|
||||||
self.story.addToList('genre',g)
|
|
||||||
|
|
||||||
m = re.match(r".*?Published: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
|
|
||||||
if m:
|
|
||||||
self.story.setMetadata('datePublished',makeDate(m.group(1), "%B %d, %Y"))
|
|
||||||
|
|
||||||
m = re.match(r".*?Updated: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
|
|
||||||
if m:
|
|
||||||
self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%B %d, %Y"))
|
|
||||||
|
|
||||||
m = re.match(r".*? (\d+) words Genre.*?",metastr)
|
|
||||||
if m:
|
|
||||||
self.story.setMetadata('numWords', m.group(1))
|
|
||||||
|
|
||||||
for small in storydd.findAll('small'):
|
|
||||||
small.extract() ## removes the <small> tags, leaving only the summary.
|
|
||||||
storydd.name = 'div' ## change tag name else Calibre treats it oddly.
|
|
||||||
self.setDescription(url,storydd)
|
|
||||||
#self.story.setMetadata('description',stripHTML(storydd))
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
# find <!-- headerend --> & <!-- footerstart --> and
|
|
||||||
# replaced with matching div pair for easier parsing.
|
|
||||||
# Yes, it's an evil kludge, but what can ya do? Using
|
|
||||||
# something other than div prevents soup from pairing
|
|
||||||
# our div with poor html inside the story text.
|
|
||||||
crazy = "crazytagstringnobodywouldstumbleonaccidently"
|
|
||||||
data = data.replace('<!-- headerend -->','<'+crazy+' id="storytext">').replace('<!-- footerstart -->','</'+crazy+'>')
|
|
||||||
|
|
||||||
# problems with some stories confusing Soup. This is a nasty
|
|
||||||
# hack, but it works.
|
|
||||||
data = data[data.index('<'+crazy+''):]
|
|
||||||
# ditto with extra crap at the end.
|
|
||||||
data = data[:data.index('</'+crazy+'>')+len('</'+crazy+'>')]
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
body = soup.findAll('body') ## some stories use a nested body and body
|
|
||||||
## tag, in which case we don't
|
|
||||||
## need crazytagstringnobodywouldstumbleonaccidently
|
|
||||||
## and use the second one instead.
|
|
||||||
if len(body)>1:
|
|
||||||
text = body[1]
|
|
||||||
text.name='div' # force to be a div to avoid multiple body tags.
|
|
||||||
else:
|
|
||||||
text = soup.find(crazy, {'id' : 'storytext'})
|
|
||||||
text.name='div' # change to div tag.
|
|
||||||
|
|
||||||
if not data or not text:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
# not sure how, but we can get html, etc tags still in some
|
|
||||||
# stories. That breaks later updates because it confuses
|
|
||||||
# epubutils.py
|
|
||||||
for tag in text.findAll('head'):
|
|
||||||
tag.extract()
|
|
||||||
|
|
||||||
for tag in text.findAll('body') + text.findAll('html'):
|
|
||||||
tag.name = 'div'
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,text)
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return FictionAlleyOrgSiteAdapter
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2019 FanFicFare team
|
# Copyright 2022 FanFicFare team
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
|
@ -26,6 +26,93 @@ from ..htmlcleanup import stripHTML
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
from .base_adapter import BaseSiteAdapter, makeDate
|
||||||
|
|
||||||
|
ampfandoms = ["A Falcone & Driscoll Investigation",
|
||||||
|
"Alias Smith & Jones",
|
||||||
|
"Atelier Escha & Logy",
|
||||||
|
"Austin & Ally",
|
||||||
|
"Baby & Me/赤ちゃんと僕",
|
||||||
|
"Barney & Friends",
|
||||||
|
"Between Love & Goodbye",
|
||||||
|
"Beyond Good & Evil",
|
||||||
|
"Bill & Ted's Excellent Adventure/Bogus Journey",
|
||||||
|
"BLACK & WHITE",
|
||||||
|
"Bonnie & Clyde",
|
||||||
|
"Brandy & Mr. Whiskers",
|
||||||
|
"Brothers & Sisters",
|
||||||
|
"Bucket & Skinner's Epic Adventures",
|
||||||
|
"Calvin & Hobbes",
|
||||||
|
"Cats & Dogs",
|
||||||
|
"Command & Conquer",
|
||||||
|
"Devil & Devil",
|
||||||
|
"Dharma & Greg",
|
||||||
|
"Dicky & Dawn",
|
||||||
|
"Drake & Josh",
|
||||||
|
"Edgar & Ellen",
|
||||||
|
"Franklin & Bash",
|
||||||
|
"Gabby Duran & The Unsittables",
|
||||||
|
"Girls und Panzer/ガールズ&パンツァー",
|
||||||
|
"Gnomeo & Juliet",
|
||||||
|
"Grim Adventures of Billy & Mandy",
|
||||||
|
"Half & Half/ハーフ・アンド・ハーフ",
|
||||||
|
"Hansel & Gretel",
|
||||||
|
"Hatfields & McCoys",
|
||||||
|
"High & Low - The Story of S.W.O.R.D.",
|
||||||
|
"Home & Away",
|
||||||
|
"Hudson & Rex",
|
||||||
|
"Huntik: Secrets & Seekers",
|
||||||
|
"Imagine Me & You",
|
||||||
|
"Jekyll & Hyde",
|
||||||
|
"Jonathan Strange & Mr. Norrell",
|
||||||
|
"Knight's & Magic/ナイツ&マジック",
|
||||||
|
"Law & Order: Los Angeles",
|
||||||
|
"Law & Order: Organized Crime",
|
||||||
|
"Lilo & Stitch",
|
||||||
|
"Locke & Key",
|
||||||
|
"Lockwood & Co.",
|
||||||
|
"Lost & Found Music Studios",
|
||||||
|
"Lu & Og",
|
||||||
|
"Me & My Brothers",
|
||||||
|
"Melissa & Joey",
|
||||||
|
"Mickey Mouse & Friends",
|
||||||
|
"Mike & Molly",
|
||||||
|
"Mike, Lu & Og",
|
||||||
|
"Miraculous: Tales of Ladybug & Cat Noir",
|
||||||
|
"Mork & Mindy",
|
||||||
|
"Mount&Blade",
|
||||||
|
"Mr. & Mrs. Smith",
|
||||||
|
"Mr. Peabody & Sherman",
|
||||||
|
"Muhyo & Roji",
|
||||||
|
"Nicky, Ricky, Dicky & Dawn",
|
||||||
|
"Oliver & Company",
|
||||||
|
"Ozzy & Drix",
|
||||||
|
"Panty & Stocking with Garterbelt/パンティ&ストッキングwithガーターベルト",
|
||||||
|
"Penryn & the End of Days",
|
||||||
|
"Prep & Landing",
|
||||||
|
"Prince & Hero/王子とヒーロー",
|
||||||
|
"Prince & Me",
|
||||||
|
"Puzzle & Dragons",
|
||||||
|
"Ren & Stimpy Show",
|
||||||
|
"Rizzoli & Isles",
|
||||||
|
"Romeo & Juliet",
|
||||||
|
"Rosemary & Thyme",
|
||||||
|
"Sam & Cat",
|
||||||
|
"Sam & Max",
|
||||||
|
"Sapphire & Steel",
|
||||||
|
"Scott & Bailey",
|
||||||
|
"Shakespeare & Hathaway: Private Investigators",
|
||||||
|
"Soul Nomad & the World Eaters",
|
||||||
|
"Superman & Lois",
|
||||||
|
"Tiger & Bunny/タイガー&バニー",
|
||||||
|
"Trains & Automobiles",
|
||||||
|
"Upin & Ipin",
|
||||||
|
"Wallace & Gromit",
|
||||||
|
"Witch & Wizard",
|
||||||
|
"Wolverine & the X-Men",
|
||||||
|
"Yotsuba&!/よつばと!",
|
||||||
|
"Young & Hungry",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
def __init__(self, config, url):
|
def __init__(self, config, url):
|
||||||
|
|
@ -57,7 +144,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
# The date format will vary from site to site.
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||||
self.dateformat = "%d %b %Y"
|
self.dateformat = "%Y-%m-%d %H:%M:%S"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
|
|
@ -123,9 +210,11 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
||||||
url = self.url
|
url = self.url
|
||||||
data = self.get_request(url)
|
data = self.get_request(url)
|
||||||
|
|
||||||
if self.needToLoginCheck(data):
|
## As per #784, site isn't requiring login anymore.
|
||||||
self.performLogin(url)
|
## Login check commented since we've seen it toggle before.
|
||||||
data = self.get_request(url,usecache=False)
|
# if self.needToLoginCheck(data):
|
||||||
|
# self.performLogin(url)
|
||||||
|
# data = self.get_request(url,usecache=False)
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
soup = self.make_soup(data)
|
||||||
## detect old storyUrl, switch to new storyUrl:
|
## detect old storyUrl, switch to new storyUrl:
|
||||||
|
|
@ -143,9 +232,10 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
||||||
self._setURL(soup.select_one("div.Story__details a")['href'])
|
self._setURL(soup.select_one("div.Story__details a")['href'])
|
||||||
url = self.url
|
url = self.url
|
||||||
|
|
||||||
|
# logger.debug(data)
|
||||||
self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))
|
self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))
|
||||||
|
|
||||||
summhead = soup.find('h5',text='Summary')
|
summhead = soup.find('h5',string='Summary')
|
||||||
self.setDescription(url,summhead.find_next('div'))
|
self.setDescription(url,summhead.find_next('div'))
|
||||||
|
|
||||||
## author:
|
## author:
|
||||||
|
|
@ -154,42 +244,43 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('authorUrl',autha['href'])
|
self.story.setMetadata('authorUrl',autha['href'])
|
||||||
self.story.setMetadata('author',autha.string)
|
self.story.setMetadata('author',autha.string)
|
||||||
|
|
||||||
|
updlab = soup.find('label',string='Last Updated:')
|
||||||
|
if updlab:
|
||||||
|
update = updlab.find_next('time')['datetime']
|
||||||
|
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
|
||||||
|
|
||||||
|
publab = soup.find('label',string='Published:')
|
||||||
|
if publab:
|
||||||
|
pubdate = publab.find_next('time')['datetime']
|
||||||
|
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
|
||||||
|
|
||||||
## need author page for some metadata.
|
## need author page for some metadata.
|
||||||
authsoup = None
|
authsoup = None
|
||||||
authpagea = autha
|
authpagea = autha
|
||||||
authstorya = None
|
authstorya = None
|
||||||
|
|
||||||
|
## Rating and exact word count doesn't appear on the summary
|
||||||
|
## page, try to get from author page.
|
||||||
|
|
||||||
## find story url, might need to spin through author's pages.
|
## find story url, might need to spin through author's pages.
|
||||||
while authpagea and not authstorya:
|
while authpagea and not authstorya:
|
||||||
logger.debug(authpagea)
|
|
||||||
authsoup = self.make_soup(self.get_request(authpagea['href']))
|
authsoup = self.make_soup(self.get_request(authpagea['href']))
|
||||||
authpagea = authsoup.find('a',{'class':'page-link','rel':'next'})
|
authpagea = authsoup.find('a',{'rel':'next'})
|
||||||
# CSS selectors don't allow : or / unquoted, which
|
# CSS selectors don't allow : or / unquoted, which
|
||||||
# BS4(and dependencies) didn't used to enforce.
|
# BS4(and dependencies) didn't used to enforce.
|
||||||
authstorya = authsoup.select('h4.Story__item-title a[href="%s"]'%self.url)
|
authstorya = authsoup.select_one('h4.Story__item-title a[href="%s"]'%self.url)
|
||||||
|
|
||||||
if not authstorya:
|
if not authstorya:
|
||||||
raise exceptions.FailedToDownload("Error finding %s on author page(s)" % self.url)
|
raise exceptions.FailedToDownload("Error finding %s on author page(s)" % self.url)
|
||||||
|
|
||||||
meta = authstorya[0].parent.parent.select("div.Story__meta-info")[0]
|
meta = authstorya.find_parent('li').find('div',class_='Story__meta-info')
|
||||||
## remove delimiters
|
|
||||||
for span in authstorya[0].parent.parent.select("div.Story__meta-info span.delimiter"):
|
|
||||||
span.extract()
|
|
||||||
meta.find('span').extract() # discard author link
|
|
||||||
|
|
||||||
update = stripHTML(meta.find('span').extract()).split(':')[1].strip()
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
|
|
||||||
|
|
||||||
pubdate = stripHTML(meta.find('span').extract()).split(':')[1].strip()
|
|
||||||
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
|
|
||||||
|
|
||||||
meta=meta.text.split()
|
meta=meta.text.split()
|
||||||
self.story.setMetadata('numWords',meta[meta.index('words')-1])
|
self.story.setMetadata('numWords',meta[meta.index('words')-1])
|
||||||
self.story.setMetadata('rating',meta[meta.index('Rating:')+1])
|
self.story.setMetadata('rating',meta[meta.index('Rating:')+1])
|
||||||
# logger.debug(meta)
|
# logger.debug(meta)
|
||||||
|
|
||||||
# Find original ffnet URL
|
# Find original ffnet URL
|
||||||
a = soup.find('a', text="Source")
|
a = soup.find('a', string="Source")
|
||||||
self.story.setMetadata('origin',stripHTML(a))
|
self.story.setMetadata('origin',stripHTML(a))
|
||||||
self.story.setMetadata('originUrl',a['href'])
|
self.story.setMetadata('originUrl',a['href'])
|
||||||
|
|
||||||
|
|
@ -208,8 +299,30 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
||||||
for a in soup.select('a[href*="pairings="]'):
|
for a in soup.select('a[href*="pairings="]'):
|
||||||
self.story.addToList('ships',stripHTML(a).replace("+","/"))
|
self.story.addToList('ships',stripHTML(a).replace("+","/"))
|
||||||
|
|
||||||
for chapa in soup.select('ul.StoryContents__chapters a'):
|
for a in soup.select('div.Story__type a[href*="fandoms="]'):
|
||||||
self.add_chapter(stripHTML(chapa.find('span',{'class':'chapter-title'})),chapa['href'])
|
# logger.debug(a)
|
||||||
|
fandomstr=stripHTML(a).replace(' Fanfiction','').strip()
|
||||||
|
# logger.debug("'%s'"%fandomstr)
|
||||||
|
## haven't thought of a better way to detect and *not*
|
||||||
|
## split on fandoms with a '&' in them.
|
||||||
|
for ampfandom in ampfandoms:
|
||||||
|
if ampfandom in fandomstr:
|
||||||
|
self.story.addToList('category',ampfandom)
|
||||||
|
fandomstr = fandomstr.replace(ampfandom,'')
|
||||||
|
for fandom in fandomstr.split('&'):
|
||||||
|
if fandom:
|
||||||
|
self.story.addToList('category',fandom)
|
||||||
|
|
||||||
|
## Currently no 'Original' stories on the site, but does list
|
||||||
|
## it as a search type. Set extratags: and uncomment this if
|
||||||
|
## and when.
|
||||||
|
# if self.story.getList('category'):
|
||||||
|
# self.story.addToList('category', 'FanFiction')
|
||||||
|
# else:
|
||||||
|
# self.story.addToList('category', 'Original')
|
||||||
|
|
||||||
|
for chapli in soup.select('ul.StoryContents__chapters li'):
|
||||||
|
self.add_chapter(stripHTML(chapli.select_one('span.chapter-title')),chapli.select_one('a')['href'])
|
||||||
|
|
||||||
if self.num_chapters() == 0:
|
if self.num_chapters() == 0:
|
||||||
raise exceptions.FailedToDownload("Story at %s has no chapters." % self.url)
|
raise exceptions.FailedToDownload("Story at %s has no chapters." % self.url)
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@
|
||||||
# per-user achivement tracking with fancy achievement-get animations
|
# per-user achivement tracking with fancy achievement-get animations
|
||||||
# story scripting (shows script tags visible in the text, not computed values or input fields)
|
# story scripting (shows script tags visible in the text, not computed values or input fields)
|
||||||
|
|
||||||
|
import re
|
||||||
import json
|
import json
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
@ -35,6 +36,8 @@ import itertools
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# __package__ = 'fanficfare.adapters' # fixes dev issues with unknown package base
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter
|
from .base_adapter import BaseSiteAdapter
|
||||||
from ..htmlcleanup import stripHTML
|
from ..htmlcleanup import stripHTML
|
||||||
from .. import exceptions as exceptions
|
from .. import exceptions as exceptions
|
||||||
|
|
@ -52,6 +55,8 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
self.story_id = self.parsedUrl.path.split('/')[3]
|
self.story_id = self.parsedUrl.path.split('/')[3]
|
||||||
self.story.setMetadata('storyId', self.story_id)
|
self.story.setMetadata('storyId', self.story_id)
|
||||||
|
|
||||||
|
self.chapter_id_to_api = {}
|
||||||
|
|
||||||
# normalize URL. omits title in the url
|
# normalize URL. omits title in the url
|
||||||
self._setURL("https://fiction.live/stories//{s_id}".format(s_id = self.story_id));
|
self._setURL("https://fiction.live/stories//{s_id}".format(s_id = self.story_id));
|
||||||
|
|
||||||
|
|
@ -65,7 +70,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
# I'd like to thank regex101.com for helping me screw this up less
|
# I'd like to thank regex101.com for helping me screw this up less
|
||||||
return r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/([a-zA-Z0-9\-]+)(/(home)?)?"
|
return r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/([a-zA-Z0-9\-]+)(/(home)?)?$"
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(cls):
|
def getSiteExampleURLs(cls):
|
||||||
|
|
@ -74,11 +79,29 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
+"https://fiction.live/Sci-fi/Example-Story-With-URL-Genre/17CharacterIDhere/ "
|
+"https://fiction.live/Sci-fi/Example-Story-With-URL-Genre/17CharacterIDhere/ "
|
||||||
+"https://fiction.live/stories/Example-Story-With-UUID/00000000-0000-4000-0000-000000000000/")
|
+"https://fiction.live/stories/Example-Story-With-UUID/00000000-0000-4000-0000-000000000000/")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_section_url(cls,url):
|
||||||
|
## minimal URL used for section names in INI and reject list
|
||||||
|
## for comparison
|
||||||
|
# logger.debug("pre--url:%s"%url)
|
||||||
|
url = re.sub(r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/(?P<id>[a-zA-Z0-9\-]+)(/(home)?)?$",r'https://fiction.live/stories//\g<id>',url)
|
||||||
|
# logger.debug("post-url:%s"%url)
|
||||||
|
return url
|
||||||
|
|
||||||
def parse_timestamp(self, timestamp):
|
def parse_timestamp(self, timestamp):
|
||||||
# fiction.live date format is unix-epoch milliseconds. not a good fit for fanficfare's makeDate.
|
# fiction.live date format is unix-epoch milliseconds. not a good fit for fanficfare's makeDate.
|
||||||
# doesn't use a timezone object and returns tz-naive datetimes. I *think* I can leave the rest to fanficfare
|
# doesn't use a timezone object and returns tz-naive datetimes. I *think* I can leave the rest to fanficfare
|
||||||
return datetime.fromtimestamp(timestamp / 1000.0, None)
|
return datetime.fromtimestamp(timestamp / 1000.0, None)
|
||||||
|
|
||||||
|
def img_url_trans(self,imgurl):
|
||||||
|
"Apparently site changed cdn URLs for images more than once."
|
||||||
|
# logger.debug("pre--imgurl:%s"%imgurl)
|
||||||
|
imgurl = re.sub(r'(\w+)\.cloudfront\.net',r'cdn6.fiction.live/file/fictionlive',imgurl)
|
||||||
|
imgurl = re.sub(r'www\.filepicker\.io/api/file/(\w+)',r'cdn4.fiction.live/fp/\1',imgurl)
|
||||||
|
imgurl = re.sub(r'cdn[34].fiction.live/(.+)',r'cdn6.fiction.live/file/fictionlive/\1',imgurl)
|
||||||
|
# logger.debug("post-imgurl:%s"%imgurl)
|
||||||
|
return imgurl
|
||||||
|
|
||||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||||
|
|
||||||
metadata_url = "https://fiction.live/api/node/{s_id}/"
|
metadata_url = "https://fiction.live/api/node/{s_id}/"
|
||||||
|
|
@ -150,7 +173,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
tags = data['ta'] if 'ta' in data else []
|
tags = data['ta'] if 'ta' in data else []
|
||||||
|
|
||||||
if (self.story.getMetadata('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
|
if (self.story.getMetadataRaw('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
|
||||||
not (self.is_adult or self.getConfig("is_adult")):
|
not (self.is_adult or self.getConfig("is_adult")):
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
raise exceptions.AdultCheckRequired(self.url)
|
||||||
|
|
||||||
|
|
@ -186,7 +209,6 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
if show_nsfw_cover_images or not nsfw_cover:
|
if show_nsfw_cover_images or not nsfw_cover:
|
||||||
coverUrl = data['i'][0]
|
coverUrl = data['i'][0]
|
||||||
self.setCoverImage(self.url, coverUrl)
|
self.setCoverImage(self.url, coverUrl)
|
||||||
self.story.setMetadata('cover_image', "<a href=\"" + coverUrl + "\" />") # TODO: is this needed?
|
|
||||||
|
|
||||||
# gonna need these later for adding details to achievement-granting links in the text
|
# gonna need these later for adding details to achievement-granting links in the text
|
||||||
try:
|
try:
|
||||||
|
|
@ -220,6 +242,17 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
next(b, None)
|
next(b, None)
|
||||||
return list(zip(a, b))
|
return list(zip(a, b))
|
||||||
|
|
||||||
|
def map_chap_ids_to_api(chapter_ids, route_ids, times):
|
||||||
|
for index, bounds in enumerate(times):
|
||||||
|
start, end = bounds
|
||||||
|
end -= 1
|
||||||
|
chapter_url = chunkrange_url.format(s_id = data['_id'], start = start, end = end)
|
||||||
|
self.chapter_id_to_api[chapter_ids[index]] = chapter_url
|
||||||
|
|
||||||
|
for route_id in route_ids:
|
||||||
|
chapter_url = route_chunkrange_url.format(c_id = route_id)
|
||||||
|
self.chapter_id_to_api[route_id] = chapter_url
|
||||||
|
|
||||||
## first thing to do is seperate out the appendices
|
## first thing to do is seperate out the appendices
|
||||||
appendices, maintext, routes = [], [], []
|
appendices, maintext, routes = [], [], []
|
||||||
chapters = data['bm'] if 'bm' in data else []
|
chapters = data['bm'] if 'bm' in data else []
|
||||||
|
|
@ -240,22 +273,25 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
## main-text chapter extraction processing. *should* now handle all the edge cases.
|
## main-text chapter extraction processing. *should* now handle all the edge cases.
|
||||||
## relies on fanficfare ignoring empty chapters!
|
## relies on fanficfare ignoring empty chapters!
|
||||||
|
|
||||||
titles = [c['title'] for c in maintext]
|
titles = ["Home"] + [c['title'] for c in maintext]
|
||||||
titles = ["Home"] + titles
|
chapter_ids = ['home'] + [c['id'] for c in maintext]
|
||||||
|
times = [data['ct']] + [c['ct'] for c in maintext] + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1
|
||||||
|
times = pair(times)
|
||||||
|
|
||||||
times = [c['ct'] for c in maintext]
|
if self.getConfig('include_appendices', True): # Add appendices after main text if desired
|
||||||
times = [data['ct']] + times + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1
|
titles = titles + ["Appendix: " + a['title'][9:] for a in appendices]
|
||||||
|
chapter_ids = chapter_ids + [a['id'] for a in appendices]
|
||||||
|
times = times + [(a['ct'], a['ct'] + 2) for a in appendices]
|
||||||
|
|
||||||
|
route_ids = [r['id'] for r in routes]
|
||||||
|
|
||||||
|
map_chap_ids_to_api(chapter_ids, route_ids, times) # Map chapter ids to API URLs for use when comparing the two
|
||||||
|
|
||||||
# doesn't actually run without the call to list.
|
# doesn't actually run without the call to list.
|
||||||
list(map(add_chapter_url, titles, pair(times)))
|
list(map(add_chapter_url, titles, times))
|
||||||
|
|
||||||
for a in appendices: # add appendices afterwards
|
|
||||||
chapter_start = a['ct']
|
|
||||||
chapter_title = "Appendix: " + a['title'][9:] # 'Appendix: ' rather than '#special' at beginning of name
|
|
||||||
add_chapter_url(chapter_title, (chapter_start, chapter_start + 2)) # 1 msec range = this one chunk only
|
|
||||||
|
|
||||||
for r in routes: # add route at the end, after appendices
|
for r in routes: # add route at the end, after appendices
|
||||||
route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp
|
route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp
|
||||||
chapter_title = "Route: " + r['title'] # 'Route: ' at beginning of name, since it's a multiroute chapter
|
chapter_title = "Route: " + r['title'] # 'Route: ' at beginning of name, since it's a multiroute chapter
|
||||||
add_route_chapter_url(chapter_title, route_id)
|
add_route_chapter_url(chapter_title, route_id)
|
||||||
|
|
||||||
|
|
@ -300,7 +336,8 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
text += "</div><br />\n"
|
text += "</div><br />\n"
|
||||||
|
|
||||||
return text
|
## soup to repair the most egregious HTML errors.
|
||||||
|
return self.utf8FromSoup(url,self.make_soup(text))
|
||||||
|
|
||||||
### everything from here out is chunk data handling.
|
### everything from here out is chunk data handling.
|
||||||
|
|
||||||
|
|
@ -317,8 +354,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
if self.achievements:
|
if self.achievements:
|
||||||
soup = self.append_achievments(soup)
|
soup = self.append_achievments(soup)
|
||||||
|
|
||||||
# utf8FromSoup does important processing e.g. sanitization and imageurl extraction
|
return str(soup)
|
||||||
return self.utf8FromSoup(self.url, soup)
|
|
||||||
|
|
||||||
def add_spoiler_legends(self, soup):
|
def add_spoiler_legends(self, soup):
|
||||||
# find spoiler links and change link-anchor block to legend block
|
# find spoiler links and change link-anchor block to legend block
|
||||||
|
|
@ -398,7 +434,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
# so let's just ignore non-int values here
|
# so let's just ignore non-int values here
|
||||||
if not isinstance(v, int):
|
if not isinstance(v, int):
|
||||||
continue
|
continue
|
||||||
if 0 <= v <= len(choices):
|
if 0 <= v < len(choices):
|
||||||
output[v] += 1
|
output[v] += 1
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
@ -482,8 +518,10 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
# now matches the site and does *not* include dicerolls as posts!
|
# now matches the site and does *not* include dicerolls as posts!
|
||||||
num_votes = str(len(posts)) + " posts" if len(posts) != 0 else "be the first to post."
|
num_votes = str(len(posts)) + " posts" if len(posts) != 0 else "be the first to post."
|
||||||
|
|
||||||
|
posts_title = chunk['b'] if 'b' in chunk else "Reader Posts"
|
||||||
|
|
||||||
output = ""
|
output = ""
|
||||||
output += u"<h4><span>Reader Posts — <small> Posting " + closed
|
output += u"<h4><span>" + posts_title + " — <small> Posting " + closed
|
||||||
output += u" — " + num_votes + "</small></span></h4>\n"
|
output += u" — " + num_votes + "</small></span></h4>\n"
|
||||||
|
|
||||||
## so. a voter can roll with their post. these rolls are in a seperate dict, but have the **same uid**.
|
## so. a voter can roll with their post. these rolls are in a seperate dict, but have the **same uid**.
|
||||||
|
|
@ -509,6 +547,35 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def normalize_chapterurl(self, url):
|
||||||
|
if url.startswith(r'https://fiction.live/api/anonkun/chapters'):
|
||||||
|
return url
|
||||||
|
|
||||||
|
pattern = None
|
||||||
|
|
||||||
|
if url.startswith(r'https://fiction.live/api/anonkun/route'):
|
||||||
|
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/routes/([a-zA-Z0-9]+)"
|
||||||
|
elif url.startswith(r'https://fiction.live/'):
|
||||||
|
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/[^/]*(/[a-zA-Z0-9]+|home)"
|
||||||
|
# regex101 rocks
|
||||||
|
|
||||||
|
if not pattern:
|
||||||
|
return url
|
||||||
|
|
||||||
|
match = re.match(pattern, url)
|
||||||
|
if not match:
|
||||||
|
return url
|
||||||
|
|
||||||
|
chapter_id = match.group(1)
|
||||||
|
|
||||||
|
if chapter_id.startswith('/'):
|
||||||
|
chapter_id = chapter_id[1:]
|
||||||
|
|
||||||
|
if chapter_id and chapter_id in self.chapter_id_to_api:
|
||||||
|
return self.chapter_id_to_api[chapter_id]
|
||||||
|
|
||||||
|
return url
|
||||||
|
|
||||||
def format_unknown(self, chunk):
|
def format_unknown(self, chunk):
|
||||||
raise NotImplementedError("Unknown chunk type ({}) in fiction.live story.".format(chunk))
|
raise NotImplementedError("Unknown chunk type ({}) in fiction.live story.".format(chunk))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,10 +40,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
||||||
self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
|
self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
|
||||||
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
|
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
|
||||||
|
|
||||||
# Always single chapters, probably should use the Anthology feature to
|
|
||||||
# merge chapters of a story
|
|
||||||
self.story.setMetadata('numChapters', 1)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
return FictionManiaTVAdapter.SITE_DOMAIN
|
return FictionManiaTVAdapter.SITE_DOMAIN
|
||||||
|
|
@ -110,7 +106,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('rating', value)
|
self.story.setMetadata('rating', value)
|
||||||
|
|
||||||
elif key == 'Complete':
|
elif key == 'Complete':
|
||||||
self.story.setMetadata('status', 'Completed' if value == 'Complete' else 'In-Progress')
|
self.story.setMetadata('status', 'Completed' if value == 'yes' else 'In-Progress')
|
||||||
|
|
||||||
elif key == 'Categories':
|
elif key == 'Categories':
|
||||||
for element in cells[1]('a'):
|
for element in cells[1]('a'):
|
||||||
|
|
@ -167,14 +163,30 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
||||||
# <div style="margin-left:10ex;margin-right:10ex">
|
# <div style="margin-left:10ex;margin-right:10ex">
|
||||||
## fetching SWI version now instead of text.
|
## fetching SWI version now instead of text.
|
||||||
htmlurl = url.replace('readtextstory','readhtmlstory')
|
htmlurl = url.replace('readtextstory','readhtmlstory')
|
||||||
soup = self.make_soup(self.get_request(htmlurl))
|
## Used to find by style, but it's inconsistent now. we've seen:
|
||||||
div = soup.find('div',style="margin-left:10ex;margin-right:10ex")
|
## margin-left:10ex;margin-right:10ex
|
||||||
if div:
|
## margin-right: 5%; margin-left: 5%
|
||||||
return self.utf8FromSoup(htmlurl,div)
|
## margin-left:5%; margin-right:5%
|
||||||
else:
|
## margin-left:5%; margin-right:5%; background: white
|
||||||
|
## And there's some without a <div> tag (or an unclosed div)
|
||||||
|
## Only the comments appear to be consistent.
|
||||||
|
beginmarker='<!--Read or display the file-->'
|
||||||
|
endmarker='''<hr size=1 noshade>
|
||||||
|
<!--review add read, top and bottom-->
|
||||||
|
'''
|
||||||
|
data = self.get_request(htmlurl)
|
||||||
|
try:
|
||||||
|
## if both markers are found, assume whatever is in between
|
||||||
|
## is the chapter text.
|
||||||
|
soup = self.make_soup(data[data.index(beginmarker):data.index(endmarker)])
|
||||||
|
return self.utf8FromSoup(htmlurl,soup)
|
||||||
|
except Exception as e:
|
||||||
|
# logger.debug(e)
|
||||||
|
# logger.debug(soup)
|
||||||
logger.debug("Story With Images(SWI) not found, falling back to HTML.")
|
logger.debug("Story With Images(SWI) not found, falling back to HTML.")
|
||||||
|
|
||||||
## fetching html version now instead of text.
|
## fetching html version now instead of text.
|
||||||
|
## Note that html and SWI pages are *not* formatted the same.
|
||||||
soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
|
soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
|
||||||
# logger.debug(soup)
|
# logger.debug(soup)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
import re
|
||||||
|
|
||||||
# py2 vs py3 transition
|
# py2 vs py3 transition
|
||||||
|
|
||||||
|
|
@ -46,6 +47,12 @@ class FictionPressComSiteAdapter(FanFictionNetSiteAdapter):
|
||||||
def _get_site_url_pattern(cls):
|
def _get_site_url_pattern(cls):
|
||||||
return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
|
return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
|
||||||
|
|
||||||
|
## normalized chapter URLs DO contain the story title now, but
|
||||||
|
## normalized to current urltitle in case of title changes.
|
||||||
|
def normalize_chapterurl(self,url):
|
||||||
|
return re.sub(r"https?://(www|m)\.(?P<keep>fictionpress\.com/s/\d+/\d+/).*",
|
||||||
|
r"https://www.\g<keep>",url)+self.urltitle
|
||||||
|
|
||||||
def getClass():
|
def getClass():
|
||||||
return FictionPressComSiteAdapter
|
return FictionPressComSiteAdapter
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,8 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
||||||
params['username']))
|
params['username']))
|
||||||
d = self.post_request(loginUrl,params,usecache=False)
|
d = self.post_request(loginUrl,params,usecache=False)
|
||||||
|
|
||||||
if "Login attempt failed..." in d:
|
if "Login attempt failed..." in d or \
|
||||||
|
'<div id="error">Please enter your username and password.</div>' in d:
|
||||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||||
params['username']))
|
params['username']))
|
||||||
raise exceptions.FailedToLogin(url,params['username'])
|
raise exceptions.FailedToLogin(url,params['username'])
|
||||||
|
|
@ -114,7 +115,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
||||||
titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
|
titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
|
||||||
self.story.setMetadata('title', stripHTML(titleh4.a))
|
self.story.setMetadata('title', stripHTML(titleh4.a))
|
||||||
|
|
||||||
if 'Deleted story' in self.story.getMetadata('title'):
|
if 'Deleted story' in self.story.getMetadataRaw('title'):
|
||||||
raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)
|
raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
# Find authorid and URL from... author url.
|
||||||
|
|
@ -129,14 +130,14 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
||||||
#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
|
#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
|
||||||
|
|
||||||
# most of the meta data is here:
|
# most of the meta data is here:
|
||||||
metap = storydiv.find("p",{"class":"meta"})
|
metap = storydiv.find("div",{"class":"meta"})
|
||||||
self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)
|
self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)
|
||||||
|
|
||||||
# warnings
|
# warnings
|
||||||
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
|
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
|
||||||
spanreq = metap.find("span",{"class":"story-warnings"})
|
spanreq = metap.find("span",{"class":"story-warnings"})
|
||||||
if spanreq: # can be no warnings.
|
if spanreq: # can be no warnings.
|
||||||
for a in spanreq.findAll("a"):
|
for a in spanreq.find_all("a"):
|
||||||
self.story.addToList('warnings',a['title'])
|
self.story.addToList('warnings',a['title'])
|
||||||
|
|
||||||
## perhaps not the most efficient way to parse this, using
|
## perhaps not the most efficient way to parse this, using
|
||||||
|
|
@ -186,7 +187,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
||||||
# no list found, so it's a one-chapter story.
|
# no list found, so it's a one-chapter story.
|
||||||
self.add_chapter(self.story.getMetadata('title'),url)
|
self.add_chapter(self.story.getMetadata('title'),url)
|
||||||
else:
|
else:
|
||||||
chapterlistlis = storylistul.findAll('li')
|
chapterlistlis = storylistul.find_all('li')
|
||||||
for chapterli in chapterlistlis:
|
for chapterli in chapterlistlis:
|
||||||
if "blocked" in chapterli['class']:
|
if "blocked" in chapterli['class']:
|
||||||
# paranoia check. We should already be logged in by now.
|
# paranoia check. We should already be logged in by now.
|
||||||
|
|
|
||||||
|
|
@ -99,6 +99,17 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
params['username']))
|
params['username']))
|
||||||
raise exceptions.FailedToLogin(url,params['username'])
|
raise exceptions.FailedToLogin(url,params['username'])
|
||||||
|
|
||||||
|
def make_soup(self,data):
|
||||||
|
soup = super(FimFictionNetSiteAdapter, self).make_soup(data)
|
||||||
|
for img in soup.select('img.lazy-img, img.user_image'):
|
||||||
|
## FimF has started a 'camo' mechanism for images that
|
||||||
|
## gets block by CF. attr data-source is original source.
|
||||||
|
if img.has_attr('data-source'):
|
||||||
|
img['src'] = img['data-source']
|
||||||
|
elif img.has_attr('data-src'):
|
||||||
|
img['src'] = img['data-src']
|
||||||
|
return soup
|
||||||
|
|
||||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
if self.is_adult or self.getConfig("is_adult"):
|
||||||
|
|
@ -106,7 +117,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
## Only needed with password protected stories, which you have
|
## Only needed with password protected stories, which you have
|
||||||
## to have logged into in the website using this account.
|
## to have logged into in the website using this account.
|
||||||
self.performLogin(self.url)
|
if self.getConfig("always_login"):
|
||||||
|
self.performLogin(self.url)
|
||||||
|
|
||||||
##---------------------------------------------------------------------------------------------------
|
##---------------------------------------------------------------------------------------------------
|
||||||
## Get the story's title page. Check if it exists.
|
## Get the story's title page. Check if it exists.
|
||||||
|
|
@ -139,7 +151,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata("authorId", author['href'].split('/')[2])
|
self.story.setMetadata("authorId", author['href'].split('/')[2])
|
||||||
self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
|
self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
|
||||||
self.story.getMetadata('authorId'),
|
self.story.getMetadata('authorId'),
|
||||||
self.story.getMetadata('author')))
|
# meta entry author can be changed by the user.
|
||||||
|
stripHTML(author)))
|
||||||
|
|
||||||
#Rating text is replaced with full words for historical compatibility after the site changed
|
#Rating text is replaced with full words for historical compatibility after the site changed
|
||||||
#on 2014-10-27
|
#on 2014-10-27
|
||||||
|
|
@ -167,12 +180,13 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# Cover image
|
# Cover image
|
||||||
if get_cover:
|
if get_cover:
|
||||||
storyImage = storyContentBox.find('img', {'class':'lazy-img'})
|
storyImage = soup.select_one('div.story_container__story_image img')
|
||||||
if storyImage:
|
if storyImage:
|
||||||
coverurl = storyImage['data-fullsize']
|
coverurl = storyImage['data-fullsize']
|
||||||
# try setting from data-fullsize, if fails, try using data-src
|
# try setting from data-fullsize, if fails, try using data-src
|
||||||
if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
|
cover_set = self.setCoverImage(self.url,coverurl)[0]
|
||||||
coverurl = storyImage['data-src']
|
if not cover_set or cover_set.startswith("failedtoload"):
|
||||||
|
coverurl = storyImage['src']
|
||||||
self.setCoverImage(self.url,coverurl)
|
self.setCoverImage(self.url,coverurl)
|
||||||
|
|
||||||
coverSource = storyImage.parent.find('a', {'class':'source'})
|
coverSource = storyImage.parent.find('a', {'class':'source'})
|
||||||
|
|
@ -284,16 +298,26 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
descriptionMeta = soup.find('meta', {'property':'og:description'})
|
descriptionMeta = soup.find('meta', {'property':'og:description'})
|
||||||
self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))
|
self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))
|
||||||
|
|
||||||
#groups
|
# groups.
|
||||||
|
# If there are more than X groups, there's a 'Show all' button
|
||||||
|
# that calls for a JSON containing HTML with the full list.
|
||||||
|
# But it doesn't work reliably with FlareSolverr.
|
||||||
|
groupList = None
|
||||||
groupButton = soup.find('button', {'data-click':'showAll'})
|
groupButton = soup.find('button', {'data-click':'showAll'})
|
||||||
if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
|
if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
|
||||||
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
|
try:
|
||||||
groupData = json.loads(groupResponse)
|
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
|
||||||
groupList = self.make_soup(groupData["content"])
|
groupData = json.loads(groupResponse)
|
||||||
else:
|
groupList = self.make_soup(groupData["content"])
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Collecting 'groups' (AKA 'Featured In') from JSON failed:%s"%e)
|
||||||
|
logger.warning("Only 'groups' initially shown on the page will be collected.")
|
||||||
|
logger.warning("This is a known issue with JSON and FlareSolverr. See #1122")
|
||||||
|
|
||||||
|
if not groupList:
|
||||||
groupList = soup.find('ul', {'id':'story-groups-list'})
|
groupList = soup.find('ul', {'id':'story-groups-list'})
|
||||||
|
|
||||||
if not (groupList == None):
|
if groupList:
|
||||||
for groupContent in groupList.find_all('a'):
|
for groupContent in groupList.find_all('a'):
|
||||||
self.story.addToList("groupsUrl", 'https://'+self.host+groupContent["href"])
|
self.story.addToList("groupsUrl", 'https://'+self.host+groupContent["href"])
|
||||||
groupName = groupContent.find('span', {"class":"group-name"})
|
groupName = groupContent.find('span', {"class":"group-name"})
|
||||||
|
|
@ -304,7 +328,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
#sequels
|
#sequels
|
||||||
for header in soup.find_all('h1', {'class':'header-stories'}):
|
for header in soup.find_all('h1', {'class':'header-stories'}):
|
||||||
# I don't know why using text=re.compile with find() wouldn't work, but it didn't.
|
# I don't know why using string=re.compile with find() wouldn't work, but it didn't.
|
||||||
if header.text.startswith('Sequels'):
|
if header.text.startswith('Sequels'):
|
||||||
sequelContainer = header.parent
|
sequelContainer = header.parent
|
||||||
for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
|
for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
|
||||||
|
|
@ -384,3 +408,33 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
||||||
# data = self.get_request(url)
|
# data = self.get_request(url)
|
||||||
if self.getConfig("is_adult"):
|
if self.getConfig("is_adult"):
|
||||||
self.set_adult_cookie()
|
self.set_adult_cookie()
|
||||||
|
|
||||||
|
def get_urls_from_page(self,url,normalize):
|
||||||
|
iterate = self.getConfig('scrape_bookshelf', default=False)
|
||||||
|
if not re.search(r'fimfiction\.net/bookshelf/(?P<listid>.+?)/',url) or iterate == 'legacy':
|
||||||
|
return super().get_urls_from_page(url,normalize)
|
||||||
|
|
||||||
|
self.before_get_urls_from_page(url,normalize)
|
||||||
|
|
||||||
|
final_urls = list()
|
||||||
|
while True:
|
||||||
|
data = self.get_request(url,usecache=True)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
paginator = soup.select_one('div.paginator-container > div.page_list > ul').find_all('li')
|
||||||
|
logger.debug("Paginator: " + str(len(paginator)))
|
||||||
|
stories_container = soup.select_one('div.content > div.two-columns > div.left').find_all('article', recursive=False)
|
||||||
|
x = 0
|
||||||
|
logger.debug("Container "+str(len(stories_container)))
|
||||||
|
for story_raw in stories_container:
|
||||||
|
x += 1
|
||||||
|
story_url = story_raw.select_one('div.story_content_box > header.title > div > a.story_name').get('href')
|
||||||
|
url_story = ('https://' + self.getSiteDomain() + story_url)
|
||||||
|
#logger.debug(url_story)
|
||||||
|
final_urls.append(url_story)
|
||||||
|
logger.debug("Discovered %s new stories."%str(x))
|
||||||
|
|
||||||
|
next_button = paginator[-1].select_one('a')
|
||||||
|
logger.debug("Next button: " + next_button.get_text())
|
||||||
|
if next_button.get_text() or not iterate:
|
||||||
|
return {'urllist': final_urls}
|
||||||
|
url = ('https://' + self.getSiteDomain() + next_button.get('href'))
|
||||||
|
|
|
||||||
|
|
@ -93,6 +93,9 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
a = soup.find('a', href=re.compile(r"profileshow.aspx\?u="))
|
a = soup.find('a', href=re.compile(r"profileshow.aspx\?u="))
|
||||||
self.story.setMetadata('authorId', a['href'].split('=')[1])
|
self.story.setMetadata('authorId', a['href'].split('=')[1])
|
||||||
|
if not self.story.getMetadata('authorId'):
|
||||||
|
logger.warning("Site authorUrl missing authorId, using SiteMissingAuthorId")
|
||||||
|
self.story.setMetadata('authorId', 'SiteMissingAuthorId')
|
||||||
self.story.setMetadata('authorUrl', 'http://' +
|
self.story.setMetadata('authorUrl', 'http://' +
|
||||||
self.host + '/' + a['href'])
|
self.host + '/' + a['href'])
|
||||||
self.story.setMetadata('author', a.string)
|
self.story.setMetadata('author', a.string)
|
||||||
|
|
@ -102,7 +105,6 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
||||||
# to download them one at a time yourself. I'm also setting the status to
|
# to download them one at a time yourself. I'm also setting the status to
|
||||||
# complete
|
# complete
|
||||||
self.add_chapter(self.story.getMetadata('title'), self.url)
|
self.add_chapter(self.story.getMetadata('title'), self.url)
|
||||||
self.story.setMetadata('numChapters', 1)
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
self.story.setMetadata('status', 'Completed')
|
||||||
|
|
||||||
## some stories do not have a summary listed, so I'm setting it here.
|
## some stories do not have a summary listed, so I'm setting it here.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2018 FanFicFare team
|
# Copyright 2024 FanFicFare team
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
|
@ -18,15 +18,15 @@
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .base_xenforoforum_adapter import BaseXenForoForumAdapter
|
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
|
||||||
|
|
||||||
def getClass():
|
def getClass():
|
||||||
return QuestionablequestingComAdapter
|
return QuestionablequestingComAdapter
|
||||||
|
|
||||||
class QuestionablequestingComAdapter(BaseXenForoForumAdapter):
|
class QuestionablequestingComAdapter(BaseXenForo2ForumAdapter):
|
||||||
|
|
||||||
def __init__(self, config, url):
|
def __init__(self, config, url):
|
||||||
BaseXenForoForumAdapter.__init__(self, config, url)
|
BaseXenForo2ForumAdapter.__init__(self, config, url)
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
self.story.setMetadata('siteabbrev','qq')
|
self.story.setMetadata('siteabbrev','qq')
|
||||||
|
|
|
||||||
|
|
@ -1,169 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
self.story.setMetadata('siteabbrev','hp')
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only psid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%Y-%m-%d %H:%M%p"
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'harrypotterfanfiction.com'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "https://harrypotterfanfiction.com/viewstory.php?psid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$"
|
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
url = self.url
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
url = url+'&showRestricted'
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if "This story may contain chapters not appropriate for a general audience." in data and not (self.is_adult or self.getConfig("is_adult")):
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
## Don't know if these still apply
|
|
||||||
# if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
# raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
# elif "ERROR locating story meta for psid" in data:
|
|
||||||
# raise exceptions.StoryDoesNotExist(self.url)
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
## Title
|
|
||||||
h2 = soup.find('h2')
|
|
||||||
h2.find('i').extract() # remove author
|
|
||||||
self.story.setMetadata('title',stripHTML(h2))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string[3:]) # remove 'by '
|
|
||||||
|
|
||||||
## hpcom doesn't always give us total words--but it does give
|
|
||||||
## us words/chapter. I'd rather add than fetch and parse
|
|
||||||
## another page.
|
|
||||||
chapter_words=0
|
|
||||||
for tr in soup.find('table',{'class':'table-chapters'}).find('tbody').findAll('tr'):
|
|
||||||
tdstr = tr.findAll('td')[2].string
|
|
||||||
chapter = tr.find('a')
|
|
||||||
chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href'])
|
|
||||||
added = self.add_chapter(chapter,'https://'+self.host+'/viewstory.php'+chpt)
|
|
||||||
if added and tdstr and tdstr.isdigit():
|
|
||||||
chapter_words+=int(tdstr)
|
|
||||||
## used below if total words from site not found
|
|
||||||
|
|
||||||
# fetch author page to get story description.
|
|
||||||
authorsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
|
||||||
|
|
||||||
for story in authorsoup.find_all('article',class_='story-summary'):
|
|
||||||
storya = story.find('h3').find('a',href=re.compile(r"^/viewstory.php\?psid="+self.story.getMetadata('storyId')))
|
|
||||||
if storya:
|
|
||||||
storydiv = storya.find_parent('div')
|
|
||||||
break
|
|
||||||
|
|
||||||
desc = storydiv.find('div',class_='story-summary__summary')
|
|
||||||
self.setDescription(url,desc)
|
|
||||||
|
|
||||||
# <div class='entry'>
|
|
||||||
# <div class='entry__key'>Rating</div>
|
|
||||||
# <div class='entry__value'>Mature</div>
|
|
||||||
# </div>
|
|
||||||
|
|
||||||
meta_key_map = {
|
|
||||||
'Rating':'rating',
|
|
||||||
'Words':'numWords',
|
|
||||||
'Characters':'characters',
|
|
||||||
'Primary Relationship':'ships',
|
|
||||||
'Secondary Relationship(s)':'ships',
|
|
||||||
'Genre(s)':'genre',
|
|
||||||
'Era':'era',
|
|
||||||
'Advisory':'warnings',
|
|
||||||
'Story Reviews':'reviews',
|
|
||||||
# 'Status':'', # Status is treated special
|
|
||||||
'First Published':'datePublished',
|
|
||||||
'Last Updated':'dateUpdated',
|
|
||||||
}
|
|
||||||
for key in soup.find_all('div',{'class':'entry__key'}):
|
|
||||||
value = stripHTML(key.find_next('div',{'class':'entry__value'}))
|
|
||||||
key = stripHTML(key)
|
|
||||||
meta = meta_key_map.get(key,None)
|
|
||||||
if meta:
|
|
||||||
if meta.startswith('date'):
|
|
||||||
value = makeDate(value,self.dateformat)
|
|
||||||
if meta in ('characters','genre','ships'):
|
|
||||||
self.story.extendList(meta,value.split(','))
|
|
||||||
else:
|
|
||||||
self.story.setMetadata(meta,value)
|
|
||||||
if key == 'Status':
|
|
||||||
if value == 'WIP':
|
|
||||||
value = 'In-Progress'
|
|
||||||
elif value == 'COMPLETED':
|
|
||||||
value = 'Completed'
|
|
||||||
# 'Abandoned' and other possible values used as-is
|
|
||||||
self.story.setMetadata('status',value)
|
|
||||||
|
|
||||||
# older stories don't present total words, use sum from chapters.
|
|
||||||
if not self.story.getMetadata('numWords'):
|
|
||||||
self.story.setMetadata('numWords',chapter_words)
|
|
||||||
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
div = soup.find('div', {'class' : 'storytext-container'})
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return HarryPotterFanFictionComSiteAdapter
|
|
||||||
|
|
@ -1,216 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
    """Module-level hook: return the adapter class this module implements."""
    return HLFictionNetAdapter
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class HLFictionNetAdapter(BaseSiteAdapter):
    """Adapter for hlfiction.net, an eFiction-based fan fiction archive.

    Story pages live at /viewstory.php?sid=NNNN.  The story page only
    supplies title, author and the chapter <select> list; the rest of
    the metadata is scraped from the author's profile page.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','hlf')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'hlfiction.net'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)

        ## Title and author both live in <div id="pagetitle">.
        a = soup.find('div', {'id' : 'pagetitle'})

        aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',aut['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+aut['href'])
        self.story.setMetadata('author',aut.string)
        # remove the author link so only the title text remains.
        aut.extract()

        # trim trailing characters left after removing the author link.
        # NOTE(review): assumes pagetitle is "Title by Author" so the
        # last 3 characters of the remainder are " by " -- confirm
        # against a live page.
        self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])

        # Find the chapters: multi-chapter stories have a <select> of
        # chapter numbers; one-shots don't.
        chapters=soup.find('select')
        if chapters is not None:
            for chapter in chapters.findAll('option'):
                # just in case there's tags, like <i> in chapter titles.
                self.add_chapter(chapter,'https://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])
        else:
            self.add_chapter(self.story.getMetadata('title'),url)

        # Remaining metadata comes from the author's profile page.
        asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

        # Find the 'listbox' div belonging to *this* story.
        # (renamed from 'list', which shadowed the builtin)
        for listbox in asoup.findAll('div', {'class' : re.compile('listbox')}):
            a = listbox.find('a')
            if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
                break

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: attribute lookup that tolerates
        # NavigableStrings (no attributes) and missing keys.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except Exception:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = listbox.findAll('span', {'class' : 'classification'})
        for labelspan in labels:
            label = labelspan.string
            value = labelspan.nextSibling

            if 'Summary' in label:
                ## Everything until the next span class='classification'
                svalue = ""
                while 'classification' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                # drop the two trailing characters after the rating text
                # (site formatting artifact -- TODO confirm).
                self.story.setMetadata('rating', value[:len(value)-2])

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                for char in value.string.split(', '):
                    if not 'None' in char:
                        self.story.addToList('characters',char)

            if 'Genre' in label:
                for genre in value.string.split(', '):
                    if not 'None' in genre:
                        self.story.addToList('genre',genre)

            if 'Warnings' in label:
                for warning in value.string.split(', '):
                    if not 'None' in warning:
                        self.story.addToList('warnings',warning)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = listbox.find('a', href=re.compile(r"series.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            # The story's position among the series' story links gives
            # its index within the series.
            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1

        except Exception:
            # series info is optional, best-effort only.
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return its cleaned-up HTML body."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -1,215 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from bs4.element import Comment
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
    """Module-level hook: return the adapter class this module implements."""
    return HPFanficArchiveComAdapter
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class HPFanficArchiveComAdapter(BaseSiteAdapter):
    """Adapter for hpfanficarchive.com, an eFiction-based archive.

    Stories live under /stories/viewstory.php?sid=NNNN.  Metadata is
    parsed from <span class="label"> runs on the story page itself.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL( self.getProtocol() + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','hpffa')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'hpfanficarchive.com'

    @classmethod
    def getProtocol(cls):
        # has changed from http to https to http again.
        return "http://"

    @classmethod
    def getSiteExampleURLs(cls):
        return cls.getProtocol()+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        # accept either protocol even though getProtocol() currently
        # normalizes to http.
        return r"https?:"+re.escape("//"+self.getSiteDomain()+"/stories/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        elif "That story either does not exist on this archive or has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: That story either does not exist on this archive or has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)

        ## Title -- the link pointing back at this story's own URL.
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('div', id="mainpage").find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl',self.getProtocol()+self.host+'/stories/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,self.getProtocol()+self.host+'/stories/'+chapter['href'])

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: attribute lookup that tolerates
        # NavigableStrings (no attributes) and missing keys.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except Exception:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            # collect everything between this label span and the next
            # one into 'value', skipping HTML comments.
            val = labelspan.nextSibling
            value = unicode('')
            while val and not 'label' in defaultGetattr(val,'class'):
                # print("val:%s"%val)
                if not isinstance(val,Comment):
                    value += unicode(val)
                val = val.nextSibling
            label = labelspan.string
            # print("label:%s\nvalue:%s"%(label,value))

            if 'Summary' in label:
                self.setDescription(url,value)

            if 'Rated' in label:
                self.story.setMetadata('rating', stripHTML(value))

            if 'Word count' in label:
                self.story.setMetadata('numWords', stripHTML(value))

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Pairing' in label:
                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                for ship in ships:
                    self.story.addToList('ships',ship.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in stripHTML(value):
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = self.getProtocol()+self.host+'/stories/'+a['href']

            # The story's position among the series' story links gives
            # its index within the series.
            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1

        except Exception:
            # series info is optional, best-effort only.
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return its cleaned-up HTML body."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -1,262 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
    """Module-level hook: return the adapter class this module implements."""
    return IkEternalNetAdapter
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class IkEternalNetAdapter(BaseSiteAdapter):
    """Adapter for www.ik-eternal.net, an eFiction-based archive.

    Handles the site's optional login and its '&warning=N' adult-content
    gate in addition to the usual eFiction metadata scraping.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ike')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'www.ik-eternal.net'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        # True when the page says a login is required or a previous
        # login attempt failed.
        return ('Registered Users Only' in data
                or 'There is no such account on our website' in data
                or "That password doesn't match the one in our database" in data)

    def performLogin(self, url):
        """POST the login form; return True on success.

        Raises FailedToLogin when the site does not show the logged-in
        'Member Account' marker afterwards.
        """
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self.post_request(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            # (an unreachable 'return False' after this raise was removed)
            raise exceptions.FailedToLogin(url,params['penname'])
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=1"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self.get_request(url)

        # The actual text used to announce you need to be an adult
        # varies from site to site and by warning level, so look for the
        # warning pass url in the raw page instead, e.g.:
        #   viewstory.php?sid=1882&warning=4
        #   viewstory.php?sid=1654&ageconsent=ok&warning=5
        # NOTE(review): raw HTML separates query params with '&amp;';
        # this pattern and the replace below rely on that (a previous
        # garbled version had a no-op replace("&","&")) -- confirm
        # against a live page.
        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
        if m is not None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                data = self.get_request(url)
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)

        ## Title -- the link pointing back at this story's own URL.
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            # carry addurl along so adult-gated chapters load, too.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: attribute lookup that tolerates
        # NavigableStrings (no attributes) and missing keys.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except Exception:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        asoup = soup.find('div', {'class': 'listbox'})
        # flatten <p> to <br> so the label/value sibling walk below is
        # not broken by paragraph nesting.
        for a in asoup.findAll('p'):
            a.name='br'
        labels = asoup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return its cleaned-up HTML body."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
|
|
||||||
|
|
@ -161,7 +161,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('author',a.string)
|
self.story.setMetadata('author',a.string)
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
|
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
|
||||||
|
|
||||||
|
|
@ -178,7 +178,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
labels = soup.find_all('span',{'class':'label'})
|
||||||
for labelspan in labels:
|
for labelspan in labels:
|
||||||
value = labelspan.nextSibling
|
value = labelspan.nextSibling
|
||||||
label = labelspan.string
|
label = labelspan.string
|
||||||
|
|
@ -199,22 +199,22 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('numWords', value)
|
self.story.setMetadata('numWords', value)
|
||||||
|
|
||||||
if 'Categories' in label:
|
if 'Categories' in label:
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
if 'Characters' in label:
|
if 'Characters' in label:
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||||
for char in chars:
|
for char in chars:
|
||||||
self.story.addToList('characters',char.string)
|
self.story.addToList('characters',char.string)
|
||||||
|
|
||||||
if 'Genre' in label:
|
if 'Genre' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||||
for genre in genres:
|
for genre in genres:
|
||||||
self.story.addToList('genre',genre.string)
|
self.story.addToList('genre',genre.string)
|
||||||
|
|
||||||
if 'Warnings' in label:
|
if 'Warnings' in label:
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||||
for warning in warnings:
|
for warning in warnings:
|
||||||
self.story.addToList('warnings',warning.string)
|
self.story.addToList('warnings',warning.string)
|
||||||
|
|
||||||
|
|
@ -238,7 +238,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
# skip 'report this' and 'TOC' links
|
# skip 'report this' and 'TOC' links
|
||||||
|
|
|
||||||
28
fanficfare/adapters/adapter_inkbunnynet.py
Normal file → Executable file
28
fanficfare/adapters/adapter_inkbunnynet.py
Normal file → Executable file
|
|
@ -125,7 +125,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
||||||
soup = self.make_soup(self.get_request(url,usecache=False))
|
soup = self.make_soup(self.get_request(url,usecache=False))
|
||||||
|
|
||||||
# removing all of the scripts
|
# removing all of the scripts
|
||||||
for tag in soup.findAll('script'):
|
for tag in soup.find_all('script'):
|
||||||
tag.extract()
|
tag.extract()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -134,7 +134,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('title', stripHTML(title))
|
self.story.setMetadata('title', stripHTML(title))
|
||||||
|
|
||||||
# Get Author
|
# Get Author
|
||||||
authortag = soup.find('table',{'class':'pooltable'}).find('a',href=re.compile(r'/gallery/'))
|
authortag = soup.find('table',{'class':'pooltable'}).find('a',href=re.compile(r'/gallery/|/scraps/'))
|
||||||
author = authortag['href'].split('/')[-1] # no separate ID
|
author = authortag['href'].split('/')[-1] # no separate ID
|
||||||
self.story.setMetadata('author', author)
|
self.story.setMetadata('author', author)
|
||||||
self.story.setMetadata('authorId', author)
|
self.story.setMetadata('authorId', author)
|
||||||
|
|
@ -149,7 +149,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
||||||
if not self.getConfig('keep_summary_html'):
|
if not self.getConfig('keep_summary_html'):
|
||||||
synopsis = stripHTML(synopsis)
|
synopsis = stripHTML(synopsis)
|
||||||
|
|
||||||
self.setDescription(url, stripHTML(synopsis))
|
self.setDescription(url, synopsis)
|
||||||
|
|
||||||
#Getting Keywords/Genres
|
#Getting Keywords/Genres
|
||||||
keywords = bookdetails.find('div', {'id':'kw_scroll'}).find_next_siblings('div')[0].div.div.find_all('a')
|
keywords = bookdetails.find('div', {'id':'kw_scroll'}).find_next_siblings('div')[0].div.div.find_all('a')
|
||||||
|
|
@ -157,10 +157,11 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
||||||
self.story.addToList('genre', stripHTML(kword))
|
self.story.addToList('genre', stripHTML(kword))
|
||||||
|
|
||||||
# Getting the Category
|
# Getting the Category
|
||||||
|
category = bookdetails.findChildren('div', recursive=False)[2].find('span', string='Type:').parent
|
||||||
|
category.find('span').decompose()
|
||||||
|
self.story.setMetadata('category', stripHTML(category))
|
||||||
for div in bookdetails.find_all('div'):
|
for div in bookdetails.find_all('div'):
|
||||||
if 'Details' == stripHTML(div).strip():
|
if 'Rating:' == stripHTML(div)[:7]:
|
||||||
self.story.setMetadata('category', div.find_next_siblings('div')[0].span.next_sibling.strip())
|
|
||||||
elif 'Rating:' == stripHTML(div).strip()[:7]:
|
|
||||||
rating = div.span.next_sibling.strip()
|
rating = div.span.next_sibling.strip()
|
||||||
self.story.setMetadata('rating', rating)
|
self.story.setMetadata('rating', rating)
|
||||||
break
|
break
|
||||||
|
|
@ -178,7 +179,14 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
||||||
if get_cover:
|
if get_cover:
|
||||||
cover_img = soup.find('img', {'id':'magicbox'})
|
cover_img = soup.find('img', {'id':'magicbox'})
|
||||||
if cover_img:
|
if cover_img:
|
||||||
|
# image content is treated like a normal image submission
|
||||||
self.setCoverImage(url, cover_img['src'])
|
self.setCoverImage(url, cover_img['src'])
|
||||||
|
else:
|
||||||
|
# image content is present, but secondary to text file
|
||||||
|
cover_div = soup.find('div', {'class': 'content magicboxParent'})
|
||||||
|
cover_img = cover_div.find('img', {'class':'shadowedimage'}) if cover_div else None
|
||||||
|
if cover_img:
|
||||||
|
self.setCoverImage(url, cover_img['src'])
|
||||||
|
|
||||||
## Save for use below
|
## Save for use below
|
||||||
self.soup = soup
|
self.soup = soup
|
||||||
|
|
@ -192,3 +200,11 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s No text block found -- non-story URL?" % url)
|
raise exceptions.FailedToDownload("Error downloading Chapter: %s No text block found -- non-story URL?" % url)
|
||||||
|
|
||||||
return self.utf8FromSoup(url, story)
|
return self.utf8FromSoup(url, story)
|
||||||
|
|
||||||
|
def before_get_urls_from_page(self,url,normalize):
|
||||||
|
# To display the links to stories that are not available to guests.
|
||||||
|
if self.getConfig("username") and self.getConfig("always_login"):
|
||||||
|
# performLogin extracts token from the soup
|
||||||
|
soup = self.make_soup(self.get_request(url))
|
||||||
|
|
||||||
|
self.performLogin(url, soup)
|
||||||
|
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import re
|
|
||||||
from .base_efiction_adapter import BaseEfictionAdapter
|
|
||||||
|
|
||||||
class ItCouldHappenNetSiteAdapter(BaseEfictionAdapter):
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'it-could-happen.net'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteAbbrev(seluuf):
|
|
||||||
return 'ich'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getDateFormat(self):
|
|
||||||
return "%B %d, %Y"
|
|
||||||
|
|
||||||
def handleMetadataPair(self, key, value):
|
|
||||||
# This site is all one 'category' as it's usually defined and
|
|
||||||
# uses Category for what is usually genre.
|
|
||||||
if key == 'Categories':
|
|
||||||
for val in re.split(r"\s*,\s*", value):
|
|
||||||
self.story.addToList('genre', val)
|
|
||||||
else:
|
|
||||||
super(ItCouldHappenNetSiteAdapter, self).handleMetadataPair(key, value)
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return ItCouldHappenNetSiteAdapter
|
|
||||||
213
fanficfare/adapters/adapter_kakuyomujp.py
Normal file
213
fanficfare/adapters/adapter_kakuyomujp.py
Normal file
|
|
@ -0,0 +1,213 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
import logging, time
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
import re, json
|
||||||
|
|
||||||
|
from .. import exceptions as exceptions
|
||||||
|
|
||||||
|
# py2 vs py3 transition
|
||||||
|
from ..six.moves import http_cookiejar as cl
|
||||||
|
|
||||||
|
from .base_adapter import BaseSiteAdapter, makeDate
|
||||||
|
|
||||||
|
def getClass():
|
||||||
|
return KakuyomuJpAdapter
|
||||||
|
|
||||||
|
genres = {
|
||||||
|
'FANTASY': '異世界ファンタジー',
|
||||||
|
'ACTION': '現代ファンタジー',
|
||||||
|
'SF': 'SF',
|
||||||
|
'LOVE_STORY': '恋愛',
|
||||||
|
'ROMANCE': 'ラブコメ',
|
||||||
|
'DRAMA': '現代ドラマ',
|
||||||
|
'HORROR': 'ホラー',
|
||||||
|
'MYSTERY': 'ミステリー',
|
||||||
|
'NONFICTION': 'エッセイ・ノンフィクション',
|
||||||
|
'HISTORY': '歴史・時代・伝奇',
|
||||||
|
'CRITICISM': '創作論・評論',
|
||||||
|
'OTHERS': '詩・童話・その他',
|
||||||
|
'FAN_FICTION': '二次創作',
|
||||||
|
}
|
||||||
|
|
||||||
|
class KakuyomuJpAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
|
def __init__(self, config, url):
|
||||||
|
BaseSiteAdapter.__init__(self, config, url)
|
||||||
|
|
||||||
|
self.story.setMetadata('siteabbrev', 'kakuyomu')
|
||||||
|
self.story.setMetadata('language', 'Japanese')
|
||||||
|
|
||||||
|
self.storyId = self.path.split('/')[-1]
|
||||||
|
self.story.setMetadata('storyId', self.storyId)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getSiteDomain():
|
||||||
|
return 'kakuyomu.jp'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getSiteExampleURLs(cls):
|
||||||
|
return ("https://kakuyomu.jp/works/12341234123412341234")
|
||||||
|
|
||||||
|
def getSiteURLPattern(self):
|
||||||
|
return r"^https?://kakuyomu\.jp/works/[0-9]+$"
|
||||||
|
|
||||||
|
def extractChapterUrlsAndMetadata(self):
|
||||||
|
data = self.get_request(self.url)
|
||||||
|
|
||||||
|
# Page could not be found
|
||||||
|
if 'お探しのページは見つかりませんでした' in data:
|
||||||
|
raise exceptions.StoryDoesNotExist(self.url)
|
||||||
|
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
info = json.loads(soup.find(id='__NEXT_DATA__').contents[0])['props']['pageProps']['__APOLLO_STATE__']
|
||||||
|
|
||||||
|
workKey = 'Work:%s' % self.storyId
|
||||||
|
|
||||||
|
# Title
|
||||||
|
self.story.setMetadata('title', info[workKey]['title'])
|
||||||
|
|
||||||
|
# Author
|
||||||
|
authorKey = info[workKey]['author']['__ref']
|
||||||
|
self.story.setMetadata('authorId', authorKey.split(':')[1])
|
||||||
|
self.story.setMetadata('authorUrl', 'https://kakuyomu.jp/users/%s' % info[authorKey]['name'])
|
||||||
|
self.story.setMetadata('author', info[authorKey]['activityName'])
|
||||||
|
|
||||||
|
# Description
|
||||||
|
self.setDescription(self.url, info[workKey]['introduction'])
|
||||||
|
self.story.setMetadata('catchphrase', info[workKey]['catchphrase'])
|
||||||
|
|
||||||
|
# Date Published and Updated
|
||||||
|
# 2024-01-01T03:00:12Z
|
||||||
|
self.story.setMetadata('datePublished',
|
||||||
|
makeDate(info[workKey]['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'))
|
||||||
|
self.story.setMetadata('dateUpdated',
|
||||||
|
makeDate(info[workKey]['editedAt'], '%Y-%m-%dT%H:%M:%SZ'))
|
||||||
|
|
||||||
|
# Character count
|
||||||
|
self.story.setMetadata('numWords', info[workKey]['totalCharacterCount'])
|
||||||
|
|
||||||
|
# Status
|
||||||
|
completed = info[workKey]['serialStatus'] == 'COMPLETED'
|
||||||
|
self.story.setMetadata('status', 'Completed' if completed else 'In-Progress')
|
||||||
|
|
||||||
|
# Warnings
|
||||||
|
rating = 'G'
|
||||||
|
if info[workKey]['isCruel']:
|
||||||
|
rating = 'R15'
|
||||||
|
self.story.addToList('warnings', '残酷描写有り')
|
||||||
|
if info[workKey]['isViolent']:
|
||||||
|
rating = 'R15'
|
||||||
|
self.story.addToList('warnings', '暴力描写有り')
|
||||||
|
if info[workKey]['isSexual']:
|
||||||
|
rating = 'R15'
|
||||||
|
self.story.addToList('warnings', '性描写有り')
|
||||||
|
|
||||||
|
# Tags
|
||||||
|
for tag in info[workKey]['tagLabels']:
|
||||||
|
if re.match(r'[RrR].?[11][55]', tag) is None:
|
||||||
|
self.story.addToList('freeformtags', tag)
|
||||||
|
else:
|
||||||
|
rating = 'R15'
|
||||||
|
|
||||||
|
# Rating
|
||||||
|
self.story.setMetadata('rating', rating)
|
||||||
|
|
||||||
|
# Genre
|
||||||
|
self.story.setMetadata('genre', genres[info[workKey]['genre']])
|
||||||
|
|
||||||
|
if info[workKey]['genre'] == 'FAN_FICTION':
|
||||||
|
fandomKey = info[workKey]['fanFictionSource']['__ref']
|
||||||
|
self.story.addToList('fandoms', info[fandomKey]['title'])
|
||||||
|
|
||||||
|
# Ratings, Comments, Etc.
|
||||||
|
self.story.setMetadata('reviews', info[workKey]['reviewCount'])
|
||||||
|
self.story.setMetadata('points', info[workKey]['totalReviewPoint'])
|
||||||
|
self.story.setMetadata('comments', info[workKey]['totalPublicEpisodeCommentCount'])
|
||||||
|
self.story.setMetadata('views', info[workKey]['totalReadCount'])
|
||||||
|
self.story.setMetadata('follows', info[workKey]['totalFollowers'])
|
||||||
|
self.story.setMetadata('collections', len(info[workKey]['publicWorkCollections']))
|
||||||
|
self.story.setMetadata('events', info[workKey]['totalWorkContestCount'] + info[workKey]['totalUserEventCount'])
|
||||||
|
self.story.setMetadata('published', info[workKey]['hasPublication'])
|
||||||
|
|
||||||
|
# visitorWorkFollowing
|
||||||
|
# workReviewByVisitor
|
||||||
|
|
||||||
|
# Chapters, Episodes
|
||||||
|
|
||||||
|
# TOC nodes are in a list
|
||||||
|
# each have a list of named episodes
|
||||||
|
# each can have a named chapter
|
||||||
|
# named chapters can be at depth 1 or 2
|
||||||
|
# episodes might be empty (premium subscription)
|
||||||
|
|
||||||
|
prependSectionTitles = self.getConfig('prepend_section_titles', 'firstepisode')
|
||||||
|
|
||||||
|
numEpisodes = 0
|
||||||
|
titles = []
|
||||||
|
nestingLevel = 0
|
||||||
|
newSection = False
|
||||||
|
for tocNodeRef in info[workKey]['tableOfContentsV2']:
|
||||||
|
tocNode = info[tocNodeRef['__ref']]
|
||||||
|
|
||||||
|
if tocNode['chapter'] is not None:
|
||||||
|
chapter = info[tocNode['chapter']['__ref']]
|
||||||
|
while chapter['level'] <= nestingLevel:
|
||||||
|
titles.pop()
|
||||||
|
nestingLevel -= 1
|
||||||
|
titles.append(chapter['title'])
|
||||||
|
nestingLevel = chapter['level']
|
||||||
|
newSection = True
|
||||||
|
else:
|
||||||
|
titles = []
|
||||||
|
nestingLevel = 0
|
||||||
|
newSection = False
|
||||||
|
|
||||||
|
for episodeRef in tocNode['episodeUnions']:
|
||||||
|
if not episodeRef['__ref'].startswith('EmptyEpisode'):
|
||||||
|
numEpisodes += 1
|
||||||
|
episode = info[episodeRef['__ref']]
|
||||||
|
epUrl = 'https://kakuyomu.jp/works/' + self.storyId + '/episodes/' + episode['id']
|
||||||
|
epTitle = episode['title']
|
||||||
|
|
||||||
|
if ((len(titles) > 0) and
|
||||||
|
((newSection and prependSectionTitles == 'firstepisode') or
|
||||||
|
prependSectionTitles == 'true')):
|
||||||
|
titles.append(epTitle)
|
||||||
|
# bracket with ZWSP to mark presence of section titles
|
||||||
|
epTitle = u'\u200b' + u'\u3000\u200b'.join(titles)
|
||||||
|
titles.pop()
|
||||||
|
|
||||||
|
self.add_chapter(epTitle, epUrl)
|
||||||
|
newSection = False
|
||||||
|
|
||||||
|
logger.debug("Story: <%s>", self.story)
|
||||||
|
return
|
||||||
|
|
||||||
|
def getChapterText(self, url):
|
||||||
|
logger.debug('Getting chapter text from <%s>' % url)
|
||||||
|
|
||||||
|
soup = self.make_soup(self.get_request(url))
|
||||||
|
soup = soup.find('div', {'class':'widget-episodeBody js-episode-body'})
|
||||||
|
if soup is None:
|
||||||
|
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||||
|
soup.attrs = {'class':'episode-body'}
|
||||||
|
|
||||||
|
return self.utf8FromSoup(url, soup)
|
||||||
|
|
||||||
|
|
@ -144,13 +144,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
|
|
||||||
# Find authorid and URL from... author urls.
|
# Find authorid and URL from... author urls.
|
||||||
pagetitle = soup.find('div',id='pagetitle')
|
pagetitle = soup.find('div',id='pagetitle')
|
||||||
for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")):
|
for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
|
||||||
self.story.addToList('authorId',a['href'].split('=')[1])
|
self.story.addToList('authorId',a['href'].split('=')[1])
|
||||||
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
|
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
|
||||||
self.story.addToList('author',stripHTML(a))
|
self.story.addToList('author',stripHTML(a))
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
|
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
|
||||||
|
|
||||||
|
|
@ -166,7 +166,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
labels = soup.find_all('span',{'class':'label'})
|
||||||
for labelspan in labels:
|
for labelspan in labels:
|
||||||
value = labelspan.nextSibling
|
value = labelspan.nextSibling
|
||||||
label = stripHTML(labelspan)
|
label = stripHTML(labelspan)
|
||||||
|
|
@ -193,7 +193,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
self.story.setMetadata('numWords', value)
|
self.story.setMetadata('numWords', value)
|
||||||
|
|
||||||
if 'Categories' in label:
|
if 'Categories' in label:
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
catstext = [stripHTML(cat) for cat in cats]
|
catstext = [stripHTML(cat) for cat in cats]
|
||||||
for cat in catstext:
|
for cat in catstext:
|
||||||
# ran across one story with an empty <a href="browse.php?type=categories&catid=1"></a>
|
# ran across one story with an empty <a href="browse.php?type=categories&catid=1"></a>
|
||||||
|
|
@ -204,7 +204,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
if 'Characters' in label:
|
if 'Characters' in label:
|
||||||
self.story.addToList('characters','Kirk')
|
self.story.addToList('characters','Kirk')
|
||||||
self.story.addToList('characters','Spock')
|
self.story.addToList('characters','Spock')
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||||
charstext = [stripHTML(char) for char in chars]
|
charstext = [stripHTML(char) for char in chars]
|
||||||
for char in charstext:
|
for char in charstext:
|
||||||
self.story.addToList('characters',stripHTML(char))
|
self.story.addToList('characters',stripHTML(char))
|
||||||
|
|
@ -213,7 +213,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
## leaving it in. Check to make sure the type_id number
|
## leaving it in. Check to make sure the type_id number
|
||||||
## is correct, though--it's site specific.
|
## is correct, though--it's site specific.
|
||||||
if 'Genre' in label:
|
if 'Genre' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
|
||||||
genrestext = [stripHTML(genre) for genre in genres]
|
genrestext = [stripHTML(genre) for genre in genres]
|
||||||
self.genre = ', '.join(genrestext)
|
self.genre = ', '.join(genrestext)
|
||||||
for genre in genrestext:
|
for genre in genrestext:
|
||||||
|
|
@ -223,7 +223,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
## has 'Story Type', which is much more what most sites
|
## has 'Story Type', which is much more what most sites
|
||||||
## call genre.
|
## call genre.
|
||||||
if 'Story Type' in label:
|
if 'Story Type' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
|
||||||
genrestext = [stripHTML(genre) for genre in genres]
|
genrestext = [stripHTML(genre) for genre in genres]
|
||||||
self.genre = ', '.join(genrestext)
|
self.genre = ', '.join(genrestext)
|
||||||
for genre in genrestext:
|
for genre in genrestext:
|
||||||
|
|
@ -233,21 +233,21 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
## leaving it in. Check to make sure the type_id number
|
## leaving it in. Check to make sure the type_id number
|
||||||
## is correct, though--it's site specific.
|
## is correct, though--it's site specific.
|
||||||
if 'Warnings' in label:
|
if 'Warnings' in label:
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
||||||
warningstext = [stripHTML(warning) for warning in warnings]
|
warningstext = [stripHTML(warning) for warning in warnings]
|
||||||
self.warning = ', '.join(warningstext)
|
self.warning = ', '.join(warningstext)
|
||||||
for warning in warningstext:
|
for warning in warningstext:
|
||||||
self.story.addToList('warnings',stripHTML(warning))
|
self.story.addToList('warnings',stripHTML(warning))
|
||||||
|
|
||||||
if 'Universe' in label:
|
if 'Universe' in label:
|
||||||
universes = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
|
universes = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
|
||||||
universestext = [stripHTML(universe) for universe in universes]
|
universestext = [stripHTML(universe) for universe in universes]
|
||||||
self.universe = ', '.join(universestext)
|
self.universe = ', '.join(universestext)
|
||||||
for universe in universestext:
|
for universe in universestext:
|
||||||
self.story.addToList('universe',stripHTML(universe))
|
self.story.addToList('universe',stripHTML(universe))
|
||||||
|
|
||||||
if 'Crossover Fandom' in label:
|
if 'Crossover Fandom' in label:
|
||||||
crossoverfandoms = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
|
crossoverfandoms = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
|
||||||
crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms]
|
crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms]
|
||||||
self.crossoverfandom = ', '.join(crossoverfandomstext)
|
self.crossoverfandom = ', '.join(crossoverfandomstext)
|
||||||
for crossoverfandom in crossoverfandomstext:
|
for crossoverfandom in crossoverfandomstext:
|
||||||
|
|
@ -274,7 +274,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
||||||
series_url = 'https://'+self.host+'/'+a['href']
|
series_url = 'https://'+self.host+'/'+a['href']
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
# skip 'report this' and 'TOC' links
|
# skip 'report this' and 'TOC' links
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@
|
||||||
|
|
||||||
# Software: eFiction
|
# Software: eFiction
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
from ..htmlcleanup import stripHTML
|
||||||
from .base_efiction_adapter import BaseEfictionAdapter
|
from .base_efiction_adapter import BaseEfictionAdapter
|
||||||
|
|
||||||
class LibraryOfMoriaComAdapter(BaseEfictionAdapter):
|
class LibraryOfMoriaComAdapter(BaseEfictionAdapter):
|
||||||
|
|
@ -37,5 +38,19 @@ class LibraryOfMoriaComAdapter(BaseEfictionAdapter):
|
||||||
def getDateFormat(self):
|
def getDateFormat(self):
|
||||||
return "%B %d, %Y"
|
return "%B %d, %Y"
|
||||||
|
|
||||||
|
def getRatingFromTOC(self):
|
||||||
|
# In many eFiction sites, the Rating is not included in
|
||||||
|
# print page, but is on the TOC page. At least one site's rating
|
||||||
|
# (libraryofmoriacom) differs enough to be problematic.
|
||||||
|
toc = self.url + "&index=1"
|
||||||
|
soup = self.make_soup(self.get_request(toc))
|
||||||
|
for label in soup.select('div.listbox b'):
|
||||||
|
if 'Rated:' in label or 'Rating:' in stripHTML(label):
|
||||||
|
rating = stripHTML(label.next_sibling)
|
||||||
|
if rating.endswith(' ['):
|
||||||
|
rating = rating[:-2]
|
||||||
|
self.story.setMetadata('rating',rating)
|
||||||
|
break
|
||||||
|
|
||||||
def getClass():
|
def getClass():
|
||||||
return LibraryOfMoriaComAdapter
|
return LibraryOfMoriaComAdapter
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ from __future__ import absolute_import
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from bs4.element import Comment
|
from bs4.element import Comment
|
||||||
from ..htmlcleanup import stripHTML
|
from ..htmlcleanup import stripHTML
|
||||||
|
|
@ -37,7 +38,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
def __init__(self, config, url):
|
def __init__(self, config, url):
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
BaseSiteAdapter.__init__(self, config, url)
|
||||||
logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)
|
#logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
# Each adapter needs to have a unique site abbreviation.
|
||||||
self.story.setMetadata('siteabbrev','litero')
|
self.story.setMetadata('siteabbrev','litero')
|
||||||
|
|
@ -47,16 +48,15 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
# where first chapter doesn't have '-ch-'.
|
# where first chapter doesn't have '-ch-'.
|
||||||
# Now just rely on extractChapterUrlsAndMetadata to reset
|
# Now just rely on extractChapterUrlsAndMetadata to reset
|
||||||
# storyId to first chapter link.
|
# storyId to first chapter link.
|
||||||
storyId = self.parsedUrl.path.split('/',)[2]
|
|
||||||
|
|
||||||
## DON'T normalize to www.literotica.com--keep for language,
|
## DON'T normalize to www.literotica.com--keep for language,
|
||||||
## which will be set in _setURL(url). Also, multi-chapter
|
## which will be set in _setURL(url). Also, multi-chapter
|
||||||
## have been keeping the language when 'normalizing' to first
|
## have been keeping the language when 'normalizing' to first
|
||||||
## chapter.
|
## chapter.
|
||||||
url = re.sub(r"^(https?://)"+LANG_RE+r"(\.i)?",
|
url = re.sub(r"^(https?://)"+LANG_RE+r"(\.i)?",
|
||||||
r"\1\2",
|
r"https://\2",
|
||||||
url)
|
url)
|
||||||
url = url.replace('/beta/s/','/s/') # to allow beta site URLs.
|
url = url.replace('/beta/','/') # to allow beta site URLs.
|
||||||
|
|
||||||
## strip ?page=...
|
## strip ?page=...
|
||||||
url = re.sub(r"\?page=.*$", "", url)
|
url = re.sub(r"\?page=.*$", "", url)
|
||||||
|
|
@ -66,7 +66,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
# The date format will vary from site to site.
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||||
self.dateformat = "%m/%d/%y"
|
self.dateformat = "%m/%d/%Y"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
|
|
@ -78,10 +78,12 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getSiteExampleURLs(cls):
|
def getSiteExampleURLs(cls):
|
||||||
return "http://www.literotica.com/s/story-title https://www.literotica.com/s/story-title http://portuguese.literotica.com/s/story-title http://german.literotica.com/s/story-title"
|
return "https://www.literotica.com/s/story-title https://www.literotica.com/series/se/9999999 https://www.literotica.com/s/story-title https://www.literotica.com/i/image-or-comic-title https://www.literotica.com/p/poem-title https://portuguese.literotica.com/s/story-title https://german.literotica.com/s/story-title"
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/(beta/)?s/([a-zA-Z0-9_-]+)"
|
# also https://www.literotica.com/series/se/80075773
|
||||||
|
# /s/ for story, /i/ for image/comic, /p/ for poem
|
||||||
|
return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/((beta/)?[sip]/([a-zA-Z0-9_-]+)|series/se/(?P<storyseriesid>[a-zA-Z0-9_-]+))"
|
||||||
|
|
||||||
def _setURL(self,url):
|
def _setURL(self,url):
|
||||||
# logger.debug("set URL:%s"%url)
|
# logger.debug("set URL:%s"%url)
|
||||||
|
|
@ -90,260 +92,337 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
lang = m.group('lang')
|
lang = m.group('lang')
|
||||||
if lang not in ('www','other'):
|
if lang not in ('www','other'):
|
||||||
self.story.setMetadata('language',lang.capitalize())
|
self.story.setMetadata('language',lang.capitalize())
|
||||||
|
# reset storyId
|
||||||
|
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[-1])
|
||||||
# logger.debug("language:%s"%self.story.getMetadata('language'))
|
# logger.debug("language:%s"%self.story.getMetadata('language'))
|
||||||
|
|
||||||
def getCategories(self, soup):
|
## apply clean_chapter_titles
|
||||||
if self.getConfig("use_meta_keywords"):
|
def add_chapter(self,chapter_title,url,othermeta={}):
|
||||||
categories = soup.find("meta", {"name":"keywords"})['content'].split(',')
|
if self.getConfig("clean_chapter_titles"):
|
||||||
categories = [c for c in categories if not self.story.getMetadata('title') in c]
|
storytitle = self.story.getMetadataRaw('title').lower()
|
||||||
if self.story.getMetadata('author') in categories:
|
chapter_name_type = None
|
||||||
categories.remove(self.story.getMetadata('author'))
|
# strip trailing ch or pt before doing the chapter clean.
|
||||||
# logger.debug("Meta = %s" % categories)
|
# doesn't remove from story title metadata
|
||||||
for category in categories:
|
storytitle = re.sub(r'^(.*?)( (ch|pt))?$',r'\1',storytitle)
|
||||||
# logger.debug("\tCategory=%s" % category)
|
if chapter_title.lower().startswith(storytitle):
|
||||||
# self.story.addToList('category', category.title())
|
chapter = chapter_title[len(storytitle):].strip()
|
||||||
self.story.addToList('eroticatags', category.title())
|
# logger.debug('\tChapter: "%s"' % chapter)
|
||||||
|
if chapter == '':
|
||||||
|
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
|
||||||
|
# Sometimes the first chapter does not have type of chapter
|
||||||
|
if self.num_chapters() == 0:
|
||||||
|
# logger.debug('\tChapter: first chapter without chapter type')
|
||||||
|
chapter_name_type = None
|
||||||
|
else:
|
||||||
|
separater_char = chapter[0]
|
||||||
|
# logger.debug('\tseparater_char: "%s"' % separater_char)
|
||||||
|
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
|
||||||
|
# logger.debug('\tChapter: "%s"' % chapter)
|
||||||
|
if chapter.lower().startswith('ch.'):
|
||||||
|
chapter = chapter[len('ch.'):].strip()
|
||||||
|
try:
|
||||||
|
chapter_title = 'Chapter %d' % int(chapter)
|
||||||
|
except:
|
||||||
|
chapter_title = 'Chapter %s' % chapter
|
||||||
|
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
|
||||||
|
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
||||||
|
elif chapter.lower().startswith('pt.'):
|
||||||
|
chapter = chapter[len('pt.'):].strip()
|
||||||
|
try:
|
||||||
|
chapter_title = 'Part %d' % int(chapter)
|
||||||
|
except:
|
||||||
|
chapter_title = 'Part %s' % chapter
|
||||||
|
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
|
||||||
|
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
||||||
|
elif separater_char in [":", "-"]:
|
||||||
|
chapter_title = chapter
|
||||||
|
# logger.debug('\tChapter: taking chapter text as whole')
|
||||||
|
super(LiteroticaSiteAdapter, self).add_chapter(chapter_title,url,othermeta)
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
def extractChapterUrlsAndMetadata(self):
|
||||||
"""
|
"""
|
||||||
NOTE: Some stories can have versions,
|
In April 2024, site introduced significant changes, including
|
||||||
e.g. /my-story-ch-05-version-10
|
adding a 'Story Series' page and link to it in each chapter.
|
||||||
NOTE: If two stories share the same title, a running index is added,
|
But not all stories, one-shots don't have 'Story Series'.
|
||||||
e.g.: /my-story-ch-02-1
|
|
||||||
Strategy:
|
literotica has 'Story Series' & 'Story'. FFF calls them 'Story' & 'Chapters'
|
||||||
* Go to author's page, search for the current story link,
|
See https://github.com/JimmXinu/FanFicFare/issues/1058#issuecomment-2078490037
|
||||||
* If it's in a tr.root-story => One-part story
|
|
||||||
* , get metadata and be done
|
So /series/se/ will be the story URL for multi chapters but
|
||||||
* If it's in a tr.sl => Chapter in series
|
keep individual 'chapter' URL for one-shots.
|
||||||
* Search up from there until we find a tr.ser-ttl (this is the
|
|
||||||
story)
|
|
||||||
* Gather metadata
|
|
||||||
* Search down from there for all tr.sl until the next
|
|
||||||
tr.ser-ttl, foreach
|
|
||||||
* Chapter link is there
|
|
||||||
"""
|
"""
|
||||||
|
logger.debug("Chapter/Story URL: <%s> " % self.url)
|
||||||
|
|
||||||
if not (self.is_adult or self.getConfig("is_adult")):
|
if not (self.is_adult or self.getConfig("is_adult")):
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
raise exceptions.AdultCheckRequired(self.url)
|
||||||
|
|
||||||
# logger.debug("Chapter/Story URL: <%s> " % self.url)
|
(data,rurl) = self.get_request_redirected(self.url)
|
||||||
|
# logger.debug(data)
|
||||||
(data1,rurl) = self.get_request_redirected(self.url)
|
|
||||||
## for language domains
|
## for language domains
|
||||||
self._setURL(rurl)
|
self._setURL(rurl)
|
||||||
logger.debug("set opened url:%s"%self.url)
|
logger.debug("set opened url:%s"%self.url)
|
||||||
soup1 = self.make_soup(data1)
|
soup = self.make_soup(data)
|
||||||
#strip comments from soup
|
|
||||||
[comment.extract() for comment in soup1.findAll(text=lambda text:isinstance(text, Comment))]
|
|
||||||
|
|
||||||
if "This submission is awaiting moderator's approval" in data1:
|
if "This submission is awaiting moderator's approval" in data:
|
||||||
raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url)
|
raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url)
|
||||||
|
|
||||||
|
## 2025Feb - domains other than www now use different HTML.
|
||||||
|
## Need to look for two different versions of basically
|
||||||
|
## everything.
|
||||||
|
|
||||||
|
## not series URL, assumed to be a chapter. Look for Story
|
||||||
|
## Info block of post-beta page. I don't think it should happen?
|
||||||
|
if '/series/se' not in self.url:
|
||||||
|
#logger.debug(data)
|
||||||
|
## looking for /series/se URL to indicate this is a
|
||||||
|
## chapter.
|
||||||
|
if not soup.select_one('div.page__aside') and not soup.select_one('div.sidebar') and not soup.select_one('div[class^="_sidebar_"]'):
|
||||||
|
raise exceptions.FailedToDownload("Missing Story Info block, Beta turned off?")
|
||||||
|
|
||||||
|
storyseriestag = soup.select_one('a.bn_av')
|
||||||
|
if not storyseriestag:
|
||||||
|
storyseriestag = soup.select_one('a[class^="_files__link_"]')
|
||||||
|
# logger.debug("Story Series Tag:%s"%storyseriestag)
|
||||||
|
|
||||||
|
if storyseriestag:
|
||||||
|
self._setURL(storyseriestag['href'])
|
||||||
|
data = self.get_request(storyseriestag['href'])
|
||||||
|
# logger.debug(data)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
# logger.debug(soup)
|
||||||
|
else:
|
||||||
|
logger.debug("One-shot")
|
||||||
|
|
||||||
|
isSingleStory = '/series/se' not in self.url
|
||||||
|
|
||||||
|
if not isSingleStory:
|
||||||
|
# Normilize the url?
|
||||||
|
state = re.findall(r"prefix\=\"/series/\",state='(.+?)'</script>", data)
|
||||||
|
json_state = json.loads(state[0].replace("\\'","'").replace("\\\\","\\"))
|
||||||
|
url_series_id = unicode(re.match(self.getSiteURLPattern(),self.url).group('storyseriesid'))
|
||||||
|
json_series_id = unicode(json_state['series']['data']['id'])
|
||||||
|
if json_series_id != url_series_id:
|
||||||
|
res = re.sub(url_series_id, json_series_id, unicode(self.url))
|
||||||
|
logger.debug("Normalized url: %s"%res)
|
||||||
|
self._setURL(res)
|
||||||
|
|
||||||
|
## common between one-shots and multi-chapters
|
||||||
|
# title
|
||||||
|
self.story.setMetadata('title', stripHTML(soup.select_one('h1')))
|
||||||
|
# logger.debug(self.story.getMetadata('title'))
|
||||||
|
|
||||||
# author
|
# author
|
||||||
authora = soup1.find("a", class_="y_eU")
|
## XXX This is still the author URL like:
|
||||||
|
## https://www.literotica.com/stories/memberpage.php?uid=999999&page=submissions
|
||||||
|
## because that's what's on the page. It redirects to the /authors/ page.
|
||||||
|
## Only way I know right now to get the /authors/ is to make
|
||||||
|
## the req and look at the redirect.
|
||||||
|
## Should change to /authors/ if/when it starts appearing.
|
||||||
|
## Assuming it's in the same place.
|
||||||
|
authora = soup.find("a", class_="y_eU")
|
||||||
|
if not authora:
|
||||||
|
authora = soup.select_one('a[class^="_author__title"]')
|
||||||
authorurl = authora['href']
|
authorurl = authora['href']
|
||||||
# logger.debug(authora)
|
|
||||||
# logger.debug(authorurl)
|
|
||||||
self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0])
|
|
||||||
if authorurl.startswith('//'):
|
if authorurl.startswith('//'):
|
||||||
authorurl = self.parsedUrl.scheme+':'+authorurl
|
authorurl = self.parsedUrl.scheme+':'+authorurl
|
||||||
|
# logger.debug(authora)
|
||||||
|
# logger.debug(authorurl)
|
||||||
|
self.story.setMetadata('author', stripHTML(authora))
|
||||||
self.story.setMetadata('authorUrl', authorurl)
|
self.story.setMetadata('authorUrl', authorurl)
|
||||||
self.story.setMetadata('author', authora.text)
|
if '?' in authorurl:
|
||||||
|
self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0])
|
||||||
|
elif '/authors/' in authorurl:
|
||||||
|
self.story.setMetadata('authorId', authorurl.split('/')[-1])
|
||||||
|
else: # if all else fails
|
||||||
|
self.story.setMetadata('authorId', stripHTML(authora))
|
||||||
|
|
||||||
# get the author page
|
if soup.select('div#tabpanel-tags'):
|
||||||
dataAuth = self.get_request(authorurl)
|
# logger.debug("tags1")
|
||||||
soupAuth = self.make_soup(dataAuth)
|
self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div#tabpanel-tags a.av_as') ])
|
||||||
#strip comments from soup
|
if soup.select('div[class^="_widget__tags_"]'):
|
||||||
[comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, Comment))]
|
# logger.debug("tags2")
|
||||||
# logger.debug(soupAuth)
|
self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div[class^="_widget__tags_"] a[class^="_tag_item_"]') ])
|
||||||
|
# logger.debug(self.story.getList('eroticatags'))
|
||||||
|
|
||||||
## Find link to url in author's page
|
## look first for 'Series Introduction', then Info panel short desc
|
||||||
## site has started using //domain.name/asdf urls remove https?: from front
|
## series can have either, so put in common code.
|
||||||
## site has started putting https back on again.
|
desc = []
|
||||||
## site is now using language specific german.lit... etc on author pages.
|
introtag = soup.select_one('div.bp_rh')
|
||||||
## site is now back to using www.lit... etc on author pages.
|
descdiv = soup.select_one('div#tabpanel-info div.bn_B') or \
|
||||||
search_url_re = r"https?://"+LANG_RE+r"(\.i)?\." + re.escape(self.getSiteDomain()) + self.url[self.url.index('/s/'):]
|
soup.select_one('div[class^="_tab__pane_"] div[class^="_widget__info_"]')
|
||||||
logger.debug(search_url_re)
|
if introtag and stripHTML(introtag):
|
||||||
storyLink = soupAuth.find('a', href=re.compile(search_url_re))
|
# make sure there's something in the tag.
|
||||||
# storyLink = soupAuth.find('a', href=re.compile(r'.*literotica.com/s/'+re.escape(self.story.getMetadata('storyId')) ))
|
# logger.debug("intro %s"%introtag)
|
||||||
# storyLink = soupAuth.find('a', href=re.compile(r'(https?:)?'+re.escape(self.url[self.url.index(':')+1:]).replace(r'www',r'[^\.]+') ))
|
desc.append(unicode(introtag))
|
||||||
# storyLink = soupAuth.find('a', href=self.url)#[self.url.index(':')+1:])
|
elif descdiv and stripHTML(descdiv):
|
||||||
|
# make sure there's something in the tag.
|
||||||
|
# logger.debug("desc %s"%descdiv)
|
||||||
|
desc.append(unicode(descdiv))
|
||||||
|
if not desc or self.getConfig("include_chapter_descriptions_in_summary"):
|
||||||
|
## Only for backward compatibility with 'stories' that
|
||||||
|
## don't have an intro or short desc.
|
||||||
|
descriptions = []
|
||||||
|
for i, chapterdesctag in enumerate(soup.select('p.br_rk')):
|
||||||
|
# remove category link, but only temporarily
|
||||||
|
a = chapterdesctag.a.extract()
|
||||||
|
descriptions.append("%d. %s" % (i + 1, stripHTML(chapterdesctag)))
|
||||||
|
# now put it back--it's used below
|
||||||
|
chapterdesctag.append(a)
|
||||||
|
desc.append(unicode("<p>"+"</p>\n<p>".join(descriptions)+"</p>"))
|
||||||
|
|
||||||
if storyLink is not None:
|
self.setDescription(self.url,u''.join(desc))
|
||||||
# pull the published date from the author page
|
|
||||||
# default values from single link. Updated below if multiple chapter.
|
|
||||||
# logger.debug("Found story on the author page.")
|
|
||||||
date = storyLink.parent.parent.findAll('td')[-1].text
|
|
||||||
self.story.setMetadata('datePublished', makeDate(date, self.dateformat))
|
|
||||||
self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat))
|
|
||||||
|
|
||||||
if storyLink is not None:
|
|
||||||
urlTr = storyLink.parent.parent
|
|
||||||
if "sl" in urlTr['class']:
|
|
||||||
isSingleStory = False
|
|
||||||
else:
|
|
||||||
isSingleStory = True
|
|
||||||
else:
|
|
||||||
raise exceptions.FailedToDownload("Couldn't find story <%s> on author's page <%s>" % (self.url, authorurl))
|
|
||||||
|
|
||||||
if isSingleStory:
|
if isSingleStory:
|
||||||
self.story.setMetadata('title', storyLink.text.strip('/'))
|
## one-shots don't *display* date info, but they have it
|
||||||
# logger.debug('Title: "%s"' % storyLink.text.strip('/'))
|
## hidden in <script>
|
||||||
self.setDescription(authorurl, urlTr.findAll("td")[1].text)
|
## shows _date_approve "date_approve":"01/31/2024"
|
||||||
self.story.addToList('category', urlTr.findAll("td")[2].text)
|
|
||||||
# self.story.addToList('eroticatags', urlTr.findAll("td")[2].text)
|
## multichap also have "date_approve", but they have
|
||||||
date = urlTr.findAll('td')[-1].text
|
## several and they're more than just the story chapters.
|
||||||
self.story.setMetadata('datePublished', makeDate(date, self.dateformat))
|
date = re.search(r'"date_approve":"(\d\d/\d\d/\d\d\d\d)"',data)
|
||||||
self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat))
|
if not date:
|
||||||
self.add_chapter(storyLink.text, self.url)
|
date = re.search(r'date_approve:"(\d\d/\d\d/\d\d\d\d)"',data)
|
||||||
averrating = stripHTML(storyLink.parent)
|
if date:
|
||||||
## title (0.00)
|
dateval = makeDate(date.group(1), self.dateformat)
|
||||||
averrating = averrating[averrating.rfind('(')+1:averrating.rfind(')')]
|
self.story.setMetadata('datePublished', dateval)
|
||||||
try:
|
self.story.setMetadata('dateUpdated', dateval)
|
||||||
self.story.setMetadata('averrating', float(averrating))
|
|
||||||
except:
|
## one-shots don't have same json data to get aver_rating
|
||||||
pass
|
## from below. This kludge matches the data_approve
|
||||||
# self.story.setMetadata('averrating',averrating)
|
rateall = re.search(r'rate_all:([\d\.]+)',data)
|
||||||
# parse out the list of chapters
|
if rateall:
|
||||||
|
self.story.setMetadata('averrating', '%4.2f' % float(rateall.group(1)))
|
||||||
|
|
||||||
|
## one-shots assumed completed.
|
||||||
|
self.story.setMetadata('status','Completed')
|
||||||
|
|
||||||
|
# Add the category from the breadcumb.
|
||||||
|
breadcrumbs = soup.find('div', id='BreadCrumbComponent')
|
||||||
|
if not breadcrumbs:
|
||||||
|
breadcrumbs = soup.select_one('ul[class^="_breadcrumbs_list_"]')
|
||||||
|
if not breadcrumbs:
|
||||||
|
# _breadcrumbs_18u7l_1
|
||||||
|
breadcrumbs = soup.select_one('nav[class^="_breadcrumbs_"]')
|
||||||
|
self.story.addToList('category', breadcrumbs.find_all('a')[1].string)
|
||||||
|
|
||||||
|
## one-shot chapter
|
||||||
|
self.add_chapter(self.story.getMetadata('title'), self.url)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
seriesTr = urlTr.previousSibling
|
## Multi-chapter stories. AKA multi-part 'Story Series'.
|
||||||
while 'ser-ttl' not in seriesTr['class']:
|
bn_antags = soup.select('div#tabpanel-info p.bn_an')
|
||||||
seriesTr = seriesTr.previousSibling
|
# logger.debug(bn_antags)
|
||||||
m = re.match(r"^(?P<title>.*?):\s(?P<numChapters>\d+)\sPart\sSeries$", seriesTr.find("strong").text)
|
if bn_antags and not self.getConfig("dates_from_chapters"):
|
||||||
self.story.setMetadata('title', m.group('title'))
|
## Use dates from series metadata unless dates_from_chapters is enabled
|
||||||
seriesTitle = m.group('title')
|
dates = []
|
||||||
|
for datetag in bn_antags[:2]:
|
||||||
|
datetxt = stripHTML(datetag)
|
||||||
|
# remove 'Started:' 'Updated:'
|
||||||
|
# Assume can't use 'Started:' 'Updated:' (vs [0] or [1]) because of lang localization
|
||||||
|
datetxt = datetxt[datetxt.index(':')+1:]
|
||||||
|
dates.append(datetxt)
|
||||||
|
# logger.debug(dates)
|
||||||
|
self.story.setMetadata('datePublished', makeDate(dates[0], self.dateformat))
|
||||||
|
self.story.setMetadata('dateUpdated', makeDate(dates[1], self.dateformat))
|
||||||
|
|
||||||
## Walk the chapters
|
## bn_antags[2] contains "The author has completed this series." or "The author is still actively writing this series."
|
||||||
chapterTr = seriesTr.nextSibling
|
## I won't be surprised if this breaks later because of lang localization
|
||||||
dates = []
|
if "completed" in stripHTML(bn_antags[-1]):
|
||||||
descriptions = []
|
self.story.setMetadata('status','Completed')
|
||||||
ratings = []
|
else:
|
||||||
chapters = []
|
self.story.setMetadata('status','In-Progress')
|
||||||
chapter_name_type = None
|
|
||||||
while chapterTr is not None and 'sl' in chapterTr['class']:
|
|
||||||
description = "%d. %s" % (len(descriptions)+1,stripHTML(chapterTr.findAll("td")[1]))
|
|
||||||
description = stripHTML(chapterTr.findAll("td")[1])
|
|
||||||
chapterLink = chapterTr.find("td", "fc").find("a")
|
|
||||||
if self.getConfig('chapter_categories_use_all'):
|
|
||||||
self.story.addToList('category', chapterTr.findAll("td")[2].text)
|
|
||||||
# self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
|
|
||||||
pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)
|
|
||||||
dates.append(pub_date)
|
|
||||||
chapterTr = chapterTr.nextSibling
|
|
||||||
|
|
||||||
chapter_title = chapterLink.text
|
## category from chapter list
|
||||||
if self.getConfig("clean_chapter_titles"):
|
self.story.extendList('category',[ stripHTML(t) for t in soup.select('a.br_rl') ])
|
||||||
# logger.debug('\tChapter Name: "%s"' % chapterLink.text)
|
|
||||||
if chapterLink.text.lower().startswith(seriesTitle.lower()):
|
|
||||||
chapter = chapterLink.text[len(seriesTitle):].strip()
|
|
||||||
# logger.debug('\tChapter: "%s"' % chapter)
|
|
||||||
if chapter == '':
|
|
||||||
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
|
|
||||||
# Sometimes the first chapter does not have type of chapter
|
|
||||||
if self.num_chapters() == 0:
|
|
||||||
logger.debug('\tChapter: first chapter without chapter type')
|
|
||||||
chapter_name_type = None
|
|
||||||
else:
|
|
||||||
separater_char = chapter[0]
|
|
||||||
# logger.debug('\tseparater_char: "%s"' % separater_char)
|
|
||||||
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
|
|
||||||
# logger.debug('\tChapter: "%s"' % chapter)
|
|
||||||
if chapter.lower().startswith('ch.'):
|
|
||||||
chapter = chapter[len('ch.'):].strip()
|
|
||||||
try:
|
|
||||||
chapter_title = 'Chapter %d' % int(chapter)
|
|
||||||
except:
|
|
||||||
chapter_title = 'Chapter %s' % chapter
|
|
||||||
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
|
|
||||||
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
|
||||||
elif chapter.lower().startswith('pt.'):
|
|
||||||
chapter = chapter[len('pt.'):]
|
|
||||||
try:
|
|
||||||
chapter_title = 'Part %d' % int(chapter)
|
|
||||||
except:
|
|
||||||
chapter_title = 'Part %s' % chapter
|
|
||||||
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
|
|
||||||
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
|
||||||
elif separater_char in [":", "-"]:
|
|
||||||
chapter_title = chapter
|
|
||||||
logger.debug('\tChapter: taking chapter text as whole')
|
|
||||||
|
|
||||||
# pages include full URLs.
|
for chapteratag in soup.select('a.br_rj'):
|
||||||
chapurl = chapterLink['href']
|
chapter_title = stripHTML(chapteratag)
|
||||||
if chapurl.startswith('//'):
|
# logger.debug('\tChapter: "%s"' % chapteratag)
|
||||||
chapurl = self.parsedUrl.scheme + ':' + chapurl
|
# /series/se does include full URLs current.
|
||||||
|
chapurl = chapteratag['href']
|
||||||
# logger.debug("Chapter URL: " + chapurl)
|
# logger.debug("Chapter URL: " + chapurl)
|
||||||
# logger.debug("Chapter Title: " + chapter_title)
|
self.add_chapter(chapter_title, chapurl)
|
||||||
# logger.debug("Chapter description: " + description)
|
|
||||||
chapters.append((chapter_title, chapurl, description, pub_date))
|
|
||||||
# self.add_chapter(chapter_title, chapurl)
|
|
||||||
numrating = stripHTML(chapterLink.parent)
|
|
||||||
## title (0.00)
|
|
||||||
numrating = numrating[numrating.rfind('(')+1:numrating.rfind(')')]
|
|
||||||
try:
|
|
||||||
ratings.append(float(numrating))
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if self.getConfig("clean_chapter_titles") \
|
# <img src="https://uploads.literotica.com/series/cover/813-1695143444-desktop-x1.jpg" alt="Series cover">
|
||||||
and chapter_name_type is not None \
|
coverimg = soup.select_one('img[alt="Series cover"]')
|
||||||
and not chapters[0][0].startswith(chapter_name_type):
|
if coverimg:
|
||||||
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
self.setCoverImage(self.url,coverimg['src'])
|
||||||
logger.debug('\tChapter: first chapter="%s"' % chapters[0][0])
|
|
||||||
logger.debug('\tChapter: first chapter number="%s"' % chapters[0][0][len('Chapter'):])
|
|
||||||
chapters[0] = ("%s %s" % (chapter_name_type, chapters[0][0][len('Chapter'):].strip()),
|
|
||||||
chapters[0][1],
|
|
||||||
chapters[0][2],
|
|
||||||
chapters[0][3]
|
|
||||||
)
|
|
||||||
|
|
||||||
chapters = sorted(chapters, key=lambda chapter: chapter[3])
|
#### Attempting averrating from JS metadata.
|
||||||
for i, chapter in enumerate(chapters):
|
#### also alternate chapters from json
|
||||||
self.add_chapter(chapter[0], chapter[1])
|
try:
|
||||||
descriptions.append("%d. %s" % (i + 1, chapter[2]))
|
state_start="state='"
|
||||||
## Set the oldest date as publication date, the newest as update date
|
state_end="'</script>"
|
||||||
dates.sort()
|
i = data.index(state_start)
|
||||||
self.story.setMetadata('datePublished', dates[0])
|
if i:
|
||||||
self.story.setMetadata('dateUpdated', dates[-1])
|
state = data[i+len(state_start):data.index(state_end,i)].replace("\\'","'").replace("\\\\","\\")
|
||||||
self.story.setMetadata('datePublished', chapters[0][3])
|
if state:
|
||||||
self.story.setMetadata('dateUpdated', chapters[-1][3])
|
# logger.debug(state)
|
||||||
## Set description to joint chapter descriptions
|
json_state = json.loads(state)
|
||||||
self.setDescription(authorurl,"<p>"+"</p>\n<p>".join(descriptions)+"</p>")
|
# logger.debug(json.dumps(json_state, sort_keys=True,indent=2, separators=(',', ':')))
|
||||||
|
all_rates = []
|
||||||
|
if 'series' in json_state:
|
||||||
|
all_rates = [ float(x['rate_all']) for x in json_state['series']['works'] ]
|
||||||
|
|
||||||
if len(ratings) > 0:
|
## Extract dates from chapter approval dates if dates_from_chapters is enabled
|
||||||
self.story.setMetadata('averrating','%4.2f' % (sum(ratings) / float(len(ratings))))
|
if self.getConfig("dates_from_chapters"):
|
||||||
|
date_approvals = []
|
||||||
|
for work in json_state['series']['works']:
|
||||||
|
if 'date_approve' in work:
|
||||||
|
try:
|
||||||
|
date_approvals.append(makeDate(work['date_approve'], self.dateformat))
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
if date_approvals:
|
||||||
|
# Oldest date is published, newest is updated
|
||||||
|
date_approvals.sort()
|
||||||
|
self.story.setMetadata('datePublished', date_approvals[0])
|
||||||
|
self.story.setMetadata('dateUpdated', date_approvals[-1])
|
||||||
|
if all_rates:
|
||||||
|
self.story.setMetadata('averrating', '%4.2f' % (sum(all_rates) / float(len(all_rates))))
|
||||||
|
|
||||||
# normalize on first chapter URL.
|
## alternate chapters from JSON
|
||||||
self._setURL(self.get_chapter(0,'url'))
|
if self.num_chapters() < 1:
|
||||||
|
logger.debug("Getting Chapters from series JSON")
|
||||||
|
seriesid = json_state.get('series',{}).get('data',{}).get('id',None)
|
||||||
|
if seriesid:
|
||||||
|
logger.info("Fetching chapter data from JSON")
|
||||||
|
logger.debug(seriesid)
|
||||||
|
series_json = json.loads(self.get_request('https://literotica.com/api/3/series/%s/works'%seriesid))
|
||||||
|
# logger.debug(json.dumps(series_json, sort_keys=True,indent=2, separators=(',', ':')))
|
||||||
|
for chap in series_json:
|
||||||
|
self.add_chapter(chap['title'], 'https://www.literotica.com/s/'+chap['url'])
|
||||||
|
|
||||||
# reset storyId to first chapter.
|
## Collect tags from series/story page if tags_from_chapters is enabled
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
if self.getConfig("tags_from_chapters"):
|
||||||
|
self.story.extendList('eroticatags', [ unicode(t['tag']).title() for t in chap['tags'] ])
|
||||||
|
|
||||||
|
|
||||||
# Add the category from the breadcumb. This might duplicate a category already added.
|
except Exception as e:
|
||||||
self.story.addToList('category', soup1.find('div', id='BreadCrumbComponent').findAll('a')[1].string)
|
logger.warning("Processing JSON failed. (%s)"%e)
|
||||||
self.getCategories(soup1)
|
|
||||||
|
|
||||||
|
## Features removed because not supportable by new site form:
|
||||||
|
## averrating metadata entry
|
||||||
|
## order_chapters_by_date option
|
||||||
|
## use_meta_keywords option
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def getPageText(self, raw_page, url):
|
def getPageText(self, raw_page, url):
|
||||||
# logger.debug('Getting page text')
|
logger.debug('Getting page text')
|
||||||
# logger.debug(soup)
|
|
||||||
raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
|
raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
|
||||||
# logger.debug("\tChapter text: %s" % raw_page)
|
# logger.debug("\tChapter text: %s" % raw_page)
|
||||||
page_soup = self.make_soup(raw_page)
|
page_soup = self.make_soup(raw_page)
|
||||||
[comment.extract() for comment in page_soup.findAll(text=lambda text:isinstance(text, Comment))]
|
[comment.extract() for comment in page_soup.find_all(string=lambda text:isinstance(text, Comment))]
|
||||||
story2 = page_soup.find('div', 'aa_ht').div
|
fullhtml = ""
|
||||||
# logger.debug('getPageText - story2: %s' % story2)
|
for aa_ht_div in page_soup.find_all('div', 'aa_ht') + page_soup.select('div[class^="_article__content_"]'):
|
||||||
|
if aa_ht_div.div:
|
||||||
fullhtml = unicode(story2)
|
html = unicode(aa_ht_div.div)
|
||||||
# logger.debug(fullhtml)
|
# Strip some starting and ending tags,
|
||||||
# Strip some starting and ending tags,
|
html = re.sub(r'^<div.*?>', r'', html)
|
||||||
fullhtml = re.sub(r'^<div.*?>', r'', fullhtml)
|
html = re.sub(r'</div>$', r'', html)
|
||||||
fullhtml = re.sub(r'</div>$', r'', fullhtml)
|
html = re.sub(r'<p></p>$', r'', html)
|
||||||
fullhtml = re.sub(r'<p></p>$', r'', fullhtml)
|
fullhtml = fullhtml + html
|
||||||
# logger.debug('getPageText - fullhtml: %s' % fullhtml)
|
# logger.debug('getPageText - fullhtml: %s' % fullhtml)
|
||||||
return fullhtml
|
return fullhtml
|
||||||
|
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
|
|
@ -353,9 +432,15 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
raw_page = self.get_request(url)
|
raw_page = self.get_request(url)
|
||||||
page_soup = self.make_soup(raw_page)
|
page_soup = self.make_soup(raw_page)
|
||||||
pages = page_soup.find('div',class_='l_bH')
|
pages = page_soup.find('div',class_='l_bH')
|
||||||
|
if not pages:
|
||||||
|
pages = page_soup.select_one('div._pagination_h0sum_1')
|
||||||
|
if not pages:
|
||||||
|
pages = page_soup.select_one('div.clearfix.panel._pagination_1400x_1')
|
||||||
|
if not pages:
|
||||||
|
pages = page_soup.select_one('div[class^="panel clearfix _pagination_"]')
|
||||||
|
# logger.debug(pages)
|
||||||
|
|
||||||
fullhtml = ""
|
fullhtml = ""
|
||||||
self.getCategories(page_soup)
|
|
||||||
chapter_description = ''
|
chapter_description = ''
|
||||||
if self.getConfig("description_in_chapter"):
|
if self.getConfig("description_in_chapter"):
|
||||||
chapter_description = page_soup.find("meta", {"name" : "description"})['content']
|
chapter_description = page_soup.find("meta", {"name" : "description"})['content']
|
||||||
|
|
@ -366,7 +451,10 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
## look for highest numbered page, they're not all listed
|
## look for highest numbered page, they're not all listed
|
||||||
## when there are many.
|
## when there are many.
|
||||||
|
|
||||||
last_page_link = pages.find_all('a', class_='l_bJ')[-1]
|
last_page_links = pages.find_all('a', class_='l_bJ')
|
||||||
|
if not last_page_links:
|
||||||
|
last_page_links = pages.select('a[class^="_pagination__item_"]')
|
||||||
|
last_page_link = last_page_links[-1]
|
||||||
last_page_no = int(urlparse.parse_qs(last_page_link['href'].split('?')[1])['page'][0])
|
last_page_no = int(urlparse.parse_qs(last_page_link['href'].split('?')[1])['page'][0])
|
||||||
# logger.debug(last_page_no)
|
# logger.debug(last_page_no)
|
||||||
for page_no in range(2, last_page_no+1):
|
for page_no in range(2, last_page_no+1):
|
||||||
|
|
@ -375,7 +463,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
raw_page = self.get_request(page_url)
|
raw_page = self.get_request(page_url)
|
||||||
fullhtml += self.getPageText(raw_page, url)
|
fullhtml += self.getPageText(raw_page, url)
|
||||||
|
|
||||||
# logger.debug(fullhtml)
|
#logger.debug(fullhtml)
|
||||||
page_soup = self.make_soup(fullhtml)
|
page_soup = self.make_soup(fullhtml)
|
||||||
fullhtml = self.utf8FromSoup(url, self.make_soup(fullhtml))
|
fullhtml = self.utf8FromSoup(url, self.make_soup(fullhtml))
|
||||||
fullhtml = chapter_description + fullhtml
|
fullhtml = chapter_description + fullhtml
|
||||||
|
|
@ -383,6 +471,123 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
return fullhtml
|
return fullhtml
|
||||||
|
|
||||||
|
def get_urls_from_page(self,url,normalize):
|
||||||
|
from ..geturls import get_urls_from_html
|
||||||
|
|
||||||
|
## hook for logins, etc.
|
||||||
|
self.before_get_urls_from_page(url,normalize)
|
||||||
|
|
||||||
|
# this way it uses User-Agent or other special settings.
|
||||||
|
data = self.get_request(url,usecache=False)
|
||||||
|
soup = self.make_soup(data)
|
||||||
|
|
||||||
|
page_urls = get_urls_from_html(soup, url, configuration=self.configuration, normalize=normalize)
|
||||||
|
|
||||||
|
if not self.getConfig("fetch_stories_from_api",True):
|
||||||
|
logger.debug('fetch_stories_from_api Not enabled')
|
||||||
|
return {'urllist': page_urls}
|
||||||
|
|
||||||
|
user_story_list = re.search(r'literotica\.com/authors/.+?/lists\?listid=(?P<list_id>\d+)', url)
|
||||||
|
fav_authors = re.search(r'literotica\.com/authors/.+?/favorites', url)
|
||||||
|
written = re.search(r'literotica.com/authors/.+?/works/', url)
|
||||||
|
logger.debug((bool(user_story_list), bool(fav_authors), bool(written)))
|
||||||
|
|
||||||
|
# If the url is not supported
|
||||||
|
if not user_story_list and not fav_authors and not written:
|
||||||
|
logger.debug('No supported link. %s', url)
|
||||||
|
return {'urllist':page_urls}
|
||||||
|
|
||||||
|
# Grabbing the main list where chapters are contained.
|
||||||
|
if user_story_list:
|
||||||
|
js_story_list = re.search(r';\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]\);\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]=\{success:!\d,current_page:(?P<current_page>\d+?),last_page:(?P<last_page>\d+?),total:\d+?,per_page:\d+,(has_series:!\d)?data:\$R\[\d+?\]=\[\$R\[\d+?\]=(?P<data>.+)\}\]\}\);', data) # }] } } }); \$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]\);\$R\[\d+?]\(\$R\[\d+?\],\$R\[\d+?\]=\{sliders:
|
||||||
|
logger.debug('user_story_list ID [%s]'%user_story_list.group('list_id'))
|
||||||
|
else:
|
||||||
|
js_story_list = re.search(r'\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]={current_page:(?P<current_page>\d+?),last_page:(?P<last_page>\d+?),total:\d+?,per_page:\d+,(has_series:!\d,)?data:\$R\[\d+\]=\[\$R\[\d+\]=\{(?!aim)(?P<data>.+)\}\);_\$HY\.r\[', data)
|
||||||
|
|
||||||
|
# In case the regex becomes outdated
|
||||||
|
if not js_story_list:
|
||||||
|
logger.debug('Failed to grab data from the js.')
|
||||||
|
return {'urllist':page_urls}
|
||||||
|
|
||||||
|
user = None
|
||||||
|
script_tags = soup.find_all('script')
|
||||||
|
for script in script_tags:
|
||||||
|
if not script.string:
|
||||||
|
continue
|
||||||
|
# Getting author from the js.
|
||||||
|
user = re.search(r'_\$HY\.r\[\"AuthorQuery\[\\\"(?P<author>.+?)\\\"\]\"\]', script.string)
|
||||||
|
if user != None:
|
||||||
|
logger.debug("User: [%s]"%user.group('author'))
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
logger.debug('Failed to get a username')
|
||||||
|
return {'urllist': page_urls}
|
||||||
|
|
||||||
|
# Extract the current (should be 1) and last page numbers from the js.
|
||||||
|
logger.debug("Pages %s/%s"%(js_story_list.group('current_page'), js_story_list.group('last_page')))
|
||||||
|
|
||||||
|
urls = []
|
||||||
|
# Necessary to format a proper link as there were no visible data specifying what kind of link that should be.
|
||||||
|
cat_to_link = {'adult-comics': 'i', 'erotic-art': 'i', 'illustrated-poetry': 'p', 'erotic-audio-poetry': 'p', 'erotic-poetry': 'p', 'non-erotic-poetry': 'p'}
|
||||||
|
stories_found = re.findall(r"category_info:\$R\[.*?type:\".+?\",pageUrl:\"(.+?)\"}.+?,type:\"(.+?)\",url:\"(.+?)\",", js_story_list.group('data'))
|
||||||
|
for story in stories_found:
|
||||||
|
story_category, story_type, story_url = story
|
||||||
|
urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(story_category, 's'), story_url))
|
||||||
|
|
||||||
|
# Removes the duplicates
|
||||||
|
seen = set()
|
||||||
|
urls = [x for x in (page_urls + urls) if not (x in seen or seen.add(x))]
|
||||||
|
logger.debug("Found [%s] stories so far."%len(urls))
|
||||||
|
|
||||||
|
# Sometimes the rest of the stories are burried in the js so no fetching in necessery.
|
||||||
|
if js_story_list.group('last_page') == js_story_list.group('current_page'):
|
||||||
|
return {'urllist': urls}
|
||||||
|
|
||||||
|
user = urlparse.quote(user.group(1))
|
||||||
|
logger.debug("Escaped user: [%s]"%user)
|
||||||
|
|
||||||
|
if written:
|
||||||
|
category = re.search(r"_\$HY\.r\[\"AuthorSeriesAndWorksQuery\[\\\".+?\\\",\\\"\D+?\\\",\\\"(?P<type>\D+?)\\\"\]\"\]=\$R\[\d+?\]=\$R\[\d+?\]\(\$R\[\d+?\]=\{", data)
|
||||||
|
elif fav_authors:
|
||||||
|
category = re.search(r"_\$HY\.r\[\"AuthorFavoriteWorksQuery\[\\\".+?\\\",\\\"(?P<type>\D+?)\\\",\d\]\"\]=\$R\[\d+?\]=\$R\[\d+?\]\(\$R\[\d+?\]={", data)
|
||||||
|
|
||||||
|
if not user_story_list and not category:
|
||||||
|
logger.debug("Type of works not found")
|
||||||
|
return {'urllist': urls}
|
||||||
|
|
||||||
|
last_page = int(js_story_list.group('last_page'))
|
||||||
|
current_page = int(js_story_list.group('current_page')) + 1
|
||||||
|
# Fetching the remaining urls from api. Can't trust the number given about the pages left from a website. Sometimes even the api returns outdated number of pages.
|
||||||
|
while current_page <= last_page:
|
||||||
|
i = len(urls)
|
||||||
|
logger.debug("Pages %s/%s"%(current_page, int(last_page)))
|
||||||
|
if fav_authors:
|
||||||
|
jsn = self.get_request('https://literotica.com/api/3/users/{}/favorite/works?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22type%22%3A%22{}%22%2C%22withSeriesDetails%22%3Atrue%7D'.format(user, current_page, category.group('type')))
|
||||||
|
elif user_story_list:
|
||||||
|
jsn = self.get_request('https://literotica.com/api/3/users/{}/list/{}?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22withSeriesDetails%22%3Atrue%7D'.format(user, user_story_list.group('list_id'), current_page))
|
||||||
|
else:
|
||||||
|
jsn = self.get_request('https://literotica.com/api/3/users/{}/series_and_works?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22sort%22%3A%22date%22%2C%22type%22%3A%22{}%22%2C%22listType%22%3A%22expanded%22%7D'.format(user, current_page, category.group('type')))
|
||||||
|
|
||||||
|
urls_data = json.loads(jsn)
|
||||||
|
last_page = urls_data["last_page"]
|
||||||
|
current_page = int(urls_data["current_page"]) + 1
|
||||||
|
for story in urls_data['data']:
|
||||||
|
#logger.debug('parts' in story)
|
||||||
|
if story['url'] and story.get('work_count') == None:
|
||||||
|
urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(story["category_info"]["pageUrl"], 's'), str(story['url'])))
|
||||||
|
continue
|
||||||
|
# Most of the time series has no url specified and contains all of the story links belonging to the series
|
||||||
|
urls.append('https://www.literotica.com/series/se/%s'%str(story['id']))
|
||||||
|
for series_story in story['parts']:
|
||||||
|
urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(series_story["category_info"]["pageUrl"], 's'), str(series_story['url'])))
|
||||||
|
logger.debug("Found [%s] stories."%(len(urls) - i))
|
||||||
|
|
||||||
|
# Again removing duplicates.
|
||||||
|
seen = set()
|
||||||
|
urls = [x for x in urls if not (x in seen or seen.add(x))]
|
||||||
|
|
||||||
|
logger.debug("Found total of [%s] stories"%len(urls))
|
||||||
|
return {'urllist':urls}
|
||||||
|
|
||||||
def getClass():
|
def getClass():
|
||||||
return LiteroticaSiteAdapter
|
return LiteroticaSiteAdapter
|
||||||
|
|
|
||||||
|
|
@ -1,50 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2018 FanFicFare team
|
|
||||||
# Copyright 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
##################################################################################
|
|
||||||
### Rewritten by: GComyn on November, 06, 2016
|
|
||||||
### Original was adapter_fannation.py
|
|
||||||
##################################################################################
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from .base_efiction_adapter import BaseEfictionAdapter
|
|
||||||
|
|
||||||
class LooseLugsComAdapter(BaseEfictionAdapter):
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'www.looselugs.com'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteAbbrev(self):
|
|
||||||
return 'looselugs'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getDateFormat(self):
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
return "%B %d, %Y"
|
|
||||||
|
|
||||||
##################################################################################
|
|
||||||
### The Efiction Base Adapter uses the Bulk story to retrieve the metadata, but
|
|
||||||
### on this site, the Rating is not present in the Bulk page...
|
|
||||||
### so it is not retrieved.
|
|
||||||
##################################################################################
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return LooseLugsComAdapter
|
|
||||||
|
|
@ -1,347 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
##############################################################################
|
|
||||||
### Adapted by GComyn
|
|
||||||
### Completed on November, 22, 2016
|
|
||||||
##############################################################################
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
class LOTRgficComAdapter(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.story.setMetadata('siteabbrev','lotrgfic')
|
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getSiteDomain():
|
|
||||||
return 'www.lotrgfic.com'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
addurl = "&warning=3"
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
elif "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
### Main Content for the Table Of Contents page.
|
|
||||||
div = soup.find('div',{'id':'maincontent'})
|
|
||||||
|
|
||||||
divfooter = div.find('div',{'id':'footer'})
|
|
||||||
if divfooter != None:
|
|
||||||
divfooter.extract()
|
|
||||||
|
|
||||||
## Title
|
|
||||||
a = div.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = div.find('a', href=re.compile(r"viewuser.php"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in div.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
|
|
||||||
### Metadata is contained
|
|
||||||
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
### This site has the metadata formatted all over the place,
|
|
||||||
### so we have to do some very cludgy programming to get it.
|
|
||||||
### If someone can do it better, please do so, and let us know.
|
|
||||||
## I'm going to leave this section in, so we can get those
|
|
||||||
## elements that are "formatted correctly".
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Summary' in label:
|
|
||||||
## the summary is not encased in a span label... so we can't do anything here.
|
|
||||||
## I'm going to leave it here just in case.
|
|
||||||
## Everything until the next span class='label'
|
|
||||||
svalue = ''
|
|
||||||
while value and 'label' not in defaultGetattr(value,'class'):
|
|
||||||
svalue += unicode(value)
|
|
||||||
value = value.nextSibling
|
|
||||||
# sometimes poorly formated desc (<p> w/o </p>) leads
|
|
||||||
# to all labels being included.
|
|
||||||
svalue=svalue[:svalue.find('<span class="label">')]
|
|
||||||
self.setDescription(url,svalue)
|
|
||||||
|
|
||||||
if 'Rated' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if 'Word count' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Categories' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
catstext = [cat.string for cat in cats]
|
|
||||||
for cat in catstext:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Characters' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
charstext = [char.string for char in chars]
|
|
||||||
for char in charstext:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Genre' in label:
|
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
|
||||||
genrestext = [genre.string for genre in genres]
|
|
||||||
self.genre = ', '.join(genrestext)
|
|
||||||
for genre in genrestext:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
if 'Warnings' in label:
|
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
|
|
||||||
warningstext = [warning.string for warning in warnings]
|
|
||||||
self.warning = ', '.join(warningstext)
|
|
||||||
for warning in warningstext:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
if 'Places' in label:
|
|
||||||
places = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
|
||||||
placestext = [place.string for place in places]
|
|
||||||
self.warning = ', '.join(placestext)
|
|
||||||
for place in placestext:
|
|
||||||
self.story.addToList('places',place.string)
|
|
||||||
|
|
||||||
if 'Times' in label:
|
|
||||||
times = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
|
|
||||||
timestext = [time.string for time in times]
|
|
||||||
self.warning = ', '.join(timestext)
|
|
||||||
for time in timestext:
|
|
||||||
self.story.addToList('times',time.string)
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
# there's a stray [ at the end.
|
|
||||||
#value = value[0:-1]
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'https://'+self.host+'/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
## Now we are going to cludge together the rest of the metadata
|
|
||||||
metad = soup.findAll('p',{'class':'smaller'})
|
|
||||||
## Categories don't have a proper label, but do use links, so...
|
|
||||||
cats = soup.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
catstext = [cat.string for cat in cats]
|
|
||||||
for cat in catstext:
|
|
||||||
if cat != None:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
## Characters don't have a proper label, but do use links, so...
|
|
||||||
chars = soup.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
charstext = [char.string for char in chars]
|
|
||||||
for char in charstext:
|
|
||||||
if char != None:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
### Rating is not enclosed in a label, only in a p tag classed 'smaller' so...
|
|
||||||
ratng = metad[0].find('strong').get_text().replace('Rated','').strip()
|
|
||||||
self.story.setMetadata('rating', ratng)
|
|
||||||
|
|
||||||
## No we try to get the summary... it's not within it's own
|
|
||||||
## dedicated tag, so we have to split some hairs..
|
|
||||||
## This may not work every time... but I tested it with 6 stories...
|
|
||||||
mdata = metad[0]
|
|
||||||
while '<hr/>' not in unicode(mdata.nextSibling):
|
|
||||||
mdata = mdata.nextSibling
|
|
||||||
self.setDescription(url,mdata.previousSibling.previousSibling.get_text())
|
|
||||||
|
|
||||||
### the rest of the metadata are not in tags at all... so we have to be really cludgy.
|
|
||||||
## we don't need the rest of them, so we get rid of all but the last one
|
|
||||||
metad = metad[-1]
|
|
||||||
## we also don't need any of the links in here, so we'll get rid of them as well.
|
|
||||||
links = metad.findAll('a')
|
|
||||||
for link in links:
|
|
||||||
link.extract()
|
|
||||||
## and we've already done the labels, so let's remove them
|
|
||||||
labels = metad.findAll('span',{'class':'label'})
|
|
||||||
for label in labels:
|
|
||||||
label.extract()
|
|
||||||
## now we should only have text and <br>'s... somthing like this:
|
|
||||||
#<p class="smaller">Categories:
|
|
||||||
#<br/>
|
|
||||||
#Characters: , , ,
|
|
||||||
#<br/>
|
|
||||||
# , <br/> <br/> <br/> None<br/>
|
|
||||||
#Challenges: None
|
|
||||||
#<br/>
|
|
||||||
#Series: None
|
|
||||||
#<br/>
|
|
||||||
#Chapters: 1    |    Word count: 200    |    Read Count: 767
|
|
||||||
#<br/>
|
|
||||||
#Completed: Yes    |    Updated: 04/27/13    |    Published: 04/27/13
|
|
||||||
#<br/>
|
|
||||||
#</p>
|
|
||||||
## we'll have to remove the non-breaking spaces to get this to work.
|
|
||||||
metad = unicode(metad).replace(u"\xa0",'').replace('\n','')
|
|
||||||
for txt in metad.split('<br/>'):
|
|
||||||
if 'Challenges:' in txt:
|
|
||||||
txt = txt.replace('Challenges:','').strip()
|
|
||||||
self.story.setMetadata('challenges', txt)
|
|
||||||
elif 'Series:' in txt:
|
|
||||||
txt = txt.replace('Series:','').strip()
|
|
||||||
self.story.setMetadata('challenges', txt)
|
|
||||||
elif 'Chapters:' in txt:
|
|
||||||
for txt2 in txt.split('|'):
|
|
||||||
txt2 = txt2.replace('\n','').strip()
|
|
||||||
if 'Word count:' in txt2:
|
|
||||||
txt2 = txt2.replace('Word count:','').strip()
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
elif 'Read Count:' in txt2:
|
|
||||||
txt2= txt2.replace('Read Count:','').strip()
|
|
||||||
self.story.setMetadata('readings', value)
|
|
||||||
elif 'Completed:' in txt:
|
|
||||||
for txt2 in txt.split('|'):
|
|
||||||
txt2 = txt2.strip()
|
|
||||||
if 'Completed:' in txt2:
|
|
||||||
if 'Yes' in txt2:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
elif 'Updated:' in txt2:
|
|
||||||
txt2= txt2.replace('Updated:','').strip()
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(txt2.strip(), "%b/%d/%y"))
|
|
||||||
elif 'Published:' in txt2:
|
|
||||||
txt2= txt2.replace('Published:','').strip()
|
|
||||||
self.story.setMetadata('datePublished', makeDate(txt2.strip(), "%b/%d/%y"))
|
|
||||||
|
|
||||||
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
# problems with some stories, but only in calibre. I suspect
|
|
||||||
# issues with different SGML parsers in python. This is a
|
|
||||||
# nasty hack, but it works.
|
|
||||||
data = data[data.index("<body"):]
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
|
|
||||||
span = soup.find('div', {'id' : 'maincontent'})
|
|
||||||
|
|
||||||
# Everything is encased in the maincontent section, so we have
|
|
||||||
# to remove as much as we can systematically
|
|
||||||
tables = span.findAll('table')
|
|
||||||
for table in tables:
|
|
||||||
table.extract()
|
|
||||||
|
|
||||||
headings = span.findAll('h3')
|
|
||||||
for heading in headings:
|
|
||||||
heading.extract()
|
|
||||||
|
|
||||||
links = span.findAll('a')
|
|
||||||
for link in links:
|
|
||||||
link.extract()
|
|
||||||
|
|
||||||
forms = span.findAll('form')
|
|
||||||
for form in forms:
|
|
||||||
form.extract()
|
|
||||||
|
|
||||||
divs = span.findAll('div')
|
|
||||||
for div in divs:
|
|
||||||
div.extract()
|
|
||||||
|
|
||||||
if None == span:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,span)
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return LOTRgficComAdapter
|
|
||||||
|
|
@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('rating', rating)
|
self.story.setMetadata('rating', rating)
|
||||||
|
|
||||||
# Find the chapters:
|
# Find the chapters:
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
# just in case there's tags, like <i> in chapter titles.
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||||
|
|
||||||
|
|
@ -134,7 +134,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||||
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
labels = soup.find_all('span',{'class':'label'})
|
||||||
|
|
||||||
value = labels[0].previousSibling
|
value = labels[0].previousSibling
|
||||||
svalue = ""
|
svalue = ""
|
||||||
|
|
@ -154,22 +154,22 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('numWords', value.split(' -')[0])
|
self.story.setMetadata('numWords', value.split(' -')[0])
|
||||||
|
|
||||||
if 'Categories' in label:
|
if 'Categories' in label:
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||||
for cat in cats:
|
for cat in cats:
|
||||||
self.story.addToList('category',cat.string)
|
self.story.addToList('category',cat.string)
|
||||||
|
|
||||||
if 'Characters' in label:
|
if 'Characters' in label:
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||||
for char in chars:
|
for char in chars:
|
||||||
self.story.addToList('characters',char.string)
|
self.story.addToList('characters',char.string)
|
||||||
|
|
||||||
if 'Genre' in label:
|
if 'Genre' in label:
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||||
for genre in genres:
|
for genre in genres:
|
||||||
self.story.addToList('genre',genre.string)
|
self.story.addToList('genre',genre.string)
|
||||||
|
|
||||||
if 'Warnings' in label:
|
if 'Warnings' in label:
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||||
for warning in warnings:
|
for warning in warnings:
|
||||||
self.story.addToList('warnings',warning.string)
|
self.story.addToList('warnings',warning.string)
|
||||||
|
|
||||||
|
|
@ -194,7 +194,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
series_url = 'http://'+self.host+'/'+a['href']
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
seriessoup = self.make_soup(self.get_request(series_url))
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||||
i=1
|
i=1
|
||||||
for a in storyas:
|
for a in storyas:
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||||
|
|
|
||||||
|
|
@ -162,7 +162,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
|
||||||
self.story.extendList('authorId', [authorId])
|
self.story.extendList('authorId', [authorId])
|
||||||
self.story.extendList('authorUrl', [authorUrl])
|
self.story.extendList('authorUrl', [authorUrl])
|
||||||
|
|
||||||
if not self.story.getMetadata('rating'):
|
if not self.story.getMetadataRaw('rating'):
|
||||||
ratingTitle = chapter.getRatingTitle()
|
ratingTitle = chapter.getRatingTitle()
|
||||||
if ratingTitle:
|
if ratingTitle:
|
||||||
self.story.setMetadata('rating', ratingTitle)
|
self.story.setMetadata('rating', ratingTitle)
|
||||||
|
|
@ -204,7 +204,6 @@ class MassEffect2InAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('datePublished', datePublished)
|
self.story.setMetadata('datePublished', datePublished)
|
||||||
self.story.setMetadata('dateUpdated', dateUpdated)
|
self.story.setMetadata('dateUpdated', dateUpdated)
|
||||||
self.story.setMetadata('numWords', unicode(wordCount))
|
self.story.setMetadata('numWords', unicode(wordCount))
|
||||||
self.story.setMetadata('numChapters', len(chapters))
|
|
||||||
|
|
||||||
# Site-specific metadata.
|
# Site-specific metadata.
|
||||||
self.story.setMetadata('language', self.SITE_LANGUAGE)
|
self.story.setMetadata('language', self.SITE_LANGUAGE)
|
||||||
|
|
@ -678,7 +677,7 @@ class Chapter(object):
|
||||||
|
|
||||||
def _excludeEditorSignature(self, root):
|
def _excludeEditorSignature(self, root):
|
||||||
"""Exclude editor signature from within `root' element."""
|
"""Exclude editor signature from within `root' element."""
|
||||||
for textNode in root.findAll(text=True):
|
for stringNode in root.find_all(string=True):
|
||||||
if re.match(self.SIGNED_PATTERN, textNode.string):
|
if re.match(self.SIGNED_PATTERN, textNode.string):
|
||||||
editorLink = textNode.findNext('a')
|
editorLink = textNode.findNext('a')
|
||||||
if editorLink:
|
if editorLink:
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,9 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
||||||
return "https://mcstories.com/StoryTitle/ https://mcstories.com/StoryTitle/index.html https://mcstories.com/StoryTitle/StoryTitle1.html"
|
return "https://mcstories.com/StoryTitle/ https://mcstories.com/StoryTitle/index.html https://mcstories.com/StoryTitle/StoryTitle1.html"
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
def getSiteURLPattern(self):
|
||||||
return r"https?://(www\.)?mcstories\.com/([a-zA-Z0-9_-]+)/"
|
## Note that this uses a regular expression *negative*
|
||||||
|
## lookahead--story URLs *can't* have /Titles/ /Authors/ etc.
|
||||||
|
return r"https?://(www\.)?mcstories\.com(?!/(Titles|Authors|Tags|ReadersPicks)/)/[a-zA-Z0-9_-]+/"
|
||||||
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
def extractChapterUrlsAndMetadata(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -83,7 +85,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
||||||
data1 = self.get_request(self.url)
|
data1 = self.get_request(self.url)
|
||||||
soup1 = self.make_soup(data1)
|
soup1 = self.make_soup(data1)
|
||||||
#strip comments from soup
|
#strip comments from soup
|
||||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||||
|
|
||||||
if 'Page Not Found.' in data1:
|
if 'Page Not Found.' in data1:
|
||||||
raise exceptions.StoryDoesNotExist(self.url)
|
raise exceptions.StoryDoesNotExist(self.url)
|
||||||
|
|
@ -93,12 +95,13 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
||||||
self.story.setMetadata('title', title.text)
|
self.story.setMetadata('title', title.text)
|
||||||
|
|
||||||
# Author
|
# Author
|
||||||
author = soup1.find('h3', class_='byline').a
|
# byline = soup1.find('h3', class_='byline')
|
||||||
authorurl = urlparse.urljoin(self.url, author['href'])
|
for author in soup1.select('h3.byline a'):
|
||||||
self.story.setMetadata('author', author.text)
|
authorurl = urlparse.urljoin(self.url, author['href'])
|
||||||
self.story.setMetadata('authorUrl', authorurl)
|
self.story.addToList('author', author.text)
|
||||||
authorid = os.path.splitext(os.path.basename(authorurl))[0]
|
self.story.addToList('authorUrl', authorurl)
|
||||||
self.story.setMetadata('authorId', authorid)
|
authorid = os.path.splitext(os.path.basename(authorurl))[0]
|
||||||
|
self.story.addToList('authorId', authorid)
|
||||||
|
|
||||||
# Description
|
# Description
|
||||||
synopsis = soup1.find('section', class_='synopsis')
|
synopsis = soup1.find('section', class_='synopsis')
|
||||||
|
|
@ -160,7 +163,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
||||||
soup1 = self.make_soup(data1)
|
soup1 = self.make_soup(data1)
|
||||||
|
|
||||||
#strip comments from soup
|
#strip comments from soup
|
||||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||||
|
|
||||||
# get story text
|
# get story text
|
||||||
story1 = soup1.find('article', id='mcstories')
|
story1 = soup1.find('article', id='mcstories')
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
# The date format will vary from site to site.
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||||
self.dateformat = "%B %d, %Y %H:%M"
|
self.dateformat = "%m.%d.%Y"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getSiteDomain():
|
def getSiteDomain():
|
||||||
|
|
@ -89,12 +89,16 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
||||||
## even newer urls
|
## even newer urls
|
||||||
## https://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000
|
## https://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000
|
||||||
## https://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822
|
## https://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822
|
||||||
|
## email urls:
|
||||||
|
## https://www.mediaminer.org/fanfic/view_ch.php/161297/626395?utm_source=add_chapter&utm_medium=email
|
||||||
|
## author page urls:
|
||||||
|
## https://www.mediaminer.org/fanfic/view_st.php?id=145608&submit=View
|
||||||
return r"https?://"+re.escape(self.getSiteDomain())+r"/fanfic/"+\
|
return r"https?://"+re.escape(self.getSiteDomain())+r"/fanfic/"+\
|
||||||
r"((s/(?P<cattitle4>[^/]+)/(?P<urltitle4>[^/]+)/(?P<id4>\d+))|"+\
|
r"((s/(?P<cattitle4>[^/]+)/(?P<urltitle4>[^/]+)/(?P<id4>\d+))|"+\
|
||||||
r"((c/(?P<cattitle5>[^/]+)/(?P<urltitle5>[^/]+)/(?P<id5>\d+))/\d+)|"+\
|
r"((c/(?P<cattitle5>[^/]+)/(?P<urltitle5>[^/]+)/(?P<id5>\d+))/\d+)|"+\
|
||||||
r"(s/(?P<urltitle1>[^/]+)/(?P<id1>\d+))|"+\
|
r"(s/(?P<urltitle1>[^/]+)/(?P<id1>\d+))|"+\
|
||||||
r"((c/(?P<urltitle2>[^/]+)/[^/]+/(?P<id2>\d+))/\d+)|"+\
|
r"((c/(?P<urltitle2>[^/]+)/[^/]+/(?P<id2>\d+))/\d+)|"+\
|
||||||
r"(view_st\.php/(?P<id3>\d+)))"
|
r"(view_(st|ch)\.php(/|\?id=)(?P<id3>\d+)))"
|
||||||
|
|
||||||
# Override stripURLParameters so the id parameter won't get stripped
|
# Override stripURLParameters so the id parameter won't get stripped
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -142,21 +146,24 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
||||||
for (atag,aurl,name) in [ (x,x['href'],stripHTML(x)) for x in chap_p.find_all('a') ]:
|
for (atag,aurl,name) in [ (x,x['href'],stripHTML(x)) for x in chap_p.find_all('a') ]:
|
||||||
self.add_chapter(name,'https://'+self.host+aurl)
|
self.add_chapter(name,'https://'+self.host+aurl)
|
||||||
|
|
||||||
|
|
||||||
# category
|
# category
|
||||||
# <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
|
# <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
|
||||||
for a in soup.findAll('a',href=re.compile(r"^/fanfic/a/")):
|
for a in soup.find_all('a',href=re.compile(r"^/fanfic/a/")):
|
||||||
self.story.addToList('category',a.string)
|
self.story.addToList('category',a.string)
|
||||||
|
|
||||||
# genre
|
# genre
|
||||||
# <a href="/fanfic/src.php/g/567">Ranma 1/2</a>
|
# <a href="/fanfic/src.php/g/567">Ranma 1/2</a>
|
||||||
for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")):
|
for a in soup.find_all('a',href=re.compile(r"^/fanfic/src.php/g/")):
|
||||||
self.story.addToList('genre',a.string)
|
self.story.addToList('genre',a.string)
|
||||||
|
|
||||||
metastr = stripHTML(soup.find("div",{"class":"post-meta"}))
|
metasoup = soup.find("div",{"class":"post-meta"})
|
||||||
|
metastr = stripHTML(metasoup)
|
||||||
|
metahtml = unicode(metasoup)
|
||||||
|
|
||||||
|
self.setDescription(url, metahtml[metahtml.index('</a><br/>')+9:metahtml.index('<br/><b>')])
|
||||||
|
|
||||||
# Latest Revision: February 07, 2015 15:21 PST
|
# Latest Revision: February 07, 2015 15:21 PST
|
||||||
m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d \d\d:\d\d)",metastr)
|
m = re.match(r".*?(?:Latest Revision|Uploaded On): ?(\d\d\.\d\d\.\d\d\d\d) ?",metastr)
|
||||||
if m:
|
if m:
|
||||||
self.story.setMetadata('dateUpdated', makeDate(m.group(1), self.dateformat))
|
self.story.setMetadata('dateUpdated', makeDate(m.group(1), self.dateformat))
|
||||||
# site doesn't give date published on index page.
|
# site doesn't give date published on index page.
|
||||||
|
|
@ -164,19 +171,20 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
||||||
# self.story.setMetadata('datePublished',
|
# self.story.setMetadata('datePublished',
|
||||||
# self.story.getMetadataRaw('dateUpdated'))
|
# self.story.getMetadataRaw('dateUpdated'))
|
||||||
|
|
||||||
# Words: 123456
|
# Words:123 or 23.1K or 1.0M
|
||||||
m = re.match(r".*?\| Words: (\d+) \|",metastr)
|
m = re.match(r".*?\| ?Words: ?([\.\d]+)(K|M|) ?\|",metastr)
|
||||||
if m:
|
if m:
|
||||||
self.story.setMetadata('numWords', m.group(1))
|
if not m.group(2):
|
||||||
|
word_factor = 1
|
||||||
# Summary: ....
|
elif m.group(2) == 'K':
|
||||||
m = re.match(r".*?Summary: (.*)$",metastr)
|
word_factor = 1000
|
||||||
if m:
|
elif m.group(2) == 'M':
|
||||||
self.setDescription(url, m.group(1))
|
word_factor = 1000000
|
||||||
#self.story.setMetadata('description', m.group(1))
|
num_words = int(float(m.group(1))*word_factor)
|
||||||
|
self.story.setMetadata('numWords', num_words)
|
||||||
|
|
||||||
# completed
|
# completed
|
||||||
m = re.match(r".*?Status: Completed.*?",metastr)
|
m = re.match(r".*?Status: ?Completed.*?",metastr)
|
||||||
if m:
|
if m:
|
||||||
self.story.setMetadata('status','Completed')
|
self.story.setMetadata('status','Completed')
|
||||||
else:
|
else:
|
||||||
|
|
@ -194,7 +202,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
||||||
# print("data:%s"%data)
|
# print("data:%s"%data)
|
||||||
headerstr = stripHTML(soup.find('div',{'class':'post-meta'}))
|
headerstr = stripHTML(soup.find('div',{'class':'post-meta'}))
|
||||||
|
|
||||||
m = re.match(r".*?Uploaded On: ([a-zA-Z]+ \d\d, \d\d\d\d \d\d:\d\d)",headerstr)
|
m = re.match(r".*?Uploaded On: ?(\d\d\.\d\d\.\d\d\d\d)",headerstr)
|
||||||
if m:
|
if m:
|
||||||
date = makeDate(m.group(1), self.dateformat)
|
date = makeDate(m.group(1), self.dateformat)
|
||||||
if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):
|
if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):
|
||||||
|
|
|
||||||
|
|
@ -1,272 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Software: eFiction
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
import re
|
|
||||||
from ..htmlcleanup import stripHTML
|
|
||||||
from .. import exceptions as exceptions
|
|
||||||
|
|
||||||
# py2 vs py3 transition
|
|
||||||
from ..six import text_type as unicode
|
|
||||||
|
|
||||||
from .base_adapter import BaseSiteAdapter, makeDate
|
|
||||||
|
|
||||||
def getClass():
|
|
||||||
return MerlinFicDtwinsCoUk
|
|
||||||
|
|
||||||
# Class name has to be unique. Our convention is camel case the
|
|
||||||
# sitename with Adapter at the end. www is skipped.
|
|
||||||
class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|
||||||
|
|
||||||
def __init__(self, config, url):
|
|
||||||
BaseSiteAdapter.__init__(self, config, url)
|
|
||||||
|
|
||||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
|
||||||
self.password = ""
|
|
||||||
self.is_adult=False
|
|
||||||
|
|
||||||
# get storyId from url--url validation guarantees query is only sid=1234
|
|
||||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
|
||||||
|
|
||||||
|
|
||||||
# normalized story URL.
|
|
||||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
|
||||||
|
|
||||||
# Each adapter needs to have a unique site abbreviation.
|
|
||||||
self.story.setMetadata('siteabbrev','mrfd')
|
|
||||||
|
|
||||||
# The date format will vary from site to site.
|
|
||||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
|
||||||
self.dateformat = "%b %d, %Y"
|
|
||||||
|
|
||||||
@staticmethod # must be @staticmethod, don't remove it.
|
|
||||||
def getSiteDomain():
|
|
||||||
# The site domain. Does have www here, if it uses it.
|
|
||||||
return 'merlinfic.dtwins.co.uk'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def getSiteExampleURLs(cls):
|
|
||||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
|
||||||
|
|
||||||
def getSiteURLPattern(self):
|
|
||||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
|
||||||
|
|
||||||
## Login seems to be reasonably standard across eFiction sites.
|
|
||||||
def needToLoginCheck(self, data):
|
|
||||||
if 'Registered Users Only' in data \
|
|
||||||
or 'There is no such account on our website' in data \
|
|
||||||
or "That password doesn't match the one in our database" in data:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def performLogin(self, url):
|
|
||||||
params = {}
|
|
||||||
|
|
||||||
if self.password:
|
|
||||||
params['penname'] = self.username
|
|
||||||
params['password'] = self.password
|
|
||||||
else:
|
|
||||||
params['penname'] = self.getConfig("username")
|
|
||||||
params['password'] = self.getConfig("password")
|
|
||||||
params['cookiecheck'] = '1'
|
|
||||||
params['submit'] = 'Submit'
|
|
||||||
|
|
||||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
|
||||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
|
|
||||||
d = self.post_request(loginUrl, params)
|
|
||||||
|
|
||||||
if "Member Account" not in d : #Member Account
|
|
||||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
|
||||||
params['penname']))
|
|
||||||
raise exceptions.FailedToLogin(url,params['penname'])
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
|
||||||
def extractChapterUrlsAndMetadata(self):
|
|
||||||
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# Weirdly, different sites use different warning numbers.
|
|
||||||
# If the title search below fails, there's a good chance
|
|
||||||
# you need a different number. print data at that point
|
|
||||||
# and see what the 'click here to continue' url says.
|
|
||||||
addurl = "&ageconsent=ok&warning=4"
|
|
||||||
else:
|
|
||||||
addurl=""
|
|
||||||
|
|
||||||
# index=1 makes sure we see the story chapter index. Some
|
|
||||||
# sites skip that for one-chapter stories.
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
if self.needToLoginCheck(data):
|
|
||||||
# need to log in for this one.
|
|
||||||
self.performLogin(url)
|
|
||||||
data = self.get_request(url)
|
|
||||||
|
|
||||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
|
||||||
if m != None:
|
|
||||||
if self.is_adult or self.getConfig("is_adult"):
|
|
||||||
# We tried the default and still got a warning, so
|
|
||||||
# let's pull the warning number from the 'continue'
|
|
||||||
# link and reload data.
|
|
||||||
addurl = m.group(1)
|
|
||||||
# correct stupid & error in url.
|
|
||||||
addurl = addurl.replace("&","&")
|
|
||||||
url = self.url+'&index=1'+addurl
|
|
||||||
logger.debug("URL 2nd try: "+url)
|
|
||||||
|
|
||||||
data = self.get_request(url)
|
|
||||||
else:
|
|
||||||
raise exceptions.AdultCheckRequired(self.url)
|
|
||||||
|
|
||||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
|
||||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
|
||||||
|
|
||||||
soup = self.make_soup(data)
|
|
||||||
# print data
|
|
||||||
|
|
||||||
|
|
||||||
## Title
|
|
||||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
|
||||||
self.story.setMetadata('title',stripHTML(a))
|
|
||||||
|
|
||||||
# Find authorid and URL from... author url.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
|
||||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
|
||||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
|
||||||
self.story.setMetadata('author',a.string)
|
|
||||||
|
|
||||||
# Find the chapters:
|
|
||||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
|
||||||
# just in case there's tags, like <i> in chapter titles.
|
|
||||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
|
||||||
|
|
||||||
|
|
||||||
# eFiction sites don't help us out a lot with their meta data
|
|
||||||
# formating, so it's a little ugly.
|
|
||||||
|
|
||||||
# utility method
|
|
||||||
def defaultGetattr(d,k):
|
|
||||||
try:
|
|
||||||
return d[k]
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
|
||||||
labels = soup.findAll('span',{'class':'label'})
|
|
||||||
for labelspan in labels:
|
|
||||||
value = labelspan.nextSibling
|
|
||||||
label = labelspan.string
|
|
||||||
|
|
||||||
if 'Summary' in label:
|
|
||||||
## Everything until the next span class='label'
|
|
||||||
svalue = ""
|
|
||||||
while 'label' not in defaultGetattr(value,'class'):
|
|
||||||
svalue += unicode(value)
|
|
||||||
value = value.nextSibling
|
|
||||||
self.setDescription(url,svalue)
|
|
||||||
#self.story.setMetadata('description',stripHTML(svalue))
|
|
||||||
|
|
||||||
if 'Rated' in label:
|
|
||||||
self.story.setMetadata('rating', value)
|
|
||||||
|
|
||||||
if 'Word count' in label:
|
|
||||||
self.story.setMetadata('numWords', value)
|
|
||||||
|
|
||||||
if 'Categories' in label:
|
|
||||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
|
||||||
for cat in cats:
|
|
||||||
self.story.addToList('category',cat.string)
|
|
||||||
|
|
||||||
if 'Characters' in label:
|
|
||||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
|
||||||
for char in chars:
|
|
||||||
self.story.addToList('characters',char.string)
|
|
||||||
|
|
||||||
if 'Pairing' in label:
|
|
||||||
ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
|
||||||
for ship in ships:
|
|
||||||
self.story.addToList('ships',ship.string)
|
|
||||||
|
|
||||||
if 'Genre' in label:
|
|
||||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
|
|
||||||
for genre in genres:
|
|
||||||
self.story.addToList('genre',genre.string)
|
|
||||||
|
|
||||||
if 'Warnings' in label:
|
|
||||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
|
||||||
for warning in warnings:
|
|
||||||
self.story.addToList('warnings',warning.string)
|
|
||||||
|
|
||||||
if 'Completed' in label:
|
|
||||||
if 'Yes' in value:
|
|
||||||
self.story.setMetadata('status', 'Completed')
|
|
||||||
else:
|
|
||||||
self.story.setMetadata('status', 'In-Progress')
|
|
||||||
|
|
||||||
if 'Published' in label:
|
|
||||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
if 'Updated' in label:
|
|
||||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Find Series name from series URL.
|
|
||||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
|
||||||
series_name = a.string
|
|
||||||
series_url = 'http://'+self.host+'/'+a['href']
|
|
||||||
|
|
||||||
seriessoup = self.make_soup(self.get_request(series_url))
|
|
||||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
|
||||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
|
||||||
i=1
|
|
||||||
for a in storyas:
|
|
||||||
# skip 'report this' and 'TOC' links
|
|
||||||
if 'contact.php' not in a['href'] and 'index' not in a['href']:
|
|
||||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
|
||||||
self.setSeries(series_name, i)
|
|
||||||
self.story.setMetadata('seriesUrl',series_url)
|
|
||||||
break
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
except:
|
|
||||||
# I find it hard to care if the series parsing fails
|
|
||||||
pass
|
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
|
||||||
def getChapterText(self, url):
|
|
||||||
|
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
|
||||||
|
|
||||||
soup = self.make_soup(self.get_request(url))
|
|
||||||
|
|
||||||
div = soup.find('div', {'id' : 'story'})
|
|
||||||
|
|
||||||
if None == div:
|
|
||||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
|
||||||
|
|
||||||
return self.utf8FromSoup(url,div)
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue