Mirror of https://github.com/JimmXinu/FanFicFare.git, synced 2026-05-09 05:21:13 +02:00

Compare commits

820 commits
SHA1:

a172a7bd2b, ab103dce6e, 892e9207f0, b4e392fae1, d9525d9726, cb77b12754, b41a633821, 50c8db2992, ef6dd99bfe, 59796ff537,
8ee0a6e898, c53fc362bd, c87cfc1057, 6ee151c90a, db01c828a0, 4d03874f06, 36f56483e6, 18e45a403b, 2e25172ba3, 65e3fd562b,
7089bf6689, 061dc1333f, 0a7fb5c090, cf02f729ae, 730c4f77f9, c02da29cbd, b87d796221, 436370fe5b, ac77f31bc2, 16f2c74e4b,
af5c2aa0bc, 31dec5b62d, 97d37fcfc1, c730aa2f68, 4e2e359dee, bb96049934, 84965ef25f, 348d129a1e, 4794e9bc51, d46dc76ae1,
08bae8d9be, 405c37aeb5, 270e01c3c7, 12d57f5950, 562b3a4ecd, e69045fd98, 747bde3394, aa00c7ae03, 0539f818f3, 41a6f56f44,
e3832245e6, 909b64c83c, 732f5e2571, d9dd04396e, 36e2183d45, 040b7205b8, d8ed180eb1, 2a6c1e74db, b7c8c96153, a16096592c,
bb34eecc7c, ceed7ef1a8, 1d2a887c2d, a3f3302312, ecf005b145, 3bd074fa2c, 0fd95daa8e, 1b57e49d98, db0d39c9cd, cbde66cf41,
17331e9eb3, 9b96c151a5, 1b65a30798, c9a47877f7, bdc77ad0f6, 719971c76c, c74dba472a, c1fb7f0fc5, 94c932cd2f, 27fb765c0d,
06ce46f64a, c04d85fa97, b6cdc30db5, 9bbb5e8b01, 18ce6e6fba, 507910f5da, ccf7801a89, 9a52a10626, 6963153aac, ee357cd5b4,
b84e3d2858, 9377fc6671, aaa0fa613a, eac5acfbfa, 8dca1ef343, 28e8f61cf8, 78abf476ea, 2b1f9446dd, 9815736b4e, 3f54cce9a1,
223138b8e5, 4aa47c8bab, a97a85f357, ffc3696d84, 86c4e1974b, b6fd7c2ca4, 326300b40e, 282bafe514, 061a8feccf, 26c9b6d2ce,
ed02d61953, b58d54b8ea, 1bc3ffc269, cbd295f911, 35653f533f, ea7afea8c2, 384a2fe8b7, b278cac620, e23de49fb5, f64f041546,
1d53c506c9, c8d6ce8004, 3f08417c04, 79ebf6a02b, 41dfb8eab8, 590b663170, 9bb408c8b3, 5d6a63a8ca, 4078ccfdb1, 79c29121c3,
dea48d9e07, c165196a35, c385013db9, 8780aa3105, 12c7bfe29c, 08d0b8a4e0, 1d401f8dba, 193bb3ed61, 63fd8cd660, 26a1152390,
e0907147f7, 99bba3ff12, 3fdb6630fb, 0d6b789c9f, edaa03ef42, 4e17a10792, 9fd48e0168, 818e990184, 9bb7b54023, af6695e27f,
46293f2d02, 7f968ba102, 1e5cb9b184, 9627e6e62c, 5e644098f9, fa3a56d096, ba18216ef8, f207e31b3b, 0e1ace18e4, b17a632640,
485d4631f9, 30929bc38e, ae4311f4dd, 3a3c35ea1f, 19dd89fb4d, b247a7465b, d5c20db681, a599ff6ad2, e21c6604a1, 273c1931f4,
fdf29eeade, 06e55728d0, 0a3ab4bc9d, a4a91b373f, a68e771026, d7c79fcb3b, 5cc05ed96d, e5b5768f11, 6cf2519ef9, f4f98e0877,
bb8fb9efa5, be38778d72, 55d8efbdcd, 9df7822e32, 69e6a3d2cf, 8ea03be5f3, 75a213beb9, ead830c60a, 20681315e7, e2961eaadf,
7f0d7f70be, c5264c2147, ff402c16ca, 4a9da1c02e, c14f1014b8, 74bc398994, 6e8e74fc55, 68ad4c87aa, fe82aed91d, 7d14bf6e90,
39500a9386, d5f8891e4f, edce6949ae, bec6fac2ea, a9bd19a079, 7135ba5892, 9ba4c100ca, fe565149ba, 624f60a5c1, 5c79ac0b5c,
615711f904, 2f77bd9e97, abdc881812, 1ba73bf316, a359c6b326, ff64356e85, 0271b14f6c, bf845e200f, e94ff6e1e8, 07313d2744,
bd2026df7e, 0fa177ff79, d84c72a215, c319857da0, df586e9bb7, 354a5708ce, 096face5d2, 02e3bddd5c, 9dadef1905, 2e8a899d8c,
623915f623, 57865ca53d, e9c4b9ef30, 0ad088b663, e37a7f72be, 9befe122dd, e6d6227ff1, d854a6efe7, a97af94f8a, e2ea97e99a,
215f6dd8ff, 687aa9c3ba, 523cf78640, 90e50964b6, a83823ea13, 727aa6f1bc, 072d929298, 992c5a1378, f8937c1af3, af5c78e2e9,
4a26dfdfff, a82ef5dbae, 6adc995fa5, f534efd3df, f41e64141a, 94036e3fbb, 9142609c61, f9d7b893ee, 4e2ae7441d, 87dbef980f,
921f8c287b, 637c6e3cc3, ba90ff9f3a, 34e84b2942, 31eb7f421a, 85d4656005, 006b8873a5, 3246036f88, 6d114532e2, 2edb1d58d5,
8dc3c5d3d8, 2ec8c97e28, c51161c3d1, bd645a97c7, f7cbfa56bb, 07fd16813f, 2fe971c79f, e4082c6235, 960d5ba11a, 066539793d,
5b312494fb, e628b10247, 61c063ed72, 11d3f601c9, 3b8d0f63d4, b8b30c6a78, b007f68a88, 6d8a67ef2e, ab66e9e285, b3f7add5a1,
800be43d24, 70f77e17e2, caf46ba421, 686ed80230, 56689a10c4, 065d077752, c8f817e830, 1432241319, 0e9f60f8a6, 74de62385f,
d2f69eb5d5, c3655d59ca, aca07bbf59, 3edd3c3e7b, 61ba096c6e, 47fd71c4b9, e1d0bed52d, acb88cbefc, f1e7cabf6a, 21ec27ffd4,
5567e6417d, af352a480c, 92069dc638, 76e9421858, 70558bf444, b60dfdcc28, b976439669, 6de50509ed, 4d9c38d3c2, 90ecb63be4,
bd49f8e8fa, 21c0315e60, fc97fa6d5c, 2c3bf3c642, a9c725d32a, f936c5b0fb, 53344afa49, d5addfa2fd, 6d8375a9f3, 7bc03ac798,
05d62a5343, 31115f9245, 26ee692208, dd43d25f76, fffd15d7ea, 7c2700c8ea, 94518c4f25, 531b965b22, 658b637716, 44f5feacfb,
52451a3eba, 7123f7dd6f, 08a0f9b5fc, 74ac96a67e, 9eed0340e9, 73b90c0291, c33a6e6b05, d77cc15586, 21483f7227, 6c0df42fe7,
c3a90a8914, e7f66d293a, e49b3a6be0, ae72efdc00, bc935e213a, a8e0eabbd8, 81b84a8133, a973b8c926, 08ccc659ca, fb610de27a,
29d2e3734b, 48cf17c7b7, ac61c2bb68, a12d2a688b, 52027eac46, a1d4fba728, 69872b922c, 7bd1a1acfc, 80e5a22f0d, 3cd4188bd8,
21d16dbe90, 5ce7875851, 35be14a168, 930940c7fd, f001f19a47, fd7382fb56, c69e940d2a, 31dcd8e6ff, 0bd85c10a8, b075c22261,
87b3e04fa1, 630f09e644, a0463fc85b, de7d8079d9, 4aad0ec913, c379b45cb9, 82825d1b16, 11b2d5643e, 06dc2add8f, ab7198bb8f,
d854733ffa, a2cc6bcdd3, c9accda3f8, 8e55d1e6f4, 9b8eb547fc, 62b3c9264e, 370be379f0, 1addfe14fc, e510fb027e, 86b807805f,
0ace02ee75, 38ad74af68, 6c70a60cdb, 80ee0ca9b9, 8b143a0c1b, 9fb86da341, 5c703122ec, 75f89beab1, fc9d184f20, 6c411e054a,
dbef4719d9, da6b4c25f2, 23004e3953, 4a15c2a7d5, 84dad2ec43, 5ac38fc327, 35e0ada643, a9533364ec, 4a03186ce6, a0271e2957,
11491c6383, 24dccc73f0, 8e3a88776a, 28141ce9d1, ffaa3bf82a, d0d05d6c3b, 6d74a58181, de85fd42f7, c4aebd40df, 81cb631491,
35aa5d2143, a8b1489233, ffb179c9a1, 6d8d7ab66f, a128083ce8, 9f78ec0177, d941810825, ba1975342c, 27cfac45e4, 64a4eb2bb2,
371f995fda, 816bbdfd66, cdd6df8a57, 5d4489bb28, a9944cd255, c284b2a6c6, 15dde72f14, ff0f22565c, 33813b4047, ae3accca27,
d998467f7a, 29fddbce8e, a4e1db32e0, 81aea65555, 9005f9db4c, 7de040d8db, 9c53cf236e, 2e6ac07020, 3febac62a8, c4ea6ca5fd,
75f9fb2d38, e4f83c52ca, eb54731ae9, eb24bcb2ac, ffa533e5fd, bd76066905, eb17af9252, 4471b1f980, 9cfd88c098, c1cf8995ea,
55995be7de, 869686f363, f45a05ddb6, 434ff0de74, d0ece28197, cd1db0a462, 075c5cb7c2, b8740ca1c7, 3db3e28595, b610d49f6b,
35afca430a, 1499037e19, 1aaa4102a5, 049c9af0e4, 482b6b67eb, cdb752df6a, 0412355001, 0dc049aedb, 832387dea0, 94bd4bf236,
493e76df30, 44b6e752f6, 5d6f2c91c1, 04ae49f944, 020606fea1, 711698620e, 968687bb82, 07ab6d137b, d51ac5d6f5, 478d2e8f17,
67a1dcee90, af834b1e40, ae535e2518, 96d36ae71a, 480b7239e5, 2666164c5b, 6ef8d1b215, 654619e7e2, 4ea869a764, 837df18cb0,
248f1c022b, 4fabf9e65c, b7c318f520, 89a15e1b16, 5b41097abc, a672b6dbdf, e4d5d43efa, cc572857e0, 2f52ae31c0, 3ddf801925,
182695b0af, 656e67cc57, 34215ce0ee, c706aed271, e5f8e5bba4, 11d8fae876, 4a14e5fc86, 7548ce6ae0, e113bbfb1c, d1ccdfd21f,
68e8f49e9f, 49a0328268, 25ea3fcaad, a5378ca419, e0b733b60d, 33b2b10bf3, c468c26208, 9d29f888b3, d1e8a77489, ef66e73fa4,
7f128587c0, 53a7a60dbc, 71a61ff166, 9c051e6c3b, f0d89498dc, abb370a852, 4b9054d1b4, 2d0db171a8, 7f67465767, 6801d5e01d,
b01914c24e, dd41f99288, 37db56e6b3, f0a08f7647, 2593f742c9, 6ac299c198, 3eda289349, 95a7bdd3a9, 84257e7388, 465bffd896,
eabfd1bef3, 8d6676617c, c47b620f67, df94cc439e, 08032778bd, 52deec3fd8, 5b443d4363, 4170cfd9a6, ae4735df04, 6041036787,
d451265621, 677f213337, 8537702028, 6d3d4d1ae6, 1f42c188fa, 9346985718, 4585afde50, bee6cb9ba6, 581b627a3e, 4436001494,
6116a19986, 99fd4ea0e5, a613b842f2, 6462c5c366, 8c4a8cd2da, 7a0ea3ce96, f14fe9d3aa, 36add28269, 87b4171dd4, 951acf61b4,
8674b54753, b7e5bf0468, 0f12c127b6, 50c51dc993, 65bf03a613, 0bb8421f98, 108e603e63, 1868ed842e, 6c505a6170, 72d508b0bf,
d6f2faf170, 92cbff7db9, 4bb2d50921, c3d8bc4fd0, 37ae6cbdbb, b953daa3c2, 463910cd54, 95bfdf907f, 85550aeaf6, 5b20926f2c,
c915aceb85, 36d56b867c, e1cec84075, ba3676d73f, 80f50b298f, 9120504249, 55c7ca9c10, 704ea89d72, 8eecd0aa7d, c53f99d01c,
438a1265f2, 86766223cb, 1fa94de1d9, 56d1cf19ef, 701c096ed4, aab3e1c601, 8d040a4926, 4453cbb143, 0c173f8110, a14b39eb4c,
c9cb51f8c4, dbe6c6105c, 04231eecfe, a55a4c93a5, dcd4f0f6a5, 792ab02195, 7a87310403, 7e070528a1, 4f3af1395f, 1fc4f3d70b,
12ee3dae5e, cf28bc26f0, bd41796231, f21f039b3a, 7263f4120c, 22e0e8da66, 7173bf0803, 7246cdf853, c60b296bc9, a8a86533ad,
d1c5847a58, 68e0d70fcb, 74b28f7ead, acda805c3c, a37fbbbd51, 2cdb6036ea, 77afdc0208, 7e0e68f66f, bbec6fcd5f, 631fe6c9c9,
a86755ad98, 42d2b00007, ad10cad0b0, 71d3589ebc, 84ed1827be, ce29a6923e, d96d194b2b, 5cb3bccf45, e6639323b7, f94e0eaf32,
37bcb1284b, 295bd2e1ab, 45b4a8d8bf, cdb60423fe, 50f913843b, 581d6f6657, e03f65332a, 3e9abec817, 0d8f84ba23, c646419336,
622a4eb44b, d4fbc73b41, 391f469a99, a0ca55d7f6, a4bbe27771, a5e2d1eb45, 7a89d03339, ae638fd0a1, 26a59b373a, 479c0b7d95,
52a0bb6e0e, f2f333c807, 3f2f2a33d3, ba9272822b, 9575044262, 7306e81a30, 19f9132109, f340ba50da, 6e90c7ed7b, 0a81bc7c6b,
f5dd6b90fc, e1a9438595, 97a72380e6, a6a3a4e240, b6b1e6ecdc, 85cf21a32c, 918ed4a23e, 84d6106a30, 6761cae9c1, e330ccbe94,
da7059e978, 893345dc33, 9fcc6fe68a, 0c02f17d67, 11c8805f4c, cf065fa706, 3c94c9d308, 831bea725f, b748283484, 28af7e1722,
1673da5a4b, c97c0e822d, ce24ac70d9, 9ab4739710, 685084e711, dd049ac297, 516f7464b7, 46be37e034, 693f0aa774, 646693ca3e,
22534986d3, 18b183585a, 5862ba627e, c38f4ab400, f5c9fcf029, 9e206d2215, b1b2451fa6, 91f2f84c10, 16ba74c98e, 0cc3b81580,
c769900332, a84e6ab385, af163c27e0, 016452ec89, b584779a13, 01d97ed770, 607ef27fe1, 448a9cfaef, 88fb6069fc, cd5fd2cab4,
a21fcf7e77, 627a8dbff5, dd1207f11e, 49aec452ca, e033f71ece, 62b097f3d5, 3098c1983f, 37626680f9, d99fe607da, c80f22cdd3,
0b6402ca8a, 26a7633337, 3ee7614441, 718ae6ac83, e0686eada2, 9f1fd42889, a088a34c89, 14cdc10ee3, 8667643e7c, e6d123a17d,
ae28b714b3, 33cd1642f8, 63ec69f9f2, 20ea9a00ed, 779222b66d, afb2b9fe29, 20052e1922, e03f3f40da, 00f6656d7d, dd2c1a48b5,
a37588a8f7, fc99805a85, d73b1732d3, 043fb289bf, a0332f27be, 99285763d3, 26467d8f35, 930ba5bb19, fb552c823a, bfc0c4f3ef,
216cb27f03, 21a5ded593, ff07987a02, bd6afdafb8, fd7c5ac867, 87eb84b5fa, 784cb711d8, 54a00a934b, c638ac8457, b710a4cdc7,
16c8c6b445, 5cee35149f, de201c7263, 222a4f4828, 7d6af47f60, 1c05d58d1a, 8152b51353, d387eafff2, fe5605ea50, 7f97decb8a,
cfd28dd1ff, 2c43eab432, fda597ddae, 7502c0f2fb, eaeeda6911, 8850c1a62b, 0205ec4ccb, 2600bf7be5, 012ff40f0f, 0df9e39931
216 changed files with 33738 additions and 31003 deletions
.gitignore (vendored, 3 changes)

```diff
@@ -20,6 +20,9 @@
 # pycharm project specific settings files
 .idea

+# vscode project specific settings file
+.vscode
+
 cleanup.sh
 FanFictionDownLoader.zip
 *.epub
```
````diff
@@ -52,9 +52,9 @@ Test versions are available at:

 - The [test plugin] is posted at MobileRead.
 - The test version of CLI for pip install is uploaded to the testpypi repository and can be installed with:

-> `pip install --extra-index-url https://testpypi.python.org/pypi --upgrade FanFicFare`
+```
+pip install --extra-index-url https://test.pypi.org/simple/ --upgrade FanFicFare
+```

 ### Other Releases

````
```diff
@@ -33,7 +33,7 @@ except NameError:
 from calibre.customize import InterfaceActionBase

 # pulled out from FanFicFareBase for saving in prefs.py
-__version__ = (4, 25, 0)
+__version__ = (4, 57, 7)

 ## Apparently the name for this class doesn't matter--it was still
 ## 'demo' for the first few versions.
```
calibre-plugin/action_chains.py (new file, 20 lines)

```diff
@@ -0,0 +1,20 @@
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2024, Jim Miller'
+__docformat__ = 'restructuredtext en'
+
+## References:
+## https://www.mobileread.com/forums/showthread.php?p=4435205&postcount=65
+## https://www.mobileread.com/forums/showthread.php?p=4102834&postcount=389
+
+from calibre_plugins.action_chains.events import ChainEvent
+
+class FanFicFareDownloadFinished(ChainEvent):
+
+    # replace with the name of your event
+    name = 'FanFicFare Download Finished'
+
+    def get_event_signal(self):
+        return self.gui.iactions['FanFicFare'].download_finished_signal
```
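This new file hooks FanFicFare into the third-party Action Chains plugin: Action Chains discovers `ChainEvent` subclasses and connects configured chains to the Qt signal returned by `get_event_signal()`. For orientation, the emitting side could look roughly like the sketch below; the class and method names in it are illustrative assumptions, not code from this changeset.

```python
# Hypothetical sketch of where the signal comes from; only
# download_finished_signal and the 'FanFicFare' iaction name are taken
# from the file above.
from PyQt5.Qt import pyqtSignal
from calibre.gui2.actions import InterfaceAction

class FanFicFarePluginAction(InterfaceAction):
    name = 'FanFicFare'
    # Action Chains connects to this via get_event_signal() above.
    download_finished_signal = pyqtSignal(object)

    def _downloads_finished(self, book_list):
        # Emitting the signal triggers any chains bound to the event.
        self.download_finished_signal.emit(book_list)
```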
```diff
@@ -2,7 +2,6 @@

 from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
-import six

 __license__ = 'GPL v3'
 __copyright__ = '2011, Grant Drake <grant.drake@gmail.com>, 2018, Jim Miller'
```

```diff
@@ -22,7 +21,9 @@ from calibre.gui2.actions import menu_action_unique_name
 from calibre.gui2.keyboard import ShortcutConfig
 from calibre.utils.config import config_dir
 from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE
-from fanficfare.six import text_type as unicode
+
+import fanficfare.six as six
+from six import text_type as unicode

 # Global definition of our plugin name. Used for common functions that require this.
 plugin_name = None
```
```diff
@@ -2,7 +2,6 @@

 from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
-import six

 __license__ = 'GPL v3'
 __copyright__ = '2021, Jim Miller'
```

```diff
@@ -24,7 +23,8 @@ from PyQt5.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel,
 from calibre.gui2 import dynamic, info_dialog
 from calibre.gui2.complete2 import EditWithComplete
 from calibre.gui2.dialogs.confirm_delete import confirm
-from fanficfare.six import text_type as unicode
+import fanficfare.six as six
+from six import text_type as unicode

 try:
     from calibre.ebooks.covers import generate_cover as cal_generate_cover
```
```diff
@@ -371,6 +371,7 @@ class ConfigWidget(QWidget):
         prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked()
         prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked()
         prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked()
+        prefs['seriescase'] = self.std_columns_tab.seriescase.isChecked()
         prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked()

         prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked()
```
```diff
@@ -416,6 +417,10 @@ class ConfigWidget(QWidget):
         prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked()
         prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked()
         prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked()
+
+        prefs['site_split_jobs'] = self.other_tab.site_split_jobs.isChecked()
+        prefs['reconsolidate_jobs'] = self.other_tab.reconsolidate_jobs.isChecked()
+
         prefs.save_to_db()
         self.plugin_action.set_popup_mode()
```
```diff
@@ -756,6 +761,7 @@ class BasicTab(QWidget):
                           tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
                           rejectreasons=rejecturllist.get_reject_reasons(),
                           reasonslabel=_('Add this reason to all URLs added:'),
+                          accept_storyurls=True,
                           save_size_name='fff:Add Reject List')
         d.exec_()
         if d.result() == d.Accepted:
```
```diff
@@ -1094,7 +1100,7 @@ class CalibreCoverTab(QWidget):
         self.plugin_gen_cover = QRadioButton(_('Plugin %(gc)s')%no_trans,self)
         self.plugin_gen_cover.setToolTip(_("Use the %(gc)s plugin to create covers.<br>"
                                            "Requires that you have the the %(gc)s plugin installed.<br>"
-                                           "Additional settings are below."%no_trans))
+                                           "Additional settings are below.")%no_trans)
         self.gencov_rdgrp.addButton(self.plugin_gen_cover)
         # always, new only, when no cover from site, inject yes/no...
         self.plugin_gen_cover.setChecked(prefs['plugin_gen_cover'])
```
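The one-line change above moves the `%no_trans` interpolation outside the `_()` call. The distinction matters for gettext-style translation, as this standalone illustration shows (the stub `_` stands in for the plugin's translation function):

```python
_ = lambda s: s  # stand-in for the plugin's gettext-style translation lookup
no_trans = {'gc': 'Generate Cover'}

# Buggy form: interpolation happens first, so the msgid handed to _() no
# longer matches the literal string extracted into the translation catalog.
broken = _("Use the %(gc)s plugin to create covers." % no_trans)

# Fixed form: translate the literal msgid, then interpolate into the result.
fixed = _("Use the %(gc)s plugin to create covers.") % no_trans
```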
```diff
@@ -1274,6 +1280,31 @@ class OtherTab(QWidget):
         self.l = QVBoxLayout()
         self.setLayout(self.l)

+        groupbox = QGroupBox()
+        self.l.addWidget(groupbox)
+
+        groupl = QVBoxLayout()
+        groupbox.setLayout(groupl)
+
+        label = QLabel("<h3>"+
+                       _("Background Job Settings")+
+                       "</h3>"
+                       )
+        label.setWordWrap(True)
+        groupl.addWidget(label)
+
+        self.site_split_jobs = QCheckBox(_('Split downloads into separate background jobs by site'),self)
+        self.site_split_jobs.setToolTip(_("Launches a separate background Job for each site in the list of stories to download/update. Otherwise, there will be only one background job."))
+        self.site_split_jobs.setChecked(prefs['site_split_jobs'])
+        groupl.addWidget(self.site_split_jobs)
+
+        self.reconsolidate_jobs = QCheckBox(_('Reconsolidate split downloads before updating library'),self)
+        self.reconsolidate_jobs.setToolTip(_("Hold all downloads/updates launched together until they all finish. Otherwise, there will be a 'Proceed to update' dialog for each site."))
+        self.reconsolidate_jobs.setChecked(prefs['reconsolidate_jobs'])
+        groupl.addWidget(self.reconsolidate_jobs)
+
+        self.l.addSpacing(5)
+
         label = QLabel(_("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFicFare confirmation dialogs back again."))
         label.setWordWrap(True)
         self.l.addWidget(label)
```
```diff
@@ -1607,6 +1638,11 @@ class StandardColumnsTab(QWidget):
         self.setanthologyseries.setChecked(prefs['setanthologyseries'])
         row.append(self.setanthologyseries)

+        self.seriescase = QCheckBox(_('Fix Series Case?'),self)
+        self.seriescase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of title will be applied.")
+                                   +"\n"+_("This effects Calibre metadata only, not FanFicFare metadata in title page."))
+        self.seriescase.setChecked(prefs['seriescase'])
+        row.append(self.seriescase)
         grid = QGridLayout()
         for rownum, row in enumerate(rows):
             for colnum, col in enumerate(row):
```
```diff
@@ -38,6 +38,7 @@ from calibre.gui2 import gprefs
 show_download_options = 'fff:add new/update dialogs:show_download_options'
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.complete2 import EditWithComplete
+from fanficfare.exceptions import NotGoingToDownload
 from fanficfare.six import text_type as unicode, ensure_text

 # pulls in translation files for _() strings
```
```diff
@@ -155,15 +156,6 @@ class RejectUrlEntry:

         return retval

-class NotGoingToDownload(Exception):
-    def __init__(self,error,icon='dialog_error.png',showerror=True):
-        self.error=error
-        self.icon=icon
-        self.showerror=showerror
-
-    def __str__(self):
-        return self.error
-
 class DroppableQTextEdit(QTextEdit):
     def __init__(self,parent):
         QTextEdit.__init__(self,parent)
```
```diff
@@ -189,12 +181,32 @@
         else:
             return QTextEdit.insertFromMimeData(self, mime_data)

-class AddNewDialog(SizePersistedDialog):
+class HotKeyedSizePersistedDialog(SizePersistedDialog):
+
+    def __init__(self, gui, save_size_name):
+        super(HotKeyedSizePersistedDialog,self).__init__(gui, save_size_name)
+        self.keys=dict()
+
+    def addCtrlKeyPress(self,key,func):
+        # print("addKeyPress: key(0x%x)"%key)
+        # print("control: 0x%x"%QtCore.Qt.ControlModifier)
+        self.keys[key]=func
+
+    def keyPressEvent(self, event):
+        # print("event: key(0x%x) modifiers(0x%x)"%(event.key(),event.modifiers()))
+        if (event.modifiers() & QtCore.Qt.ControlModifier) and event.key() in self.keys:
+            func = self.keys[event.key()]
+            return func()
+        else:
+            return super(HotKeyedSizePersistedDialog,self).keyPressEvent(event)
+
+class AddNewDialog(HotKeyedSizePersistedDialog):

     go_signal = pyqtSignal(object, object, object, object)

     def __init__(self, gui, prefs, icon):
-        SizePersistedDialog.__init__(self, gui, 'fff:add new dialog')
+        super(AddNewDialog,self).__init__(gui, 'fff:add new dialog')

         self.prefs = prefs

         self.setMinimumWidth(300)
```
```diff
@@ -333,6 +345,9 @@ class AddNewDialog(SizePersistedDialog):
         self.button_box.rejected.connect(self.reject)
         self.l.addWidget(self.button_box)

+        self.addCtrlKeyPress(QtCore.Qt.Key_Return,self.ok_clicked)
+        self.addCtrlKeyPress(QtCore.Qt.Key_Enter,self.ok_clicked) # num pad
+
     def click_show_download_options(self,x):
         self.gbf.setVisible(x)
         gprefs[show_download_options] = x
```
```diff
@@ -475,14 +490,15 @@ class AddNewDialog(SizePersistedDialog):
             self.collision.setCurrentIndex(i)

     def get_fff_options(self):
-        retval = {
-            'fileform': unicode(self.fileform.currentText()),
-            'collision': unicode(self.collision.currentText()),
-            'updatemeta': self.updatemeta.isChecked(),
-            'bgmeta': False, # self.bgmeta.isChecked(),
-            'smarten_punctuation':self.prefs['smarten_punctuation'],
-            'do_wordcount':self.prefs['do_wordcount'],
-            }
+        retval = dict(self.extraoptions)
+        retval.update( {
+            'fileform': unicode(self.fileform.currentText()),
+            'collision': unicode(self.collision.currentText()),
+            'updatemeta': self.updatemeta.isChecked(),
+            'bgmeta': False, # self.bgmeta.isChecked(),
+            'smarten_punctuation':self.prefs['smarten_punctuation'],
+            'do_wordcount':self.prefs['do_wordcount'],
+            } )

         if self.merge:
             retval['fileform']=='epub'
```
```diff
@@ -497,7 +513,6 @@ class AddNewDialog(SizePersistedDialog):
     def get_urlstext(self):
         return unicode(self.url.toPlainText())

-
 class FakeLineEdit():
     def __init__(self):
         pass
```
```diff
@@ -619,6 +634,48 @@ class UserPassDialog(QDialog):
         self.status=False
         self.hide()

+class TOTPDialog(QDialog):
+    '''
+    Need to collect Timebased One Time Password(TOTP) for some sites.
+    '''
+    def __init__(self, gui, site, exception=None):
+        QDialog.__init__(self, gui)
+        self.status=False
+
+        self.l = QVBoxLayout()
+        self.setLayout(self.l)
+
+        grid = QGridLayout()
+        self.l.addLayout(grid)
+
+        self.setWindowTitle(_('Time-based One Time Password(TOTP)'))
+        grid.addWidget(QLabel(_("Site requires a Time-based One Time Password(TOTP) for this url:\n%s")%exception.url),0,0,1,2)
+
+        grid.addWidget(QLabel(_("TOTP:")),2,0)
+        self.totp = QLineEdit(self)
+        grid.addWidget(self.totp,2,1)
+
+        horz = QHBoxLayout()
+        self.l.addLayout(horz)
+
+        self.ok_button = QPushButton(_('OK'), self)
+        self.ok_button.clicked.connect(self.ok)
+        horz.addWidget(self.ok_button)
+
+        self.cancel_button = QPushButton(_('Cancel'), self)
+        self.cancel_button.clicked.connect(self.cancel)
+        horz.addWidget(self.cancel_button)
+
+        self.resize(self.sizeHint())
+
+    def ok(self):
+        self.status=True
+        self.hide()
+
+    def cancel(self):
+        self.status=False
+        self.hide()
+
 def LoopProgressDialog(gui,
                        book_list,
                        foreach_function,
```
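The new `TOTPDialog` above only collects a code the user already has; it does not generate one. If you need to produce codes for a site where you hold the shared secret, the third-party `pyotp` package can do it. This is an aside for testing, not something FanFicFare itself depends on:

```python
# Standalone sketch using the third-party 'pyotp' package
# (pip install pyotp); the secret below is a placeholder.
import pyotp

totp = pyotp.TOTP("JBSWY3DPEHPK3PXP")  # base32-encoded shared secret
print(totp.now())  # current 6-digit code; rotates every 30 seconds
```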
```diff
@@ -656,6 +713,7 @@ class _LoopProgressDialog(QProgressDialog):
         QProgressDialog.__init__(self,
                                  init_label,
                                  _('Cancel'), 0, len(book_list), gui)
+        self.gui = gui
         self.setWindowTitle(win_title)
         self.setMinimumWidth(500)
         self.book_list = book_list
```
```diff
@@ -1262,6 +1320,7 @@ class EditTextDialog(SizePersistedDialog):
                  icon=None, title=None, label=None, tooltip=None,
                  read_only=False,
                  rejectreasons=[],reasonslabel=None,
+                 accept_storyurls=False,
                  save_size_name='fff:edit text dialog',
                  ):
        SizePersistedDialog.__init__(self, parent, save_size_name)
```
```diff
@@ -1275,7 +1334,10 @@ class EditTextDialog(SizePersistedDialog):
             self.setWindowIcon(icon)
         self.l.addWidget(self.label)

-        self.textedit = QTextEdit(self)
+        if accept_storyurls:
+            self.textedit = DroppableQTextEdit(self)
+        else:
+            self.textedit = QTextEdit(self)
         self.textedit.setLineWrapMode(QTextEditNoWrap)
         self.textedit.setReadOnly(read_only)
         self.textedit.setText(text)
```
```diff
@@ -1319,7 +1381,18 @@ class EditTextDialog(SizePersistedDialog):
     def get_reason_text(self):
         return unicode(self.reason_edit.currentText()).strip()

-class IniTextDialog(SizePersistedDialog):
+class QTextEditPlainPaste(QTextEdit):
+    def insertFromMimeData(self, mimeData):
+        # logger.debug("insertFromMimeData called")
+        #Ensure it is text.
+        if (mimeData.hasText()):
+            text = mimeData.text()
+            self.insertPlainText(text)
+        #In case not text.
+        else:
+            QTextEdit.insertFromMimeData(self, mimeData)
+
+class IniTextDialog(HotKeyedSizePersistedDialog):

     def __init__(self, parent, text,
                  icon=None, title=None, label=None,
```
```diff
@@ -1327,9 +1400,7 @@ class IniTextDialog(SizePersistedDialog):
                  read_only=False,
                  save_size_name='fff:ini text dialog',
                  ):
-        SizePersistedDialog.__init__(self, parent, save_size_name)
-
-        self.keys=dict()
+        super(IniTextDialog,self).__init__(parent, save_size_name)

         self.l = QVBoxLayout()
         self.setLayout(self.l)
```
```diff
@@ -1340,7 +1411,7 @@ class IniTextDialog(SizePersistedDialog):
             self.setWindowIcon(icon)
         self.l.addWidget(self.label)

-        self.textedit = QTextEdit(self)
+        self.textedit = QTextEditPlainPaste(self)

         highlighter = IniHighlighter(self.textedit,
                                      sections=get_valid_sections(),
```
```diff
@@ -1430,19 +1501,6 @@ class IniTextDialog(SizePersistedDialog):
         # print("call parent accept")
         return SizePersistedDialog.accept(self)

-    def addCtrlKeyPress(self,key,func):
-        # print("addKeyPress: key(0x%x)"%key)
-        # print("control: 0x%x"%QtCore.Qt.ControlModifier)
-        self.keys[key]=func
-
-    def keyPressEvent(self, event):
-        # print("event: key(0x%x) modifiers(0x%x)"%(event.key(),event.modifiers()))
-        if (event.modifiers() & QtCore.Qt.ControlModifier) and event.key() in self.keys:
-            func = self.keys[event.key()]
-            return func()
-        else:
-            return SizePersistedDialog.keyPressEvent(self, event)
-
     def get_plain_text(self):
         return unicode(self.textedit.toPlainText())
```
```diff
@@ -1511,7 +1569,6 @@ class IniTextDialog(SizePersistedDialog):
         # And finally we set this new cursor as the parent's
         self.textedit.setTextCursor(cursor)

-
 class ViewLog(SizePersistedDialog):

     def label_clicked(self, event, lineno=None):
```

File diff suppressed because it is too large.
```diff
@@ -33,8 +33,8 @@ def get_fff_config(url,fileform="epub",personalini=None):
     except Exception as e:
         logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
         configuration = Configuration(sections,fileform)
-    configuration.readfp(StringIO(ensure_text(get_resources("plugin-defaults.ini"))))
-    configuration.readfp(StringIO(ensure_text(personalini)))
+    configuration.read_file(StringIO(ensure_text(get_resources("plugin-defaults.ini"))))
+    configuration.read_file(StringIO(ensure_text(personalini)))

     return configuration
```
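For context on the `readfp` to `read_file` swap above: `ConfigParser.readfp()` was deprecated in Python 3.2 and removed in Python 3.12, so the change keeps the plugin working on newer interpreters. A minimal standalone demonstration:

```python
from configparser import ConfigParser
from io import StringIO

cp = ConfigParser()
# read_file() accepts a file-like object, just as readfp() did.
cp.read_file(StringIO("[defaults]\nis_adult: true\n"))
print(cp.get("defaults", "is_adult"))  # -> true
```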
```diff
@@ -95,7 +95,7 @@ class IniHighlighter(QSyntaxHighlighter):
         if sections:
             # *known* sections
             resections = r'('+(r'|'.join(sections))+r')'
-            resections = resections.replace('.','\.') #escape dots.
+            resections = resections.replace('.',r'\.') #escape dots.
             self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", colors['knownsections'], QFontBold, blocknum=2 ) )

             # test story sections
```
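The raw-string change above is a forward-compatibility fix: `'\.'` relies on an invalid escape sequence, which CPython has warned about since 3.6 and promotes to a `SyntaxWarning` in 3.12. The resulting string is unchanged:

```python
# r'\.' and '\\.' are the same two characters: a backslash and a dot.
print(r'\.' == '\\.')  # True
print(len(r'\.'))      # 2
```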
```diff
@@ -2,7 +2,6 @@

 from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
-import six

 __license__ = 'GPL v3'
 __copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
```

```diff
@@ -15,10 +14,8 @@ from time import sleep
 from datetime import time
 from io import StringIO
 from collections import defaultdict
+import sys

-from calibre.utils.ipc.server import Empty, Server
-from calibre.utils.ipc.job import ParallelJob
 from calibre.constants import numeric_version as calibre_version
 from calibre.utils.date import local_tz

 # pulls in translation files for _() strings
```
```diff
@@ -33,160 +30,100 @@ except NameError:
 #
 # ------------------------------------------------------------------------------

-def do_download_worker(book_list,
-                       options,
-                       cpus,
-                       merge=False,
-                       notification=lambda x,y:x):
-    '''
-    Coordinator job, to launch child jobs to do downloads.
-    This is run as a worker job in the background to keep the UI more
-    responsive and get around any memory leak issues as it will launch
-    a child job for each book as a worker process
-    '''
-    ## Now running one BG proc per site, which downloads for the same
-    ## site in serial.
-    logger.info("CPUs:%s"%cpus)
-    server = Server(pool_size=cpus)
+def do_download_worker_single(site,
+                              book_list,
+                              options,
+                              merge,
+                              notification=lambda x,y:x):

     logger.info(options['version'])

-    sites_lists = defaultdict(list)
-    [ sites_lists[x['site']].append(x) for x in book_list if x['good'] ]
-
-    totals = {}
-    # can't do direct assignment in list comprehension? I'm sure it
-    # makes sense to some pythonista.
-    # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
-    [ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
-    # logger.debug(sites_lists.keys())
-
-    # Queue all the jobs
-    jobs_running = 0
-    for site in sites_lists.keys():
-        site_list = sites_lists[site]
-        logger.info(_("Launch background process for site %s:")%site + "\n" +
-                    "\n".join([ x['url'] for x in site_list ]))
-        # logger.debug([ x['url'] for x in site_list])
-        args = ['calibre_plugins.fanficfare_plugin.jobs',
-                'do_download_site',
-                (site,site_list,options,merge)]
-        job = ParallelJob('arbitrary_n',
-                          "site:(%s)"%site,
-                          done=None,
-                          args=args)
-        job._site_list = site_list
-        job._processed = False
-        server.add_job(job)
-        jobs_running += 1
-
-    # This server is an arbitrary_n job, so there is a notifier available.
-    # Set the % complete to a small number to avoid the 'unavailable' indicator
-    notification(0.01, _('Downloading FanFiction Stories'))
+    ## same info debug calibre prints out at startup. For when users
+    ## give me job output instead of debug log.
+    from calibre.debug import print_basic_debug_info
+    print_basic_debug_info(sys.stderr)

-    # dequeue the job results as they arrive, saving the results
-    count = 0
-    while True:
-        job = server.changed_jobs_queue.get()
-        # logger.debug("job get job._processed:%s"%job._processed)
-        # A job can 'change' when it is not finished, for example if it
-        # produces a notification.
-        msg = None
-        try:
-            ## msg = book['url']
-            (percent,msg) = job.notifications.get_nowait()
-            # logger.debug("%s<-%s"%(percent,msg))
-            if percent == 10.0: # Only when signaling d/l done.
-                count += 1
-                totals[msg] = 1.0/len(totals)
-                # logger.info("Finished: %s"%msg)
-            else:
+    from calibre_plugins.fanficfare_plugin import FanFicFareBase
+    fffbase = FanFicFareBase(options['plugin_path'])
+    with fffbase: # so the sys.path was modified while loading the
+                  # plug impl.
+        from fanficfare.fff_profile import do_cprofile
+
+        ## extra function just so I can easily use the same
+        ## @do_cprofile decorator
+        @do_cprofile
+        def profiled_func():
+            count = 0
+            totals = {}
+            # can't do direct assignment in list comprehension? I'm sure it
+            # makes sense to some pythonista.
+            # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
+            [ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
+            # logger.debug(sites_lists.keys())
+
+            def do_indiv_notif(percent,msg):
                 totals[msg] = percent/len(totals)
                 notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
-        except Empty:
-            pass
-        # without update, is_finished will never be set. however, we
-        # do want to get all the notifications for status so we don't
-        # miss the 'done' ones.
-        job.update(consume_notifications=False)
-        notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})

-        # if not job._processed:
-        #     sleep(0.5)
-        ## Can have a race condition where job.is_finished before
-        ## notifications for all downloads have been processed.
-        ## Or even after the job has been finished.
-        # logger.debug("job.is_finished(%s) or job._processed(%s)"%(job.is_finished, job._processed))
-        if not job.is_finished:
-            continue
-
-        ## only process each job once. We can get more than one loop
-        ## after job.is_finished.
-        if not job._processed:
-            # sleep(1)
-            # A job really finished. Get the information.
-
-            ## This is where bg proc details end up in GUI log.
-            ## job.details is the whole debug log for each proc.
-            logger.info("\n\n" + ("="*80) + " " + job.details.replace('\r',''))
-            # logger.debug("Finished background process for site %s:\n%s"%(job._site_list[0]['site'],"\n".join([ x['url'] for x in job._site_list ])))
-            for b in job._site_list:
-                book_list.remove(b)
-            book_list.extend(job.result)
-            job._processed = True
-            jobs_running -= 1
-
-        ## Can't use individual count--I've seen stories all reported
-        ## finished before results of all jobs processed.
-        if jobs_running == 0:
-            book_list = sorted(book_list,key=lambda x : x['listorder'])
-            logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
-
-            good_lists = defaultdict(list)
-            bad_lists = defaultdict(list)
+            do_list = []
+            done_list = []
+            logger.info("\n\n"+_("Downloading FanFiction Stories")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
+            ## pass failures from metadata through bg job so all results are
+            ## together.
             for book in book_list:
                 if book['good']:
-                    good_lists[book['status']].append(book)
+                    do_list.append(book)
                 else:
-                    bad_lists[book['status']].append(book)
+                    done_list.append(book)
+            for book in do_list:
+                # logger.info("%s"%book['url'])
+                done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
+                count += 1
+            return finish_download(done_list)
+        return profiled_func()

-            order = [_('Add'),
-                     _('Update'),
-                     _('Meta'),
-                     _('Different URL'),
-                     _('Rejected'),
-                     _('Skipped'),
-                     _('Bad'),
-                     _('Error'),
-                     ]
-            j = 0
-            for d in [ good_lists, bad_lists ]:
-                for status in order:
-                    if d[status]:
-                        l = d[status]
-                        logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
-                        for book in l:
-                            book['reportorder'] = j
-                            j += 1
-                        del d[status]
-                # just in case a status is added but doesn't appear in order.
-                for status in d.keys():
-                    logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
-            break
+def finish_download(donelist):
+    book_list = sorted(donelist,key=lambda x : x['listorder'])
+    logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))

-    server.close()
+    good_lists = defaultdict(list)
+    bad_lists = defaultdict(list)
+    for book in book_list:
+        if book['good']:
+            good_lists[book['status']].append(book)
+        else:
+            bad_lists[book['status']].append(book)
+
+    order = [_('Add'),
+             _('Update'),
+             _('Meta'),
+             _('Different URL'),
+             _('Rejected'),
+             _('Skipped'),
+             _('Bad'),
+             _('Error'),
+             ]
+    stnum = 0
+    for d in [ good_lists, bad_lists ]:
+        for status in order:
+            stnum += 1
+            if d[status]:
+                l = d[status]
+                logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
+                for book in l:
+                    # Add prior listorder to 10000 * status num for
+                    # ordering of accumulated results with multiple bg
+                    # jobs
+                    book['reportorder'] = stnum*10000 + book['listorder']
+                del d[status]
+        # just in case a status is added but doesn't appear in order.
+        for status in d.keys():
+            logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))

     # return the book list as the job result
     return book_list

-def do_download_site(site,book_list,options,merge,notification=lambda x,y:x):
-    # logger.info(_("Started job for %s")%site)
-    retval = []
-    for book in book_list:
-        # logger.info("%s"%book['url'])
-        retval.append(do_download_for_worker(book,options,merge,notification))
-        notification(10.0,book['url'])
-    return retval
-
 def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
     '''
     Child job, to download story when run as a worker job
```
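The net effect of this refactor: the old `do_download_worker` both grouped books by site and launched one `ParallelJob` per site from inside a coordinator job, while the new `do_download_worker_single` handles exactly one site's list in-process and defers result ordering to `finish_download`. The grouping idea it retires looks conceptually like this (a simplified sketch, not plugin code; the dict fields mirror the book dicts in this file but the data is made up):

```python
# Simplified sketch of per-site grouping of story downloads.
from collections import defaultdict

books = [
    {'site': 'fanfiction.net', 'url': 'https://www.fanfiction.net/s/1/1/', 'good': True},
    {'site': 'archiveofourown.org', 'url': 'https://archiveofourown.org/works/2', 'good': True},
    {'site': 'fanfiction.net', 'url': 'https://www.fanfiction.net/s/3/1/', 'good': False},
]
sites_lists = defaultdict(list)
for b in books:
    if b['good']:  # failures skip straight to the results list
        sites_lists[b['site']].append(b)

for site, site_list in sites_lists.items():
    print(site, [b['url'] for b in site_list])  # one background job per site
```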
```diff
@@ -196,13 +133,13 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
     fffbase = FanFicFareBase(options['plugin_path'])
     with fffbase: # so the sys.path was modified while loading the
                   # plug impl.
-        from calibre_plugins.fanficfare_plugin.dialogs import NotGoingToDownload
         from calibre_plugins.fanficfare_plugin.prefs import (
             SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
             UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
         from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
         from fanficfare import adapters, writers
         from fanficfare.epubutils import get_update_data
+        from fanficfare.exceptions import NotGoingToDownload
         from fanficfare.six import text_type as unicode

         from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config
```
```diff
@@ -231,6 +168,7 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
                 adapter.is_adult = book['is_adult']
                 adapter.username = book['username']
                 adapter.password = book['password']
+                adapter.totp = book['totp']
                 adapter.setChaptersRange(book['begin'],book['end'])

                 ## each site download job starts with a new copy of the
```
```diff
@@ -251,6 +189,17 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
                 if not story.getMetadata("series") and 'calibre_series' in book:
                     adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])

+                # logger.debug(merge)
+                # logger.debug(book.get('epub_for_update','(NONE)'))
+                # logger.debug(options.get('mergebook','(NOMERGEBOOK)'))
+
+                # is a merge, is a pre-existing anthology, and is not a pre-existing book in anthology.
+                if merge and 'mergebook' in options and 'epub_for_update' not in book:
+                    # internal for plugin anthologies to mark chapters
+                    # (new) in new stories
+                    story.setMetadata("newforanthology","true")
+                    logger.debug("metadata newforanthology:%s"%story.getMetadata("newforanthology"))
+
                 # set PI version instead of default.
                 if 'version' in options:
                     story.setMetadata('version',options['version'])
```
```diff
@@ -259,7 +208,6 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
                 book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
                 book['publisher'] = story.getMetadata("publisher")
-                book['url'] = story.getMetadata("storyUrl", removeallentities=True)
                 book['tags'] = story.getSubjectTags(removeallentities=True)
                 book['comments'] = story.get_sanitized_description()
                 book['series'] = story.getMetadata("series", removeallentities=True)
```
|
|||
except:
|
||||
logger.error("WordCount failed")
|
||||
|
||||
if options['smarten_punctuation'] and options['fileform'] == "epub" \
|
||||
and calibre_version >= (0, 9, 39):
|
||||
if options['smarten_punctuation'] and options['fileform'] == "epub":
|
||||
# for smarten punc
|
||||
from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
|
||||
from calibre.utils.logging import Log
|
||||
|
|
@ -412,12 +359,14 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
|
|||
data = {'smarten_punctuation':True}
|
||||
opts = ALL_OPTS.copy()
|
||||
opts.update(data)
|
||||
O = namedtuple('Options', ' '.join(six.iterkeys(ALL_OPTS)))
|
||||
O = namedtuple('Options', ' '.join(ALL_OPTS.keys()))
|
||||
opts = O(**opts)
|
||||
|
||||
log = Log(level=Log.DEBUG)
|
||||
polish({outfile:outfile}, opts, log, logger.info)
|
||||
|
||||
## here to catch tags set in chapters in literotica for
|
||||
## both overwrites and updates.
|
||||
book['tags'] = story.getSubjectTags(removeallentities=True)
|
||||
except NotGoingToDownload as d:
|
||||
book['good']=False
|
||||
book['status']=_('Bad')
|
||||
|
|
@ -443,11 +392,12 @@ def inject_cal_cols(book,story,configuration):
|
|||
if 'calibre_columns' in book:
|
||||
injectini = ['[injected]']
|
||||
extra_valid = []
|
||||
for k, v in six.iteritems(book['calibre_columns']):
|
||||
for k in book['calibre_columns'].keys():
|
||||
v = book['calibre_columns'][k]
|
||||
story.setMetadata(k,v['val'])
|
||||
injectini.append('%s_label:%s'%(k,v['label']))
|
||||
extra_valid.append(k)
|
||||
if extra_valid: # if empty, there's nothing to add.
|
||||
injectini.append("add_to_extra_valid_entries:,"+','.join(extra_valid))
|
||||
configuration.readfp(StringIO('\n'.join(injectini)))
|
||||
configuration.read_file(StringIO('\n'.join(injectini)))
|
||||
#print("added:\n%s\n"%('\n'.join(injectini)))
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
```diff
@@ -126,6 +126,7 @@ default_prefs['suppressauthorsort'] = False
 default_prefs['suppresstitlesort'] = False
 default_prefs['authorcase'] = False
 default_prefs['titlecase'] = False
+default_prefs['seriescase'] = False
 default_prefs['setanthologyseries'] = False
 default_prefs['mark'] = False
 default_prefs['mark_success'] = True
```
```diff
@@ -197,6 +198,11 @@ default_prefs['auto_reject_from_email'] = False
 default_prefs['update_existing_only_from_email'] = False
 default_prefs['download_from_email_immediately'] = False

+
+#default_prefs['single_proc_jobs'] = True # setting and code removed
+default_prefs['site_split_jobs'] = True
+default_prefs['reconsolidate_jobs'] = True
+
 def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
     db.prefs.set_namespaced(PREFS_NAMESPACE,
                             setting,
```
calibre-plugin/translations/ar.po: new file, 2613 lines (diff suppressed because it is too large)
calibre-plugin/translations/mr.po: new file, 2612 lines (diff suppressed because it is too large)
calibre-plugin/translations/ta.po: new file, 2612 lines (diff suppressed because it is too large)

Diffs for the other changed translation files are likewise suppressed because they are too large.
```diff
@@ -30,8 +30,12 @@ from .. import configurable as configurable

 ## must import each adapter here.

+from . import base_adapter
 from . import base_efiction_adapter
 from . import adapter_test1
+from . import adapter_test2
+from . import adapter_test3
+from . import adapter_test4
 from . import adapter_fanfictionnet
 from . import adapter_fictionalleyarchiveorg
 from . import adapter_fictionpresscom
```

```diff
@@ -49,7 +53,6 @@ from . import adapter_archiveofourownorg
 from . import adapter_ficbooknet
 from . import adapter_midnightwhispers
-from . import adapter_ksarchivecom
 from . import adapter_archiveskyehawkecom
 from . import adapter_libraryofmoriacom
 from . import adapter_ashwindersycophanthexcom
 from . import adapter_chaossycophanthexcom
```
```diff
@@ -60,17 +63,13 @@ from . import adapter_phoenixsongnet
 from . import adapter_walkingtheplankorg
 from . import adapter_dokugacom
 from . import adapter_storiesofardacom
 from . import adapter_destinysgatewaycom
 from . import adapter_ncisfictioncom
 from . import adapter_fanfiktionde
 from . import adapter_ponyfictionarchivenet
 from . import adapter_themasquenet
 from . import adapter_pretendercentrecom
 from . import adapter_darksolaceorg
 from . import adapter_finestoriescom
 from . import adapter_hlfictionnet
 from . import adapter_storyroomcom
 from . import adapter_dracoandginnycom
 from . import adapter_scarvesandcoffeenet
 from . import adapter_wolverineandroguecom
 from . import adapter_thehookupzonenet
 from . import adapter_efpfanficnet
```
```diff
@@ -101,50 +100,47 @@ from . import adapter_asexstoriescom
 from . import adapter_gluttonyfictioncom
 from . import adapter_valentchambercom
 from . import adapter_wwwgiantessworldnet
 from . import adapter_lotrgficcom
 from . import adapter_starslibrarynet
 from . import adapter_fanficauthorsnet
 from . import adapter_fireflyfansnet
 from . import adapter_trekfanfictionnet
 from . import adapter_wwwlushstoriescom
 from . import adapter_wwwutopiastoriescom
 from . import adapter_sinfuldreamscomunicornfic
 from . import adapter_sinfuldreamscomwhisperedmuse
 from . import adapter_sinfuldreamscomwickedtemptation
 from . import adapter_asianfanficscom
 from . import adapter_mttjustoncenet
 from . import adapter_narutoficorg
 from . import adapter_starskyhutcharchivenet
 from . import adapter_thedelphicexpansecom
 from . import adapter_wwwaneroticstorycom
 from . import adapter_lcfanficcom
 from . import adapter_noveltrovecom
 from . import adapter_inkbunnynet
 from . import adapter_alternatehistorycom
 from . import adapter_wattpadcom
 from . import adapter_novelonlinefullcom
 from . import adapter_wwwnovelallcom
 from . import adapter_wuxiaworldxyz
 from . import adapter_novelupdatescc
 from . import adapter_hentaifoundrycom
 from . import adapter_mugglenetfanfictioncom
 from . import adapter_swiorgru
 from . import adapter_fanficsme
 from . import adapter_fanfictalkcom
 from . import adapter_scifistoriescom
 from . import adapter_silmarillionwritersguildorg
 from . import adapter_chireadscom
 from . import adapter_scribblehubcom
 from . import adapter_fictionlive
 from . import adapter_thesietchcom
 from . import adapter_fastnovelsnet
 from . import adapter_squidgeworldorg
 from . import adapter_novelfull
 from . import adapter_worldofxde
 from . import adapter_psychficcom
 from . import adapter_deviantartcom
 from . import adapter_merengohu
 from . import adapter_readonlymindcom
 from . import adapter_wwwsunnydaleafterdarkcom
 from . import adapter_syosetucom
 from . import adapter_kakuyomujp
 from . import adapter_fanfictionsfr
 from . import adapter_touchfluffytail
 from . import adapter_spiritfanfictioncom
 from . import adapter_superlove
 from . import adapter_cfaa
 from . import adapter_althistorycom

 ## This bit of complexity allows adapters to be added by just adding
 ## importing. It eliminates the long if/else clauses we used to need
```
@@ -228,6 +224,21 @@ def get_section_url(url):
    ## return unchanged in that case.
    return url

def get_url_search(url):
    '''
    For adapters that have story URLs that can change. This is
    used for searching the Calibre library by identifiers:url for
    sites (generally) that contain author or title that can
    change, but also have a unique identifier that doesn't.

    returns a string containing a regexp, not a compiled re object.
    '''
    cls = _get_class_for(url)[0]
    if not cls:
        ## still apply common processing.
        cls = base_adapter.BaseSiteAdapter
    return cls.get_url_search(url)

def getAdapter(config,url,anyurl=False):

    #logger.debug("trying url:"+url)

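## Hypothetical caller sketch for get_url_search(); 'stored_urls' and the
## example site are invented, and how loose the returned pattern is
## depends on whether that site's adapter overrides get_url_search().
import re

stored_urls = ['https://www.example.com/story/12345/new-title']
pattern = get_url_search('https://www.example.com/story/12345/old-title')
matcher = re.compile(pattern)
matches = [u for u in stored_urls if matcher.match(u)]
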
@@ -15,201 +15,24 @@
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode

from .base_adapter import BaseSiteAdapter, makeDate

class AdAstraFanficComSiteAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','aaff')
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

    @staticmethod
    def getSiteDomain():
        return 'www.adastrafanfic.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            addurl = "&warning=5"
        else:
            addurl=""

        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
            raise exceptions.AdultCheckRequired(self.url)

        # problems with some stories, but only in calibre. I suspect
        # issues with different SGML parsers in python. This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        soup = self.make_soup(data)

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

        ## <meta name='description' content='<p>Description</p> ...' >
        ## Summary, strangely, is in the content attr of a <meta name='description'> tag
        ## which is escaped HTML. Unfortunately, we can't use it because they don't
        ## escape (') chars in the desc, breaking the tag.
        #meta_desc = soup.find('meta',{'name':'description'})
        #metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
        #self.story.setMetadata('description',stripHTML(metasoup))

        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ''
                while value and 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                # sometimes poorly formatted desc (<p> w/o </p>) leads
                # to all labels being included.
                svalue=svalue[:svalue.find('<span class="label">')]
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        data = self.get_request(url)
        # problems with some stories, but only in calibre. I suspect
        # issues with different SGML parsers in python. This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        soup = self.make_soup(data)

        span = soup.find('div', {'id' : 'story'})

        if None == span:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,span)
from .base_otw_adapter import BaseOTWAdapter

def getClass():
    return AdAstraFanficComSiteAdapter
    return AdastrafanficComAdapter

class AdastrafanficComAdapter(BaseOTWAdapter):

    def __init__(self, config, url):
        BaseOTWAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','aaff')

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'www.adastrafanfic.com'

@@ -68,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"

        self.dateformat = "%B %d, %Y"

        ## Added because adult-fanfiction.org does send you to
        ## www.adult-fanfiction.org when you go to it and it also moves

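## Quick check of the two date formats above (plain datetime, nothing
## FanFicFare-specific): the same day parses identically under both.
from datetime import datetime

old_style = datetime.strptime("2016-09-26", "%Y-%m-%d")
new_style = datetime.strptime("September 26, 2016", "%B %d, %Y")
assert old_style == new_style
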
@@ -139,91 +137,45 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'

    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
    ## Login seems to be reasonably standard across eFiction sites.
    #def needToLoginCheck(self, data):
    ##This adapter will always require a login
    #    return True

    # <form name="login" method="post" action="">
    #   <div class="top">E-mail: <span id="sprytextfield1">
    #   <input name="email" type="text" id="email" size="20" maxlength="255" />
    #   <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
    #   <div class="top">Password: <span id="sprytextfield2">
    #   <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
    #   <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
    #   <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
    #   <input type="submit" value="Login" />
    #   </div>
    # </form>

    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
    #def performLogin(self, url, soup):
    #    params = {}

    #    if self.password:
    #        params['email'] = self.username
    #        params['pass1'] = self.password
    #    else:
    #        params['email'] = self.getConfig("username")
    #        params['pass1'] = self.getConfig("password")
    #    params['submit'] = 'Login'

    #    # copy all hidden input tags to pick up appropriate tokens.
    #    for tag in soup.findAll('input',{'type':'hidden'}):
    #        params[tag['name']] = tag['value']

    #    logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))

    #    d = self.post_request(url, params, usecache=False)
    #    soup = self.make_soup(d)

    #    if not (soup.find('form', {'name' : 'login'}) == None):
    #        logger.info("Failed to login to URL %s as %s" % (url, params['email']))
    #        raise exceptions.FailedToLogin(url,params['email'])
    #        return False
    #    else:
    #        return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def doExtractChapterUrlsAndMetadata(self, get_cover=True):

        ## You need to have your is_adult set to true to get this story
        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)
        else:
            d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
            if "Age verified successfully" not in d:
                raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))

        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)
        # logger.debug(data)

        if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
            raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))

        soup = self.make_soup(data)

        ##This is not working right now, so I'm commenting it out, but leaving it for future testing
        #self.performLogin(url, soup)

        ## Title
        ## Some of the titles have a backslash on the story page, but not on the Author's page,
        ## so I am removing it from the title so it can be found on the Author's page further on in the code.
        ## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
        ## so I have to do that here as well. I used multiple replaces to make sure, since I did the same below.
        a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())
        h1 = soup.find('h1')
        # logger.debug("Title:%s"%h1)
        self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())

        # Find the chapters:
        chapters = soup.find('ul',{'class':'dropdown-content'})
        for i, chapter in enumerate(chapters.findAll('a')):
            self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
        # Find the chapters from first list only
        chapters = soup.select_one('select.chapter-select').select('option')
        for chapter in chapters:
            self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
        a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
        if a == None:
            # I know that the original author of fanficfare wants to always have metadata,
            # but I posit that if the story is there, even if we can't get the metadata from the

@@ -232,140 +184,56 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
            self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
            self.story.setMetadata('author','Unknown')
            logger.warning('There was no author found for the story... Metadata will not be retrieved.')
            self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
            self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
        else:
            self.story.setMetadata('authorId',a['href'].split('=')[1])
            self.story.setMetadata('authorUrl',a['href'])
            self.story.setMetadata('author',stripHTML(a))

        ##The story page does not give much Metadata, so we go to the Author's page
        ## The story page does not give much Metadata, so we go to
        ## the Author's page. Except it's actually a sub-req for the
        ## list of the author's stories for that subdomain.
        author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
            self.getBaseDomain(),
            self.zone,
            self.story.getMetadata('authorId'))

        ##Get the first Author page to see if there are multiple pages.
        ##AFF doesn't care if the page number is larger than the actual pages,
        ##it will continue to show the last page even if the variable is larger than the actual page
        author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
        #author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'

        ##I'm resetting the author page to the zone for this story
        self.story.setMetadata('authorUrl',author_Url)

        logger.debug('Getting the author page: {0}'.format(author_Url))
        logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
        adata = self.get_request(author_Url)

        if "The member you are looking for does not exist." in adata:
            raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
            #raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
        none_found = "No stories found in this category."
        if none_found in adata:
            raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))

        asoup = self.make_soup(adata)
        # logger.debug(asoup)

        ##Getting the number of author pages
        pages = 0
        pagination=asoup.find('ul',{'class' : 'pagination'})
        if pagination:
            pages = pagination.findAll('li')[-1].find('a')
            if not pages == None:
                pages = pages['href'].split('=')[-1]
            else:
                pages = 0
        story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
        # logger.debug(story_card)

        storya = None
        ##If there is only 1 page of stories, check it to get the Metadata,
        if pages == 0:
            a = asoup.findAll('li')
            for lc2 in a:
                if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
                    storya = lc2
                    break
        ## otherwise go through the pages
        else:
            page=1
            i=0
            while i == 0:
                ##We already have the first page, so if this is the first time through, skip getting the page
                if page != 1:
                    author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
                    logger.debug('Getting the author page: {0}'.format(author_Url))
                    adata = self.get_request(author_Url)
                    ##This will probably never be needed, since AFF doesn't seem to care what number you put as
                    ## the page number, it will default to the last page, even if you use 1000, for an author
                    ## that only has 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
                    if "The member you are looking for does not exist." in adata:
                        raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
                    # we look for the li element that has the story here
                    asoup = self.make_soup(adata)
        ## Category
        ## I've only seen one category per story so far, but just in case:
        for cat in story_card.select('div.story-card-category'):
            # remove Category:, old code suggests Located: is also
            # possible, so removing by <strong>
            cat.find("strong").decompose()
            self.story.addToList('category',stripHTML(cat))

                a = asoup.findAll('li')
                for lc2 in a:
                    if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
                        i=1
                        storya = lc2
                        break
                page = page + 1
                if page > int(pages):
                    break
        self.setDescription(url,story_card.select_one('div.story-card-description'))

        ##Split the Metadata up into a list
        ##We have to change the soup type to a string, then remove the newlines and double spaces,
        ##then change the <br/> to '-:-', which separates the different elements.
        ##Then we strip the HTML elements from the string.
        ##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
        ##They are always in the same order.
        ## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
        liMetadata = unicode(storya).replace('\n','').replace('\r','').replace('\t',' ').replace('  ',' ').replace('  ',' ').replace('  ',' ')
        liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
        liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
        for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
            if i == 0:
                # The value for the title has been manipulated, so may not be the same as gotten at the start.
                # I'm going to use the href from the storya retrieved from the author's page to determine if it is correct.
                if storya.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
                    raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
            elif i == 1:
                ##Get the description
                self.setDescription(url,stripHTML(value.strip()))
            else:
                # the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
                if 'Located :' in value:
                    self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
                elif 'Category :' in value:
                    # Get the Category
                    self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Category :',r'').strip())
                elif 'Content Tags :' in value:
                    # Get the Erotic Tags
                    value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
                    for code in re.split(r'\s',value):
                        self.story.addToList('eroticatags',code)
                elif 'Posted :' in value:
                    # Get the Posted Date
                    value = value.replace(r'Posted :',r'').strip()
                    if value.startswith('008'):
                        # It is unknown how the 200 became 008, but I'm going to change it back here
                        value = value.replace('008','200')
                    elif value.startswith('0000'):
                        # Since the date is showing as 0000,
                        # I'm going to put the memberdate here
                        value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
                    self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
                elif 'Edited :' in value:
                    # Get the 'Updated' Edited date
                    # AFF has the time for the Updated date, and we only want the date,
                    # so we take the first 10 characters only
                    value = value.replace(r'Edited :',r'').strip()[0:10]
                    if value.startswith('008'):
                        # It is unknown how the 200 became 008, but I'm going to change it back here
                        value = value.replace('008','200')
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
                    elif value.startswith('0000') or '-00-' in value:
                        # Since the date is showing as 0000,
                        # or there is -00- in the date,
                        # I'm going to put the Published date here
                        self.story.setMetadata('dateUpdated', self.story.getMetadata('datePublished'))
                    else:
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
                else:
                    # This catches the blank elements, and the Review and Dragon Prints.
                    # I am not interested in these, so do nothing
                    zzzzzzz=0
        for tag in story_card.select('span.story-tag'):
            self.story.addToList('eroticatags',stripHTML(tag))

        ## created/updated share formatting
        for meta in story_card.select('div.story-card-meta-item span:last-child'):
            meta = stripHTML(meta)
            if 'Created: ' in meta:
                meta = meta.replace('Created: ','')
                self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))

            if 'Updated: ' in meta:
                meta = meta.replace('Updated: ','')
                self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))

    # grab the text for an individual chapter.
    def getChapterText(self, url):

@@ -373,10 +241,11 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))
        chaptertag = soup.find('ul',{'class':'pagination'}).parent.parent.parent.findNextSibling('li')
        chaptertag = soup.select_one('div.chapter-body')
        if None == chaptertag:
            raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
        # Change td to a div.
        chaptertag.name='div'
        ## chapter text includes a copy of story title, author,
        ## chapter title, & eroticatags specific to the chapter. Did
        ## before, too.

        return self.utf8FromSoup(url,chaptertag)

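## Standalone illustration of the 'div.story-card:has(...)' selection
## used above; needs bs4 4.7+ (soupsieve), and the HTML/URLs are invented.
from bs4 import BeautifulSoup

html = '''
<div class="story-card"><a href="story.php?no=111"></a>
  <div class="story-card-description">First</div></div>
<div class="story-card"><a href="story.php?no=222"></a>
  <div class="story-card-description">Second</div></div>
'''
soup = BeautifulSoup(html, 'html.parser')
# :has() picks the card that contains the link to the wanted story.
card = soup.select_one('div.story-card:has(a[href="story.php?no=222"])')
print(card.select_one('div.story-card-description').get_text())  # Second
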
40  fanficfare/adapters/adapter_althistorycom.py  Normal file

@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-

# Copyright 2026 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import re

from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

def getClass():
    return AltHistoryComAdapter

## NOTE: This is a different site than www.alternatehistory.com.

class AltHistoryComAdapter(BaseXenForo2ForumAdapter):

    def __init__(self, config, url):
        BaseXenForo2ForumAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ahc')

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'althistory.com'

@@ -18,56 +18,20 @@
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
import json

from ..six import text_type as unicode
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition

from .base_adapter import BaseSiteAdapter, makeDate
from .base_otw_adapter import BaseOTWAdapter

def getClass():
    return ArchiveOfOurOwnOrgAdapter

class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
class ArchiveOfOurOwnOrgAdapter(BaseOTWAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False
        self.addurl = ""

        self.full_work_soup = None
        self.full_work_chapters = None
        self.use_full_work_soup = True

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])

        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('storyId',m.group('id'))

            # normalized story URL.
            self._setURL('https://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())
        BaseOTWAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ao3')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%b-%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.

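## Worked example of the storyId extraction above, simplified to a single
## domain (the URL is invented): the named group plus the 0* prefix
## discards leading zeros and tolerates a /collections/ segment.
import re

pattern = r"https?://archiveofourown\.org(/collections/[^/]+)?/works/0*(?P<id>\d+)"
m = re.match(pattern, "https://archiveofourown.org/collections/Some_Archive/works/000159770")
print(m.group('id'))  # '159770'
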
@@ -85,559 +49,21 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
        return ['archiveofourown.org',
                'archiveofourown.com',
                'archiveofourown.net',
                'archiveofourown.gay',
                'download.archiveofourown.org',
                'download.archiveofourown.com',
                'download.archiveofourown.net',
                'ao3.org',
                ]

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/works/123456 https://"+cls.getSiteDomain()+"/collections/Some_Archive/works/123456 https://"+cls.getSiteDomain()+"/works/123456/chapters/78901"

    def getSiteURLPattern(self):
        # https://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
        # Discard leading zeros from story ID numbers--AO3 doesn't use them in its own chapter URLs.
        # logger.debug(r"https?://" + r"|".join([x.replace('.','\.') for x in self.getAcceptDomains()]) + r"(/collections/[^/]+)?/works/0*(?P<id>\d+)")
        return r"https?://(" + r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()]) + r")(/collections/[^/]+)?/works/0*(?P<id>\d+)"

    @classmethod
    def get_section_url(cls,url):
        ## minimal URL used for section names in INI and reject list
        ## for comparison
        # logger.debug("pre--url:%s"%url)
        ## https://archiveofourown.org/works/19334905/chapters/71697933
        # http://archiveofourown.org/works/34686793/chapters/89043733
        url = re.sub(r'^https?://(.*/works/\d+).*$',r'https://\1',url)
        # logger.debug("post-url:%s"%url)
    def mod_url_request(self, url):
        return url

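## Worked example of the get_section_url() substitution above (the work
## id is invented): everything past /works/<id> is trimmed and the
## scheme is forced to https.
import re

url = "http://archiveofourown.org/works/19334905/chapters/71697933?view_adult=true"
section = re.sub(r'^https?://(.*/works/\d+).*$', r'https://\1', url)
print(section)  # https://archiveofourown.org/works/19334905
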
    ## Login
    def needToLoginCheck(self, data):
        if 'This work is only available to registered users of the Archive.' in data \
                or "The password or user name you entered doesn't match our records" in data:
            return True
    def mod_url_request(self, url):
        ## add / to *not* replace media.archiveofourown.org
        if self.getConfig("use_archive_transformativeworks_org",False):
            return url.replace("/archiveofourown.org","/archive.transformativeworks.org")
        elif self.getConfig("use_archiveofourown_gay",False):
            return url.replace("/archiveofourown.org","/archiveofourown.gay")
        else:
            return False

    def performLogin(self, url, data):

        params = {}
        if self.password:
            params['user[login]'] = self.username
            params['user[password]'] = self.password
        else:
            params['user[login]'] = self.getConfig("username")
            params['user[password]'] = self.getConfig("password")
        params['user[remember_me]'] = '1'
        params['commit'] = 'Log in'
        params['utf8'] = u'\x2713' # utf8 *is* required now. hex code works better than actual character for some reason. u'✓'

        # authenticity_token now comes from a completely separate json call.
        token_json = json.loads(self.get_request('https://' + self.getSiteDomain() + "/token_dispenser.json"))
        params['authenticity_token'] = token_json['token']

        loginUrl = 'https://' + self.getSiteDomain() + '/users/login'
        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                            params['user[login]']))

        d = self.post_request(loginUrl, params)

        if 'href="/users/logout"' not in d :
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['user[login]']))
            raise exceptions.FailedToLogin(url,params['user[login]'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            self.addurl = "?view_adult=true"
        else:
            self.addurl=""

        metaurl = self.url+self.addurl
        url = self.url+'/navigate'+self.addurl
        logger.info("url: "+url)
        logger.info("metaurl: "+metaurl)

        data = self.get_request(url)
        if '<h2 class="heading">Error 503 - Service unavailable</h2>' in data:
            # note that it's not *actually* a 503 code...
            raise exceptions.FailedToDownload('Site is currently unavailable.')

        meta = self.get_request(metaurl)

        if 'This work is part of an ongoing challenge and will be revealed soon!' in meta:
            raise exceptions.FailedToDownload('Site says: "This work is part of an ongoing challenge and will be revealed soon!"')

        if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta:
            if self.addurl:
                ## "?view_adult=true" doesn't work on base story
                ## URL anymore, which means we have to
                metasoup = self.make_soup(meta)
                a = metasoup.find('a',text='Proceed')
                metaurl = 'https://'+self.host+a['href']
                meta = self.get_request(metaurl)
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Sorry, we couldn't find the work you were looking for." in data:
            raise exceptions.StoryDoesNotExist(self.url)

        # need to log in for this one, or always_login.
        if self.needToLoginCheck(data) or \
                ( self.getConfig("always_login") and 'href="/users/logout"' not in data ):
            self.performLogin(url,data)
            data = self.get_request(url,usecache=False)
            meta = self.get_request(metaurl,usecache=False)

            ## duplicate of check above for login-required stories that
            ## are also hidden.
            if 'This work is part of an ongoing challenge and will be revealed soon!' in meta:
                raise exceptions.FailedToDownload('Site says: "This work is part of an ongoing challenge and will be revealed soon!"')

        soup = self.make_soup(data)
        for tag in soup.findAll('div',id='admin-banner'):
            tag.extract()
        metasoup = self.make_soup(meta)
        for tag in metasoup.findAll('div',id='admin-banner'):
            tag.extract()

        ## Title
        a = soup.find('a', href=re.compile(r"/works/\d+$"))
        self.story.setMetadata('title',stripHTML(a))

        if self.getConfig("always_login"):
            # deliberately using always_login instead of checking for
            # actual login so we don't have a case where these show up
            # for a user only when they get user-restricted stories.
            try:
                # is bookmarked if has update /bookmarks/ form --
                # create bookmark form uses different url
                self.story.setMetadata('bookmarked',
                                       None != metasoup.find('form',action=re.compile(r'^/bookmarks/')))
                self.story.extendList('bookmarktags',
                                      metasoup.find('input',id='bookmark_tag_string')['value'].split(', '))
                self.story.setMetadata('bookmarkprivate',
                                       metasoup.find('input',id='bookmark_private').has_attr('checked'))
                self.story.setMetadata('bookmarkrec',
                                       metasoup.find('input',id='bookmark_rec').has_attr('checked'))
            except KeyError:
                pass
            self.story.setMetadata('bookmarksummary',
                                   stripHTML(metasoup.find('textarea',id='bookmark_notes')))

        if metasoup.find('img',alt='(Restricted)'):
            self.story.setMetadata('restricted','Restricted')

        # Find authorid and URL from... author url.
        alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/.+"))
        if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
            self.story.setMetadata('author','Anonymous')
            self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
            self.story.setMetadata('authorId','0')
        else:
            for a in alist:
                self.story.addToList('authorId',a['href'].split('/')[-1])
                self.story.addToList('authorUrl','https://'+self.host+a['href'])
                self.story.addToList('author',a.text)

        byline = metasoup.find('h3',{'class':'byline'})
        if byline:
            self.story.setMetadata('byline',stripHTML(byline))

            # byline:
            # <h3 class="byline heading">
            # Hope Roy [archived by <a href="/users/ssa_archivist/pseuds/ssa_archivist" rel="author">ssa_archivist</a>]
            # </h3>
            # stripped:"Hope Roy [archived by ssa_archivist]"
            m = re.match(r'(?P<author>.*) \[archived by ?(?P<archivist>.*)\]',stripHTML(byline))
            if( m and
                len(alist) == 1 and
                self.getConfig('use_archived_author') ):
                self.story.setMetadata('author',m.group('author'))

        newestChapter = None
        self.newestChapterNum = None # save for comparing during update.
        # Scan all chapters to find the oldest and newest, on AO3 it's
        # possible for authors to insert new chapters out-of-order or
        # change the dates of earlier ones by editing them--That WILL
        # break epub update.
        # Find the chapters:
        chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
        self.story.setMetadata('numChapters',len(chapters))
        logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
        if len(chapters)==1:
            self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+chapters[0]['href'])
        else:
            for index, chapter in enumerate(chapters):
                # strip just in case there's tags, like <i> in chapter titles.
                # (2013-09-21)
                date = stripHTML(chapter.findNext('span'))[1:-1]
                chapterDate = makeDate(date,self.dateformat)
                self.add_chapter(chapter,'https://'+self.host+chapter['href'],
                                 {'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d")))})
                if newestChapter == None or chapterDate > newestChapter:
                    newestChapter = chapterDate
                    self.newestChapterNum = index

        a = metasoup.find('blockquote',{'class':'userstuff'})
        if a != None:
            a.name='div' # Change blockquote to div.
            self.setDescription(url,a)
            #self.story.setMetadata('description',a.text)

        a = metasoup.find('dd',{'class':"rating tags"})
        if a != None:
            self.story.setMetadata('rating',stripHTML(a.text))

        d = metasoup.find('dd',{'class':"language"})
        if d != None:
            self.story.setMetadata('language',stripHTML(d.text))

        a = metasoup.find('dd',{'class':"fandom tags"})
        if a != None:
            fandoms = a.findAll('a',{'class':"tag"})
            for fandom in fandoms:
                self.story.addToList('fandoms',fandom.string)

        a = metasoup.find('dd',{'class':"warning tags"})
        if a != None:
            warnings = a.findAll('a',{'class':"tag"})
            for warning in warnings:
                self.story.addToList('warnings',warning.string)

        a = metasoup.find('dd',{'class':"freeform tags"})
        if a != None:
            genres = a.findAll('a',{'class':"tag"})
            for genre in genres:
                self.story.addToList('freeformtags',genre.string)

        a = metasoup.find('dd',{'class':"category tags"})
        if a != None:
            genres = a.findAll('a',{'class':"tag"})
            for genre in genres:
                if genre != "Gen":
                    self.story.addToList('ao3categories',genre.string)

        a = metasoup.find('dd',{'class':"character tags"})
        if a != None:
            chars = a.findAll('a',{'class':"tag"})
            for char in chars:
                self.story.addToList('characters',char.string)

        a = metasoup.find('dd',{'class':"relationship tags"})
        if a != None:
            ships = a.findAll('a',{'class':"tag"})
            for ship in ships:
                self.story.addToList('ships',ship.string)

        a = metasoup.find('dd',{'class':"collections"})
        if a != None:
            collections = a.findAll('a')
            for collection in collections:
                self.story.addToList('collections',collection.string)

        stats = metasoup.find('dl',{'class':'stats'})
        dt = stats.findAll('dt')
        dd = stats.findAll('dd')
        for x in range(0,len(dt)):
            label = dt[x].text
            value = dd[x].text

            if 'Words:' in label:
                self.story.setMetadata('numWords', value)

            if 'Comments:' in label:
                self.story.setMetadata('comments', value)

            if 'Kudos:' in label:
                self.story.setMetadata('kudos', value)

            if 'Hits:' in label:
                self.story.setMetadata('hits', value)

            if 'Bookmarks:' in label:
                self.story.setMetadata('bookmarks', value)

            if 'Chapters:' in label:
                self.story.setMetadata('chapterslashtotal', value)
                if value.split('/')[0] == value.split('/')[1]:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Completed' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        # Find Series name from series URL.
        ddseries = metasoup.find('dd',{'class':"series"})

        if ddseries:
            for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))):
                series_name = stripHTML(a)
                series_url = 'https://'+self.host+a['href']
                series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"
                self.story.setMetadata('series%02d'%i,"%s [%s]"%(series_name,series_index))
                self.story.setMetadata('series%02dUrl'%i,series_url)
                if i == 0:
                    self.setSeries(series_name, series_index)
                    self.story.setMetadata('seriesUrl',series_url)

        if self.getConfig('use_workskin',False):
            divmain = metasoup.find('div',{'id':'main'})
            if divmain:
                # we sort of assume ddmain exists because otherwise, there would be no fic
                workskin = divmain.style
                if workskin:
                    workskin = unicode(workskin.contents[0]) # 'contents' returns a list with (here) a single element
                    # some transformation to adjust which classes are affected
                    workskin = workskin.replace('#workskin', '.userstuff')
                    self.story.extra_css = "/*start of AO3 workskin*/\n" + workskin + "\n/* end of AO3 workskin*/\n"

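## Tiny illustration of the workskin rewrite above (the CSS rule is
## invented): AO3 scopes skins to #workskin, and re-pointing the selector
## at .userstuff applies the same rules to the chapter divs being emitted.
css_in = "#workskin .spoken { font-style: italic; }"
css_out = css_in.replace('#workskin', '.userstuff')
print(css_out)  # .userstuff .spoken { font-style: italic; }
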
    def hookForUpdates(self,chaptercount):
        if self.newestChapterNum and self.oldchapters and len(self.oldchapters) > self.newestChapterNum:
            logger.info("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1))
            self.oldchapters = self.oldchapters[:self.newestChapterNum]
        return len(self.oldchapters)

    ## Normalize chapter URLs because a) site has changed from http to
    ## https and b) in case of title change. That way updates to
    ## existing stories don't re-download all chapters.
    def normalize_chapterurl(self,url):
        url = re.sub(r"https?://("+self.getSiteDomain()+r"/works/\d+/chapters/\d+)(\?view_adult=true)?$",
                     r"https://\1",url)
        return url

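## Worked example of normalize_chapterurl() above (ids invented): an old
## http chapter URL with a ?view_adult=true suffix collapses to the
## canonical https form, so updates recognize it as already downloaded.
import re

url = "http://archiveofourown.org/works/123456/chapters/789012?view_adult=true"
normalized = re.sub(r"https?://(archiveofourown\.org/works/\d+/chapters/\d+)(\?view_adult=true)?$",
                    r"https://\1", url)
print(normalized)  # https://archiveofourown.org/works/123456/chapters/789012
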
    # grab the text for an individual chapter.
    def getChapterTextNum(self, url, index):
        ## FYI: Chapter urls used to include ?view_adult=true in each
        ## one. With cookiejar being passed now, that's not
        ## necessary. However, there is a corner case with plugin--If
        ## a user-required story is attempted after gathering metadata
        ## for one that needs adult, but not user AND the user doesn't
        ## enter a valid user, the is_adult cookie from before can be
        ## lost.
        logger.debug('Getting chapter text for: %s index: %s' % (url,index))

        save_chapter_soup = self.make_soup('<div class="story"></div>')
        ## use the div because the full soup will also have <html><body>.
        ## need save_chapter_soup for .new_tag()
        save_chapter=save_chapter_soup.find('div')

        whole_dl_soup = chapter_dl_soup = None

        if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.getConfig("always_reload_first_chapter"):
            self.use_full_work_soup = False
            logger.warning("OVERRIDE: AO3 - use_view_full_work not used when always_reload_first_chapter:true")

        if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
            logger.debug("USE view_full_work")
            ## Assumed view_adult=true was cookied during metadata
            if not self.full_work_soup:
                self.full_work_soup = self.make_soup(self.get_request(self.url+"?view_full_work=true"+self.addurl.replace('?','&')))
                ## AO3 has had several cases now where chapter numbers
                ## are missing, breaking the link between
                ## <div id=chapter-##> and Chapter ##.
                ## But they should all still be there and in the right
                ## order, so array[index]
                self.full_work_chapters = self.full_work_soup.find_all('div',{'id':re.compile(r'chapter-\d+')})
                if len(self.full_work_chapters) != self.num_chapters():
                    ## sanity check just in case.
                    self.use_full_work_soup = False
                    self.full_work_soup = None
                    logger.warning("chapter count in view_full_work(%s) disagrees with num of chapters(%s)--ending use_view_full_work"%(len(self.full_work_chapters),self.num_chapters()))
            whole_dl_soup = self.full_work_soup

        if whole_dl_soup:
            chapter_dl_soup = self.full_work_chapters[index]
        else:
            whole_dl_soup = chapter_dl_soup = self.make_soup(self.get_request(url+self.addurl))
        if None == chapter_dl_soup:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        exclude_notes=self.getConfigList('exclude_notes')

        def append_tag(elem,tag,string=None,classes=None):
            '''bs4 requires tags be added separately.'''
            new_tag = save_chapter_soup.new_tag(tag)
            if string:
                new_tag.string=string
            if classes:
                new_tag['class']=[classes]
            elem.append(new_tag)
            return new_tag

        ## These are the over-all work's 'Notes at the beginning'.
        ## They only appear on the first chapter in individual chapter
        ## pages and before chapter-1 div. Appending removes
        ## headnotes from whole_dl_soup, so be sure to only do it on
        ## the first chapter.
        head_notes_div = append_tag(save_chapter,'div',classes="fff_chapter_notes fff_head_notes")
        if 'authorheadnotes' not in exclude_notes and index == 0:
            headnotes = whole_dl_soup.find('div', {'class' : "preface group"}).find('div', {'class' : "notes module"})
            if headnotes != None:
                ## Also include ul class='associations'.
                ulassoc = headnotes.find('ul', {'class' : "associations"})
                headnotes = headnotes.find('blockquote', {'class' : "userstuff"})
                if headnotes != None or ulassoc != None:
                    append_tag(head_notes_div,'b',"Author's Note:")
                    if ulassoc != None:
                        # fix relative links--all examples so far have been.
                        for alink in ulassoc.find_all('a'):
                            if 'http' not in alink['href']:
                                alink['href']='https://' + self.getSiteDomain() + alink['href']
                        head_notes_div.append(ulassoc)
                    if headnotes != None:
                        head_notes_div.append(headnotes)

        ## Can appear on every chapter
        if 'chaptersummary' not in exclude_notes:
            chapsumm = chapter_dl_soup.find('div', {'id' : "summary"})
            if chapsumm != None:
                chapsumm = chapsumm.find('blockquote')
                append_tag(head_notes_div,'b',"Summary for the Chapter:")
                head_notes_div.append(chapsumm)

        ## Can appear on every chapter
        if 'chapterheadnotes' not in exclude_notes:
            chapnotes = chapter_dl_soup.find('div', {'id' : "notes"})
            if chapnotes != None:
                chapnotes = chapnotes.find('blockquote')
                if chapnotes != None:
                    append_tag(head_notes_div,'b',"Notes for the Chapter:")
                    head_notes_div.append(chapnotes)

        text = chapter_dl_soup.find('div', {'class' : "userstuff module"})
        chtext = text.find('h3', {'class' : "landmark heading"})
        if chtext:
            chtext.extract()
        save_chapter.append(text)

        foot_notes_div = append_tag(save_chapter,'div',classes="fff_chapter_notes fff_foot_notes")
        ## Can appear on every chapter
        if 'chapterfootnotes' not in exclude_notes:
            chapfoot = chapter_dl_soup.find('div', {'class' : "end notes module"})
            if chapfoot != None:
                chapfoot = chapfoot.find('blockquote')
                append_tag(foot_notes_div,'b',"Notes for the Chapter:")
                foot_notes_div.append(chapfoot)

        skip_on_update_tags = []
        ## These are the over-all work's 'Notes at the end'.
        ## They only appear on the last chapter in individual chapter
        ## pages and after chapter-# div. Appending removes
        ## headnotes from whole_dl_soup, so be sure to only do it on
        ## the last chapter.
        if 'authorfootnotes' not in exclude_notes and index+1 == self.num_chapters():
            footnotes = whole_dl_soup.find('div', {'id' : "work_endnotes"})
            if footnotes != None:
                footnotes = footnotes.find('blockquote')
                if footnotes:
                    b = append_tag(foot_notes_div,'b',"Author's Note:")
                    skip_on_update_tags.append(b)
                    skip_on_update_tags.append(footnotes)
                    foot_notes_div.append(footnotes)

        ## It looks like 'Inspired by' links now all appear in the ul
        ## class=associations tag in authorheadnotes. This code is
        ## left in case I'm wrong and there are still stories with div
        ## id=children inspired links at the end.
        if 'inspiredlinks' not in exclude_notes and index+1 == self.num_chapters():
            inspiredlinks = whole_dl_soup.find('div', {'id' : "children"})
            if inspiredlinks != None:
                if inspiredlinks:
                    inspiredlinks.find('h3').name='b' # don't want a big h3 at the end.
                # fix relative links--all examples so far have been.
                for alink in inspiredlinks.find_all('a'):
                    if 'http' not in alink['href']:
                        alink['href']='https://' + self.getSiteDomain() + alink['href']
                skip_on_update_tags.append(inspiredlinks)
                foot_notes_div.append(inspiredlinks)

        ## remove empty head/foot notes div(s)
        if not head_notes_div.find(True):
            head_notes_div.extract()
        if not foot_notes_div.find(True):
            foot_notes_div.extract()
        ## AO3 story end notes end up in the 'last' chapter, but if
        ## updated, then there's a new 'last' chapter. This option
        ## applies the 'skip_on_ffdl_update' class to those tags which
        ## means they will be removed during epub reading for update.
        ## Results: only the last chapter will have end notes.
        ## Side-effect: An 'Update Always' that doesn't add a new
        ## last chapter will remove the end notes.
        if self.getConfig("remove_authorfootnotes_on_update"):
            for skip_tag in skip_on_update_tags:
                if skip_tag.has_attr('class'):
                    skip_tag['class'].append('skip_on_ffdl_update')
                else:
                    skip_tag['class']=['skip_on_ffdl_update']
                # logger.debug(skip_tag)

        return self.utf8FromSoup(url,save_chapter)

    def before_get_urls_from_page(self,url,normalize):
        # special stuff to log into archiveofourown.org, if possible.
        # Unlike most that show the links to 'adult' stories, but protect
        # them, AO3 doesn't even show them if not logged in. Only works
        # with saved user/pass--not going to prompt for list.
        if self.getConfig("username"):
            if self.getConfig("is_adult"):
                if '?' in url:
                    addurl = "&view_adult=true"
                else:
                    addurl = "?view_adult=true"
            else:
                addurl=""
            # just to get an authenticity_token.
            data = self.get_request(url+addurl)
            # login the session.
            self.performLogin(url,data)
            # get the list page with logged in session.

    def get_series_from_page(self,url,data,normalize=False):
        '''
        This method is to make it easier for adapters to detect a
        series URL, pick out the series metadata and list of storyUrls
        to return without needing to override get_urls_from_page
        entirely.
        '''

        if 'This work is only available to registered users of the Archive' in data:
            raise exceptions.FailedToDownload("This work is only available to registered users of the Archive -- set username/password in personal.ini under [archiveofourown.org]")
        ## easiest way to get all the weird URL possibilities and stay
        ## up to date with future changes.
        m = re.match(self.getSiteURLPattern().replace('/works/','/series/'),url)
        if m:
            soup = self.make_soup(data)
            retval = {}
            retval['urllist']=[ 'https://'+self.host+a['href'] for a in soup.select('h4.heading a:first-child') ]
            retval['name']=stripHTML(soup.select_one("h2.heading"))
            desc=soup.select_one("div.wrapper dd blockquote.userstuff")
            if desc:
                desc.name='div' # change blockquote to div to match stories.
                retval['desc']=desc
            stats=stripHTML(soup.select_one("dl.series dl.stats"))
            if 'Complete:Yes' in stats:
                retval['status'] = "Completed"
            elif 'Complete:No' in stats:
                retval['status'] = "In-Progress"
            return retval
        ## return dict with at least {'urllist':['storyUrl','storyUrl',...]}
        ## optionally 'name' and 'desc'?
        return {}
        return url

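## Shape of the dict get_series_from_page() returns for a series URL,
## shown with invented values; per the comments above, only 'urllist'
## is required, the other keys are optional extras.
example_retval = {
    'urllist': ['https://archiveofourown.org/works/111',
                'https://archiveofourown.org/works/222'],
    'name': 'Some Series Title',
    'desc': '<div>series description</div>',
    'status': 'In-Progress',
}
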
@@ -1,174 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition

from .base_adapter import BaseSiteAdapter, makeDate


def getClass():
    return ArchiveSkyeHawkeComAdapter

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ash')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'archive.skyehawke.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['archive.skyehawke.com','www.skyehawke.com']

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://archive.skyehawke.com/story.php?no=1234 http://www.skyehawke.com/archive/story.php?no=1234 http://skyehawke.com/archive/story.php?no=1234"

    def getSiteURLPattern(self):
        return r"https?://(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)

        soup = self.make_soup(data)
        # print data

        ## Title
        a = soup.find('div', {'class':"story border"}).find('span',{'class':'left'})
        title=stripHTML(a).split('"')[1]
        self.story.setMetadata('title',title)

        # Find authorid and URL from... author url.
        author = a.find('a')
        self.story.setMetadata('authorId',author['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href'])
        self.story.setMetadata('author',author.string)

        authorSoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

        chapter=soup.find('select',{'name':'chapter'}).findAll('option')

        for i in range(1,len(chapter)):
            ch=chapter[i]
            self.add_chapter(ch,ch['value'])

        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        box=soup.find('div', {'class': "container borderridge"})
        sum=box.find('span').text
        self.setDescription(url,sum)

        boxes=soup.findAll('div', {'class': "container bordersolid"})
        for box in boxes:
            if box.find('b') != None and box.find('b').text == "History and Story Information":

                for b in box.findAll('b'):
                    if "words" in b.nextSibling:
                        self.story.setMetadata('numWords', b.text)
                    if "archived" in b.previousSibling:
                        self.story.setMetadata('datePublished', makeDate(stripHTML(b.text), self.dateformat))
                    if "updated" in b.previousSibling:
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(b.text), self.dateformat))
                    if "fandom" in b.nextSibling:
                        self.story.addToList('category', b.text)

                for br in box.findAll('br'):
                    br.replaceWith('split')
                genre=box.text.split("Genre:")[1].split("split")[0]
                if not "Unspecified" in genre:
                    self.story.addToList('genre',genre)

            if box.find('span') != None and box.find('span').text == "WARNING":

                rating=box.findAll('span')[1]
                rating.find('br').replaceWith('split')
                rating=rating.text.replace("This story is rated",'').split('split')[0]
                self.story.setMetadata('rating',rating)
                logger.debug(self.story.getMetadata('rating'))

                warnings=box.find('ol')
                if warnings != None:
||||
warnings=warnings.text.replace(']', '').replace('[', '').split(' ')
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning)
|
||||
|
||||
|
||||
for asoup in authorSoup.findAll('div', {'class':"story bordersolid"}):
|
||||
if asoup.find('a')['href'] == 'story.php?no='+self.story.getMetadata('storyId'):
|
||||
if '[ Completed ]' in asoup.text:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
chars=asoup.findNext('div').text.split('Characters')[1].split(']')[0]
|
||||
for char in chars.split(','):
|
||||
if not "None" in char:
|
||||
self.story.addToList('characters',char)
|
||||
break
|
||||
|
||||
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div',{'class':"chapter bordersolid"}).findNext('div').findNext('div')
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@@ -79,7 +79,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
         data1 = self.get_request(self.url)
         soup1 = self.make_soup(data1)
         #strip comments from soup
-        [comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
+        [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]

         if 'Page Not Found.' in data1:
             raise exceptions.StoryDoesNotExist(self.url)
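A pattern that repeats through the rest of these diffs: findAll is the pre-PEP8 alias that BeautifulSoup keeps for backward compatibility, and the text= keyword was renamed to string= (bs4 added string in 4.4.0; recent releases warn on text). Both spellings return the same results, so these are warning-silencing cleanups rather than behavior changes. A minimal check:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<span>Updated</span>', 'html.parser')
    old = soup.findAll('span', text='Updated')      # legacy spelling
    new = soup.find_all('span', string='Updated')   # current spelling
    assert old == new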
@@ -92,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
         self.story.setMetadata('title', title.string)

         # Author
-        author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a')
+        author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
         authorurl = author['href']
         self.story.setMetadata('author', author.string)
         self.story.setMetadata('authorUrl', authorurl)
@@ -112,7 +112,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
         ### add it before the rest of the pages, if any
         self.add_chapter('1', self.url)

-        chapterTable = soup1.find('div',{'class':'pages'}).findAll('a')
+        chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')

         if chapterTable is not None:
             # Multi-chapter story
@@ -124,7 +124,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
                 self.add_chapter(chapterTitle, chapterUrl)


-        rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
+        rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
         self.story.setMetadata('rating',rated)

         self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))
@@ -48,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):


         # normalized story URL.
-        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

         # Each adapter needs to have a unique site abbreviation.
         self.story.setMetadata('siteabbrev','asph')
@@ -64,10 +64,10 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):

     @classmethod
     def getSiteExampleURLs(cls):
-        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
+        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

     def getSiteURLPattern(self):
-        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

     ## Login seems to be reasonably standard across eFiction sites.
     def needToLoginCheck(self, data):
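The getSiteURLPattern change above deserves a note: re.escape over the whole literal also escapes the scheme, so the old pattern could only ever match http://. Escaping just the host-and-path part and prepending an unescaped https?:// accepts either scheme. A quick sanity check (domain assumed from the class name):

    import re

    pattern = r"https?://" + re.escape("ashwinder.sycophanthex.com/viewstory.php?sid=") + r"\d+$"
    assert re.match(pattern, "http://ashwinder.sycophanthex.com/viewstory.php?sid=42")
    assert re.match(pattern, "https://ashwinder.sycophanthex.com/viewstory.php?sid=42")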
@@ -92,7 +92,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
         params['intent'] = ''
         params['submit'] = 'Submit'

-        loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
+        loginUrl = 'https://' + self.getSiteDomain() + '/user.php'
         logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                              params['penname']))

@@ -130,20 +130,20 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
         # Find authorid and URL from... author url.
         a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
         self.story.setMetadata('authorId',a['href'].split('=')[1])
-        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
         self.story.setMetadata('author',a.string)
         asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

         try:
             # in case link points somewhere other than the first chapter
-            a = soup.findAll('option')[1]['value']
+            a = soup.find_all('option')[1]['value']
             self.story.setMetadata('storyId',a.split('=',)[1])
-            url = 'http://'+self.host+'/'+a
+            url = 'https://'+self.host+'/'+a
             soup = self.make_soup(self.get_request(url))
         except:
             pass

-        for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
+        for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
             a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
             if a != None:
                 self.story.setMetadata('title',stripHTML(a))
@@ -151,13 +151,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):


         # Find the chapters:
-        chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
+        chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
         if len(chapters) == 0:
             self.add_chapter(self.story.getMetadata('title'),url)
         else:
             for chapter in chapters:
                 # just in case there's tags, like <i> in chapter titles.
-                self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
+                self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])


         # eFiction sites don't help us out a lot with their meta data
@@ -170,7 +170,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
             except:
                 return ""

-        cats = info.findAll('a',href=re.compile('categories.php'))
+        cats = info.find_all('a',href=re.compile('categories.php'))
         for cat in cats:
             self.story.addToList('category',cat.string)

@@ -188,7 +188,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
         ## <td><span class="sb"><b>Published:</b> 04/08/2007</td>

         ## one story had <b>Updated...</b> in the description.  Restrict to sub-table
-        labels = info.find('table').findAll('b')
+        labels = info.find('table').find_all('b')
         for labelspan in labels:
             value = labelspan.nextSibling
             label = stripHTML(labelspan)
@@ -111,11 +111,17 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
     def doExtractChapterUrlsAndMetadata(self,get_cover=True):
         url = self.url
         logger.info("url: "+url)
-        data = self.get_request(url)
-        soup = self.make_soup(data)
+        soup = None
+        try:
+            data = self.get_request(url)
+            soup = self.make_soup(data)
+        except exceptions.HTTPErrorFFF as e:
+            if e.status_code != 404:
+                raise
+            data = self.decode_data(e.data)
+

-        if self.loginNeededCheck(data):
-            # logger.debug(data)
+        if not soup or self.loginNeededCheck(data):
             # always login if not already to avoid lots of headaches
             self.performLogin(url,data)
             # refresh website after logging in
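The rewritten fetch above tolerates a 404: presumably the site can serve its login wall with a 404 status, so the error body is decoded and kept for loginNeededCheck while soup stays None, which also routes into the login branch. The shape of the pattern, as a sketch (HTTPErrorFFF's status_code and data attributes are taken from the diff itself):

    # Sketch only: fetch a page but keep the body of a 404 for further checks.
    def fetch_page(adapter, url):
        soup = None
        try:
            data = adapter.get_request(url)
            soup = adapter.make_soup(data)
        except exceptions.HTTPErrorFFF as e:
            if e.status_code != 404:
                raise                            # other HTTP errors stay fatal
            data = adapter.decode_data(e.data)   # raw 404 body, still useful
        return data, soup                        # soup is None when the site 404'd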
@@ -140,8 +146,8 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):

         # Find authorid and URL from... author url.
         mainmeta = soup.find('footer', {'class': 'main-meta'})
-        alist = mainmeta.find('span', text='Author(s)')
-        alist = alist.parent.findAll('a', href=re.compile(r"/profile/u/[^/]+"))
+        alist = mainmeta.find('span', string='Author(s)')
+        alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
         for a in alist:
             self.story.addToList('authorId',a['href'].split('/')[-1])
             self.story.addToList('authorUrl','https://'+self.host+a['href'])
@@ -153,10 +159,10 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
         chapters=soup.find('select',{'name':'chapter-nav'})
         hrefattr=None
         if chapters:
-            chapters=chapters.findAll('option')
+            chapters=chapters.find_all('option')
             hrefattr='value'
         else: # didn't find <select name='chapter-nav', look for alternative
-            chapters=soup.find('div',{'class':'widget--chapters'}).findAll('a')
+            chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
             hrefattr='href'
         for index, chapter in enumerate(chapters):
             if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
@@ -165,9 +171,9 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):


                 # find timestamp
-                a = soup.find('span', text='Updated')
+                a = soup.find('span', string='Updated')
                 if a == None:
-                    a = soup.find('span', text='Published') # use published date if work was never updated
+                    a = soup.find('span', string='Published') # use published date if work was never updated
                 a = a.parent.find('time')
                 chapterDate = makeDate(a['datetime'],self.dateformat)
                 if newestChapter == None or chapterDate > newestChapter:
@@ -175,7 +181,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
                     self.newestChapterNum = index

         # story status
-        a = mainmeta.find('span', text='Completed')
+        a = mainmeta.find('span', string='Completed')
         if a:
             self.story.setMetadata('status', 'Completed')
         else:
@@ -194,37 +200,37 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
         self.setDescription(url,a)

         # story tags
-        a = mainmeta.find('span',text='Tags')
+        a = mainmeta.find('span',string='Tags')
         if a:
-            tags = a.parent.findAll('a')
+            tags = a.parent.find_all('a')
             for tag in tags:
                 self.story.addToList('tags', tag.text)

         # story tags
-        a = mainmeta.find('span',text='Characters')
+        a = mainmeta.find('span',string='Characters')
         if a:
             self.story.addToList('characters', a.nextSibling)

         # published on
-        a = soup.find('span', text='Published')
+        a = soup.find('span', string='Published')
         a = a.parent.find('time')
         self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))

         # updated on
-        a = soup.find('span', text='Updated')
+        a = soup.find('span', string='Updated')
         if a:
             a = a.parent.find('time')
             self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))

         # word count
-        a = soup.find('span', text='Total Word Count')
+        a = soup.find('span', string='Total Word Count')
         if a:
             a = a.find_next('span')
             self.story.setMetadata('numWords', int(a.text.split()[0]))

         # upvote, subs, and views
         a = soup.find('div',{'class':'title-meta'})
-        spans = a.findAll('span', recursive=False)
+        spans = a.find_all('span', recursive=False)
         self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
         self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
         if len(spans) > 2: # views can be private
@@ -246,13 +252,39 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):

         data = self.get_request(url)
         soup = self.make_soup(data)
+        # logger.debug(data)
+
+        ageform = soup.select_one('form[action="/account/toggle_age"]')
+        # logger.debug(ageform)
+        if ageform and (self.is_adult or self.getConfig("is_adult")):
+            params = {}
+            params['is_of_age']=ageform.select_one('input#is_of_age')['value']
+            params['current_url']=ageform.select_one('input#current_url')['value']
+            params['csrf_aff_token']=ageform.select_one('input[name="csrf_aff_token"]')['value']
+            loginUrl = 'https://' + self.getSiteDomain() + '/account/mark_over_18'
+            logger.info("Will now toggle age to URL (%s)" % (loginUrl))
+            # logger.debug(params)
+            data = self.post_request(loginUrl, params)
+            soup = self.make_soup(data)
+            # logger.debug(data)

         content = soup.find('div', {'id': 'user-submitted-body'})

+        if self.getConfig('inject_chapter_image'):
+            logger.debug("Injecting chapter image")
+            imgdiv = soup.select_one('div#bodyText div.bot-spacer')
+            if imgdiv:
+                content.insert(0, "\n")
+                content.insert(0, imgdiv)
+                content.insert(0, "\n")
+
         if self.getConfig('inject_chapter_title'):
             logger.debug("Injecting full-length chapter title")
             title = soup.find('h1', {'id' : 'chapter-title'}).text
             newTitle = soup.new_tag('h3')
             newTitle.string = title
             content.insert(0, "\n")
             content.insert(0, newTitle)
             content.insert(0, "\n")

         return self.utf8FromSoup(url,content)
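The age-gate handling added above uses the standard trick for scripting a form post: read the form's hidden inputs (is_of_age, current_url and the csrf_aff_token) back out of the page and echo them in the POST, since the server rejects submissions without a valid token. The same idea appears in generic form in the Dokuga login code further down; as a sketch:

    # Sketch: copy every hidden <input> of a form into the POST params so
    # server-side tokens (CSRF and friends) survive the round trip.
    def hidden_form_params(form_tag):
        params = {}
        for tag in form_tag.find_all('input', {'type': 'hidden'}):
            params[tag['name']] = tag['value']
        return params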
@@ -126,7 +126,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):

         # Find the chapters:
         # The update date is with the chapter links... so we will update it here as well
-        for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
             value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
             self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
             self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])
@@ -134,11 +134,11 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):

         # Get the MetaData
         # Erotia Tags
-        tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode'))
+        tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
         for tag in tags:
             self.story.addToList('eroticatags',tag.text)

-        for td in soup.findAll('td'):
+        for td in soup.find_all('td'):
             if len(td.text)>0:
                 if 'Added on:' in td.text and '<table' not in unicode(td):
                     value = td.text.replace('Added on:','').strip()
@@ -169,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
             raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))

         #strip comments from soup
-        [comment.extract() for comment in chaptertag.findAll(text=lambda text:isinstance(text, Comment))]
+        [comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]

         # BDSM Library basically wraps it's own html around the document,
         # so we will be removing the script, title and meta content from the
         # storyblock
-        for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'):
+        for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
             tag.extract()

-        for tag in chaptertag.findAll('o:smarttagtype'):
+        for tag in chaptertag.find_all('o:smarttagtype'):
             tag.name = 'span'

         ## I'm going to take the attributes off all of the tags
         ## because they usually refer to the style that we removed above.
-        for tag in chaptertag.findAll(True):
+        for tag in chaptertag.find_all(True):
             tag.attrs = None

         return self.utf8FromSoup(url,chaptertag)
@@ -117,7 +117,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):

             summary_div = list_box.find('div', {'class': 'list_summary'})
             if not self.getConfig('keep_summary_html'):
-                summary = ''.join(summary_div(text=True))
+                summary = ''.join(summary_div(string=True))
             else:
                 summary = self.utf8FromSoup(author_url, summary_div)

@@ -157,9 +157,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):

                 self.story.addToList('warnings', warning)

-            elif key == 'Chapters':
-                self.story.setMetadata('numChapters', int(value))
-
             elif key == 'Words':
                 # Apparently only numChapters need to be an integer for
                 # some strange reason. Remove possible ',' characters as to
@@ -174,7 +171,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
             # ugly %p(am/pm) hack moved into makeDate so other sites can use it.
             self.story.setMetadata('dateUpdated', date)

-        if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
+        if self.story.getMetadataRaw('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
            raise exceptions.AdultCheckRequired(self.url)

     def getChapterText(self, url):
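A note on the one-line Bloodshedverse change above: getMetadata() hands back the value after user configuration (for example replace_metadata rules) has been applied, so a user rewriting rating strings could make the exact == 'NC-17' comparison miss and skip the adult check, while getMetadataRaw() compares against the value as scraped from the site. That reading is inferred from the method names; the diff itself only records the swap.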
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
+# Copyright 2024 FanFicFare team
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,22 +19,20 @@ from __future__ import absolute_import
 import logging
 logger = logging.getLogger(__name__)

-# py2 vs py3 transition
-
-from .adapter_storiesonlinenet import StoriesOnlineNetAdapter
+from .base_otw_adapter import BaseOTWAdapter

 def getClass():
-    return FineStoriesComAdapter
+    return CFAAAdapter

 # Class name has to be unique.  Our convention is camel case the
 # sitename with Adapter at the end.  www is skipped.
-class FineStoriesComAdapter(StoriesOnlineNetAdapter):
+class CFAAAdapter(BaseOTWAdapter):

-    @classmethod
-    def getSiteAbbrev(cls):
-        return 'fnst'
+    def __init__(self, config, url):
+        BaseOTWAdapter.__init__(self, config, url)
+
+        # Each adapter needs to have a unique site abbreviation.
+        self.story.setMetadata('siteabbrev','cfaa')

     @staticmethod # must be @staticmethod, don't remove it.
     def getSiteDomain():
         # The site domain.  Does have www here, if it uses it.
-        return 'finestories.com'
+        return 'www.cfaarchive.org'
@@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
         self.story.setMetadata('rating', rating)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -134,7 +134,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):

         # <span class="label">Rated:</span> NC-17<br /> etc

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})

         value = labels[0].previousSibling
         svalue = ""
@@ -154,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value.split(' -')[0])

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -194,7 +194,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -88,8 +88,8 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
         intro = stripHTML(info.select_one('.inform-inform-txt').span)
         self.setDescription(self.url, intro)

-        for content in soup.findAll('div', {'id': 'content'}):
-            for a in content.findAll('a'):
+        for content in soup.find_all('div', {'id': 'content'}):
+            for a in content.find_all('a'):
                 self.add_chapter(a.get_text(), a['href'])


@@ -98,7 +98,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
         ## Title
         ## Some stories have a banner that has it's own a tag before the actual text title...
         ## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
-        a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
+        a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
         self.story.setMetadata('title',stripHTML(a))

         # Find authorid and URL from... author url.
@@ -110,7 +110,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             #self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
             self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
@@ -127,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
                 return ""

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             val = labelspan.nextSibling
             value = unicode('')
@@ -149,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', stripHTML(value))

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Pairing' in label:
-                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
+                ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                 for ship in ships:
                     self.story.addToList('ships',ship.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -196,7 +196,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # this site has several links to each story.
@@ -1,222 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Software: eFiction
-from __future__ import absolute_import
-import logging
-logger = logging.getLogger(__name__)
-import re
-from ..htmlcleanup import stripHTML
-from .. import exceptions as exceptions
-
-# py2 vs py3 transition
-from ..six import text_type as unicode
-
-from .base_adapter import BaseSiteAdapter, makeDate
-
-def getClass():
-    return DestinysGatewayComAdapter
-
-# Class name has to be unique.  Our convention is camel case the
-# sitename with Adapter at the end.  www is skipped.
-class DestinysGatewayComAdapter(BaseSiteAdapter):
-
-    def __init__(self, config, url):
-        BaseSiteAdapter.__init__(self, config, url)
-
-        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
-        self.password = ""
-        self.is_adult=False
-
-        # get storyId from url--url validation guarantees query is only sid=1234
-        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-
-
-        # normalized story URL.
-        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
-
-        # Each adapter needs to have a unique site abbreviation.
-        self.story.setMetadata('siteabbrev','dgrfa')
-
-        # The date format will vary from site to site.
-        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-        self.dateformat = "%b %d %Y"
-
-    @staticmethod # must be @staticmethod, don't remove it.
-    def getSiteDomain():
-        # The site domain.  Does have www here, if it uses it.
-        return 'www.destinysgateway.com'
-
-    @classmethod
-    def getSiteExampleURLs(cls):
-        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
-
-    def getSiteURLPattern(self):
-        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=").replace(r"www\.",r"(www\.)?")+r"\d+$"
-
-
-    ## Getting the chapter list and the meta data, plus 'is adult' checking.
-    def extractChapterUrlsAndMetadata(self):
-
-        if self.is_adult or self.getConfig("is_adult"):
-            # Weirdly, different sites use different warning numbers.
-            # If the title search below fails, there's a good chance
-            # you need a different number.  print data at that point
-            # and see what the 'click here to continue' url says.
-            addurl = "&warning=4"
-        else:
-            addurl=""
-
-        # index=1 makes sure we see the story chapter index.  Some
-        # sites skip that for one-chapter stories.
-        url = self.url+'&index=1'+addurl
-        logger.debug("URL: "+url)
-
-        data = self.get_request(url)
-
-        m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
-        if m != None:
-            if self.is_adult or self.getConfig("is_adult"):
-                # We tried the default and still got a warning, so
-                # let's pull the warning number from the 'continue'
-                # link and reload data.
-                addurl = m.group(1)
-                # correct stupid &amp; error in url.
-                addurl = addurl.replace("&amp;","&")
-                url = self.url+'&index=1'+addurl
-                logger.debug("URL 2nd try: "+url)
-
-                data = self.get_request(url)
-            else:
-                raise exceptions.AdultCheckRequired(self.url)
-
-        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
-            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
-
-        soup = self.make_soup(data)
-        # print data
-
-
-        ## Title
-        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
-        self.story.setMetadata('title',stripHTML(a))
-
-        # Find authorid and URL from... author url.
-        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
-        self.story.setMetadata('authorId',a['href'].split('=')[1])
-        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
-        self.story.setMetadata('author',a.string)
-
-        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
-            # just in case there's tags, like <i> in chapter titles.
-            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
-
-
-        # eFiction sites don't help us out a lot with their meta data
-        # formating, so it's a little ugly.
-
-        # utility method
-        def defaultGetattr(d,k):
-            try:
-                return d[k]
-            except:
-                return ""
-
-        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
-        for labelspan in labels:
-            value = labelspan.nextSibling
-            label = labelspan.string
-
-            if 'Summary' in label:
-                ## Everything until the next span class='label'
-                svalue = ""
-                while value and 'label' not in defaultGetattr(value,'class'):
-                    svalue += unicode(value)
-                    value = value.nextSibling
-                self.setDescription(url,svalue)
-                #self.story.setMetadata('description',stripHTML(svalue))
-
-            if 'Rated' in label:
-                self.story.setMetadata('rating', value)
-
-            if 'Word count' in label:
-                self.story.setMetadata('numWords', value)
-
-            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
-                for cat in cats:
-                    self.story.addToList('category',cat.string)
-
-            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
-                for genre in genres:
-                    self.story.addToList('genre',genre.string)
-
-            if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
-                for warning in warnings:
-                    self.story.addToList('warnings',warning.string)
-
-            if 'Completed' in label:
-                if 'Yes' in value:
-                    self.story.setMetadata('status', 'Completed')
-                else:
-                    self.story.setMetadata('status', 'In-Progress')
-
-            if 'Published' in label:
-                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
-
-            if 'Updated' in label:
-                # there's a stray [ at the end.
-                #value = value[0:-1]
-                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
-
-        try:
-            # Find Series name from series URL.
-            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
-            series_name = a.string
-            series_url = 'http://'+self.host+'/'+a['href']
-
-            seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
-            i=1
-            for a in storyas:
-                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
-                    self.setSeries(series_name, i)
-                    self.story.setMetadata('seriesUrl',series_url)
-                    break
-                i+=1
-
-        except:
-            # I find it hard to care if the series parsing fails
-            pass
-
-    # grab the text for an individual chapter.
-    def getChapterText(self, url):
-
-        logger.debug('Getting chapter text from: %s' % url)
-
-        soup = self.make_soup(self.get_request(url))
-
-        div = soup.find('div', {'id' : 'story'})
-
-        if None == div:
-            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
-
-        return self.utf8FromSoup(url,div)
@@ -74,38 +74,74 @@ class DeviantArtComSiteAdapter(BaseSiteAdapter):
         return r'https?://www\.deviantart\.com/(?P<author>[^/]+)/art/(?P<id>[^/]+)/?'

     def performLogin(self, url):
-        data = self.get_request_raw('https://www.deviantart.com/users/login', referer=url)
+        if self.username and self.username != 'NoneGiven':
+            username = self.username
+        else:
+            username = self.getConfig('username')
+
+        # logger.debug("\n\nusername:(%s)\n\n"%username)
+        if not username:
+            logger.info("Login Required for URL %s" % url)
+            raise exceptions.FailedToLogin(url,username)
+
+        data = self.get_request_raw('https://www.deviantart.com/users/login', referer=url, usecache=False)
         data = self.decode_data(data)
         soup = self.make_soup(data)
         params = {
-            'referer': url,
+            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
             'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
             'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
             'challenge': soup.find('input', {'name': 'challenge'})['value'],
             'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
             'remember': 'on',
+            'username': username
+        }
+
+        loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/step2'
+        logger.debug('Will now login to deviantARt as (%s)' % username)
+
+        result = self.post_request(loginUrl, params, usecache=False)
+        soup = self.make_soup(result)
+        if not soup.find('input', {'name': 'lu_token2'}):
+            logger.info("Login Failed for URL %s (no lu_token2 found)" % url)
+            raise exceptions.FailedToLogin(url,username)
+
+        params = {
+            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
+            'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
+            'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
+            'challenge': soup.find('input', {'name': 'challenge'})['value'],
+            'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
+            'lu_token2': soup.find('input', {'name': 'lu_token2'})['value'],
+            'remember': 'on',
             'username': ''
         }

         if self.password:
             params['username'] = self.username
             params['password'] = self.password
         else:
             params['username'] = self.getConfig('username')
             params['password'] = self.getConfig('password')

+        # logger.debug("\n\nparams['password']:(%s)\n\n"%params['password'])
         loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/signin'
-        logger.debug('Will now login to deviantARt as (%s)' % params['username'])
+        logger.debug('Will now send password to deviantARt')

         result = self.post_request(loginUrl, params, usecache=False)

         if 'Log In | DeviantArt' in result:
-            logger.error('Failed to login to deviantArt as %s' % params['username'])
-            raise exceptions.FailedToLogin('https://www.deviantart.com', params['username'])
+            logger.error('Failed to login to deviantArt as %s' % username)
+            raise exceptions.FailedToLogin('https://www.deviantart.com', username)
         else:
             return True

     def requiresLogin(self, data):
         return '</a> has limited the viewing of this artwork to members of the DeviantArt community only' in data

+    def isLoggedIn(self, data):
+        return '<form id="logout-form" action="https://www.deviantart.com/users/logout" method="POST">' in data
+
     def isWatchersOnly(self, data):
-        return '<span>Watchers-Only Deviation</span>' in data
+        return '>Watchers-Only Deviation<' in data

     def requiresMatureContentEnabled(self, data):
         return (
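Worth spelling out, because the rewritten performLogin above is easy to misread: DeviantArt's sign-in is now done in two requests. The first POST, to /_sisu/do/step2, sends only the username plus the hidden tokens scraped from the login page; a successful response contains a fresh hidden field, lu_token2. The second POST, to /_sisu/do/signin, replays the tokens together with lu_token2 and the password. A missing lu_token2 after step one is treated as a failed login immediately, rather than letting the doomed second request produce a harder-to-diagnose error.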
@@ -114,44 +150,50 @@ class DeviantArtComSiteAdapter(BaseSiteAdapter):
             or '>This filter hides content that may be inappropriate for some viewers<' in data
             or '>May contain sensitive content<' in data
             or '>Log in to view<' in data
+            or '>This deviation has been labeled as containing themes not suitable for all deviants.<' in data
         )

     def extractChapterUrlsAndMetadata(self):
         isLoggedIn = False
         logger.debug('URL: %s', self.url)

         data = self.get_request(self.url)
         soup = self.make_soup(data)

-        if self.requiresLogin(data):
+        ## story can require login outright, or it can show up as
+        ## watchers-only or mature-enabled without the same 'requires
+        ## login' strings.
+        if self.requiresLogin(data) or ( not self.isLoggedIn(data) and
+                                         (self.isWatchersOnly(data) or
+                                          self.requiresMatureContentEnabled(data)) ):
             if self.performLogin(self.url):
                 isLoggedIn = True
                 data = self.get_request(self.url, usecache=False)
                 soup = self.make_soup(data)

+        ## Check watchers only and mature enabled again, separately,
+        ## after login because they can still apply after login.
         if self.isWatchersOnly(data):
             raise exceptions.FailedToDownload(
                 'Deviation is only available for watchers.' +
                 'You must watch this author before you can download it.'
             )

         if self.requiresMatureContentEnabled(data):
             # as far as I can tell deviantArt has no way to show mature
             # content that doesn't involve logging in or using JavaScript
             if not isLoggedIn:
                 self.performLogin(self.url)
                 isLoggedIn = True
                 data = self.get_request(self.url, usecache=False)
                 soup = self.make_soup(data)
-                if self.requiresMatureContentEnabled(data):
-                    raise exceptions.FailedToDownload(
-                        'Deviation is set as mature, you must go into your account ' +
-                        'and enable showing of mature content.'
-                    )
+            if self.requiresMatureContentEnabled(data):
+                raise exceptions.FailedToDownload(
+                    'Deviation is set as mature, you must go into your account ' +
+                    'and enable showing of mature content.'
+                )

-        appurl = soup.select_one('meta[property="da:appurl"]')['content']
-        story_id = urlparse(appurl).path.lstrip('/')
+        appurl = soup.select_one('meta[property="og:url"]')['content']
+        if appurl:
+            story_id = urlparse(appurl).path.lstrip('/')
+        else:
+            logger.debug("Looking for JS story id")
+            ## after login, this is only found in a JS block.  Dunno why.
+            ## F875A309-B0DB-860E-5079-790D0FBE5668
+            match = re.match(r'\\"deviationUuid\\":\\"(?P<id>[A-Z0-9-]+)\\",',data)
+            if match:
+                story_id = match.group('id')
+            else:
+                raise exceptions.FailedToDownload('Failed to find Story ID.')
         self.story.setMetadata('storyId', story_id)

         title = soup.select_one('h1').get_text()
@@ -180,19 +222,35 @@ class DeviantArtComSiteAdapter(BaseSiteAdapter):
     def getChapterText(self, url):
         logger.debug('Getting chapter text from: %s', url)
         data = self.get_request(url)
+        # logger.debug(data)
         soup = self.make_soup(data)

         # remove comments section to avoid false matches
         comments = soup.select_one('[data-hook=comments_thread]')
-        comments.decompose()
+        if comments:
+            comments.decompose()
+        # previous search not always found in some stories.
+        # <div id="comments"></div> inside the real containing
+        # div seems more common
+        commentsdiv = soup.select_one('div#comments')
+        if commentsdiv:
+            commentsdiv.parent.decompose()

-        content = soup.select_one('[data-id=rich-content-viewer]')
+        # three different 'content' tags to look for.
+        # This is the current in Oct 2024
+        content = soup.select_one('[data-editor-viewer="1"]')
         if content is None:
-            # older story
+            # older story?  I can't find any of this style in Oct2024
+            content = soup.select_one('[data-id="rich-content-viewer"]')
+
+        if content is None:
+            # olderer story, but used by some older (2018) posts
             content = soup.select_one('.legacy-journal')
-            if content is None:
-                raise exceptions.FailedToDownload(
-                    'Could not find story text. Please open a bug with the URL %s' % self.url
-                )

+        if content is None:
+            raise exceptions.FailedToDownload(
+                'Could not find story text. Please open a bug with the URL %s' % self.url
+            )

         return self.utf8FromSoup(url, content)
@@ -95,7 +95,7 @@ class DokugaComAdapter(BaseSiteAdapter):
         params['Submit'] = 'Submit'

         # copy all hidden input tags to pick up appropriate tokens.
-        for tag in soup.findAll('input',{'type':'hidden'}):
+        for tag in soup.find_all('input',{'type':'hidden'}):
             params[tag['name']] = tag['value']

         loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
@@ -153,7 +153,7 @@ class DokugaComAdapter(BaseSiteAdapter):
         self.story.setMetadata('title',stripHTML(a))

         # Find the chapters:
-        chapters = soup.find('select').findAll('option')
+        chapters = soup.find('select').find_all('option')
         if len(chapters)==1:
             self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
         else:
@@ -168,7 +168,7 @@ class DokugaComAdapter(BaseSiteAdapter):
             asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')

             #grab the rest of the metadata from the author's page
-            for div in asoup.findAll('div'):
+            for div in asoup.find_all('div'):
                 nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
                 if nav != None:
                     break
@@ -208,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):

         else:
             asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
-            for div in asoup.findAll('div'):
+            for div in asoup.find_all('div'):
                 nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
                 if nav != None:
                     break
@@ -161,7 +161,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -181,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):

         self.setDescription(url,content.find('blockquote'))

-        for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
+        for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
             self.story.addToList('genre',genre.string)

-        for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
+        for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
             self.story.addToList('warnings',warning.string)

-        labels = content.findAll('b')
+        labels = content.find_all('b')

         for labelspan in labels:
             value = labelspan.nextSibling
@@ -208,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('rating', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -247,7 +247,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links
@@ -138,7 +138,7 @@ class EFPFanFicNet(BaseSiteAdapter):
             # no selector found, so it's a one-chapter story.
             self.add_chapter(self.story.getMetadata('title'),url)
         else:
-            allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')})
+            allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
             for o in allOptions:
                 url = u'https://%s/%s' % ( self.getSiteDomain(),
                                            o['value'])
@@ -170,14 +170,14 @@ class EFPFanFicNet(BaseSiteAdapter):
         if authsoup != None:
             # last author link with offset should be the 'next' link.
             authurl = u'https://%s/%s' % ( self.getSiteDomain(),
-                                           authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
+                                           authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )

             # Need author page for most of the metadata.
             logger.debug("fetching author page: (%s)"%authurl)
             authsoup = self.make_soup(self.get_request(authurl))
             #print("authsoup:%s"%authsoup)

-        storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
+        storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
         for storya in storyas:
             #print("======storya:%s"%storya)
             storyblock = storya.findParent('div',{'class':'storybloc'})
@@ -194,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
         # Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
         # Categoria: <a href="categories.php?catid=1&parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&a=">3</a> recensioni</div>

-        cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+        cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
         for cat in cats:
             self.story.addToList('category',cat.string)

@@ -262,7 +262,7 @@ class EFPFanFicNet(BaseSiteAdapter):

         seriessoup = self.make_soup(self.get_request(series_url))
         # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-        storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
+        storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
         i=1
         for a in storyas:
             if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
@@ -288,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
             raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

         # remove any header and 'o:p' tags.
-        for tag in div.findAll("head") + div.findAll("o:p"):
+        for tag in div.find_all("head") + div.find_all("o:p"):
             tag.extract()

         # change any html and body tags to div.
-        for tag in div.findAll("html") + div.findAll("body"):
+        for tag in div.find_all("html") + div.find_all("body"):
             tag.name='div'

         # remove extra bogus doctype.
@@ -126,7 +126,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
         self.story.setMetadata('rating', rating)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -144,7 +144,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):

         # <span class="label">Rated:</span> NC-17<br /> etc

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})

         value = labels[0].previousSibling
         svalue = ""
@@ -164,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value.split(' -')[0])

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -204,7 +204,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links
@@ -53,6 +53,9 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 #Setting the 'Zone' for each "Site"
 self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')

+# site change .nsns to -nsns
+self.zone = self.zone.replace('.nsns','-nsns')

 # normalized story URL.
 self._setURL('https://{0}.{1}/{2}/'.format(
 self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))

@@ -79,7 +82,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 @classmethod
 def getAcceptDomains(cls):

+# need both .nsns(old) and -nsns(new) because it's a domain
+# change, not just URL change.
 return ['aaran-st-vines.nsns.fanficauthors.net',
+'aaran-st-vines-nsns.fanficauthors.net',
 'abraxan.fanficauthors.net',
 'bobmin.fanficauthors.net',
 'canoncansodoff.fanficauthors.net',

@@ -95,9 +101,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 'jeconais.fanficauthors.net',
 'kinsfire.fanficauthors.net',
 'kokopelli.nsns.fanficauthors.net',
+'kokopelli-nsns.fanficauthors.net',
 'ladya.nsns.fanficauthors.net',
+'ladya-nsns.fanficauthors.net',
 'lorddwar.fanficauthors.net',
 'mrintel.nsns.fanficauthors.net',
+'mrintel-nsns.fanficauthors.net',
 'musings-of-apathy.fanficauthors.net',
 'ruskbyte.fanficauthors.net',
 'seelvor.fanficauthors.net',

@@ -108,7 +117,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 ################################################################################################
 @classmethod
 def getSiteExampleURLs(self):
-return ("https://aaran-st-vines.nsns.fanficauthors.net/A_Story_Name/ "
+return ("https://aaran-st-vines-nsns.fanficauthors.net/A_Story_Name/ "
 + "https://abraxan.fanficauthors.net/A_Story_Name/ "
 + "https://bobmin.fanficauthors.net/A_Story_Name/ "
 + "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "

@@ -123,10 +132,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 + "https://jbern.fanficauthors.net/A_Story_Name/ "
 + "https://jeconais.fanficauthors.net/A_Story_Name/ "
 + "https://kinsfire.fanficauthors.net/A_Story_Name/ "
-+ "https://kokopelli.nsns.fanficauthors.net/A_Story_Name/ "
-+ "https://ladya.nsns.fanficauthors.net/A_Story_Name/ "
++ "https://kokopelli-nsns.fanficauthors.net/A_Story_Name/ "
++ "https://ladya-nsns.fanficauthors.net/A_Story_Name/ "
 + "https://lorddwar.fanficauthors.net/A_Story_Name/ "
-+ "https://mrintel.nsns.fanficauthors.net/A_Story_Name/ "
++ "https://mrintel-nsns.fanficauthors.net/A_Story_Name/ "
 + "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
 + "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
 + "https://seelvor.fanficauthors.net/A_Story_Name/ "

@@ -136,8 +145,16 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):

 ################################################################################################
 def getSiteURLPattern(self):
+## .nsns kept here to match both . and -
 return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'

+@classmethod
+def get_section_url(cls,url):
+## only changing .nsns to -nsns and only when part of the
+## domain.
+url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
+return url

 ################################################################################################
 def doExtractChapterUrlsAndMetadata(self, get_cover=True):
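Since `get_section_url()` above only rewrites the domain form and leaves the story path alone, a quick illustration (the author zone here is invented for the demo):

# Illustration of the get_section_url() rewrite above; 'example-author'
# is a made-up zone, not a real fanficauthors.net author.
url = "https://example-author.nsns.fanficauthors.net/A_Story_Name/"
url = url.replace('.nsns.fanficauthors.net', '-nsns.fanficauthors.net')
print(url)  # https://example-author-nsns.fanficauthors.net/A_Story_Name/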
@@ -163,7 +180,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 # Find the chapters:
 # The published and update dates are with the chapter links...
 # so we have to get them from there.
-chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata(
+chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
 'storyId')+'/([a-zA-Z0-9_]+)/'))

 # Here we are getting the published date. It is the date the first chapter was "updated"

@@ -202,7 +219,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 ## Raising AdultCheckRequired after collecting chapters gives
 ## a double chapter list. So does genre, but it de-dups
 ## automatically.
-if( self.story.getMetadata('rating') == 'Mature'
+if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only']
 and not (self.is_adult or self.getConfig("is_adult")) ):
 raise exceptions.AdultCheckRequired(self.url)

@@ -226,7 +243,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 # grab the text for an individual chapter.
 def getChapterText(self, url):
 logger.debug('Getting chapter text from: %s' % url)
-if( self.story.getMetadata('rating') == 'Mature' and
+if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only'] and
 (self.is_adult or self.getConfig("is_adult")) ):
 addurl = "?bypass=1"
 else:

@@ -241,8 +258,8 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 "Error downloading Chapter: '{0}'! Missing required element!".format(url))

 #Now, there are a lot of extranious tags within the story division.. so we will remove them.
-for tag in story.findAll('ul',{'class':'pager'}) + story.findAll(
-'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}):
+for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
+'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
 tag.extract()

 return self.utf8FromSoup(url,story)
@@ -134,7 +134,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
 ## restrict meta searches to header.
 fichead = soup.find('div',class_='FicHead')
 def get_meta_content(title):
-val_label = fichead.find('div',string=title+u':')
+val_label = fichead.find('div',string=re.compile(u'^'+title+u':'))
 if val_label:
 return val_label.find_next('div')

@@ -150,7 +150,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
 self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))

 ## Need to login for any rating higher than General.
-if self.story.getMetadata('rating') != 'General' and self.needToLoginCheck(data):
+if self.story.getMetadataRaw('rating') != 'General' and self.needToLoginCheck(data):
 self.performLogin(url)
 # reload after login.
 data = self.get_request(url,usecache=False)

@@ -168,7 +168,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
 self.story.setMetadata('title',stripHTML(h))

 ## author(s):
-content = get_meta_content(u'Автор')
+content = get_meta_content(u'Авторы?')
 if content:
 alist = content.find_all('a', class_='user')
 for a in alist:
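Passing a regex into `get_meta_content()` lets one pattern cover both singular and plural header labels; a quick check of the idea:

import re

# 'Авторы?' matches both the 'Автор:' and 'Авторы:' label forms;
# the label texts below are invented examples.
pat = re.compile(u'^' + u'Авторы?' + u':')
print(bool(pat.match(u'Автор: Имя')))     # True
print(bool(pat.match(u'Авторы: Имена')))  # True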
@@ -181,12 +181,8 @@ class FanFicsMeAdapter(BaseSiteAdapter):
 self.story.setMetadata('authorUrl','https://'+self.host)
 self.story.setMetadata('authorId','0')

-# translator(s)
-content = get_meta_content(u'Переводчик')
-if not content:
-# Переводчик vs Переводчи is 'Translator' vs 'TranslatorS'
-content = get_meta_content(u'Переводчи')
-logger.debug(content)
+# translator(s) in different strings
+content = get_meta_content(u'Переводчикк?и?')
 if content:
 for a in content.find_all('a', class_='user'):
 self.story.addToList('translatorsId',a['href'].split('/user')[-1])

@@ -301,6 +297,10 @@ class FanFicsMeAdapter(BaseSiteAdapter):
 # grab the text for an individual chapter.
 def getChapterTextNum(self, url, index):
 logger.debug('Getting chapter text for: %s index: %s' % (url,index))
+m = re.match(r'.*&chapter=(\d+).*',url)
+if m:
+index=m.group(1)
+logger.debug("Using index(%s) from &chapter="%index)

 chapter_div = None
 if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
@@ -44,9 +44,8 @@ class FanfictalkComAdapter(BaseSiteAdapter):
 # get storyId from url--url validation guarantees query is only sid=1234
 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

 # normalized story URL.
-self._setURL('https://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
+self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

 # Each adapter needs to have a unique site abbreviation.
 self.story.setMetadata('siteabbrev','ahpfftc')

@@ -57,24 +56,24 @@ class FanfictalkComAdapter(BaseSiteAdapter):

 @classmethod
 def getAcceptDomains(cls):
-return [cls.getSiteDomain(),'archive.hpfanfictalk.com']
+return [cls.getSiteDomain(),'archive.hpfanfictalk.com','fanfictalk.com']

 @classmethod
 def getConfigSections(cls):
 "Only needs to be overriden if has additional ini sections."
-return [cls.getConfigSection(),'archive.hpfanfictalk.com']
+return [cls.getConfigSection(),'archive.hpfanfictalk.com','fanfictalk.com']

-@staticmethod # must be @staticmethod, don't remove it.
+@staticmethod # must be @stgetAcceptDomainsaticmethod, don't remove it.
 def getSiteDomain():
 # The site domain. Does have www here, if it uses it.
-return 'fanfictalk.com'
+return 'archive.fanfictalk.com'

 @classmethod
 def getSiteExampleURLs(cls):
-return "https://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234"
+return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

 def getSiteURLPattern(self):
-return r"https?://(archive\.hp)?"+re.escape(self.getSiteDomain())+r"(/archive)?/viewstory\.php\?sid=\d+$"
+return r"https?://("+r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()])+r")(/archive)?/viewstory\.php\?sid=\d+$"

 ## Getting the chapter list and the meta data, plus 'is adult' checking.
 def extractChapterUrlsAndMetadata(self):
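The rewritten `getSiteURLPattern()` just escapes each accepted domain and ORs them together; assuming the three domains returned by `getAcceptDomains()` above, it expands to roughly:

# Rough expansion of the new getSiteURLPattern(); domain list taken
# from the getAcceptDomains() change above.
domains = ['archive.fanfictalk.com', 'archive.hpfanfictalk.com', 'fanfictalk.com']
pattern = (r"https?://("
           + r"|".join(x.replace('.', r'\.') for x in domains)
           + r")(/archive)?/viewstory\.php\?sid=\d+$")
print(pattern)
# https?://(archive\.fanfictalk\.com|archive\.hpfanfictalk\.com|fanfictalk\.com)(/archive)?/viewstory\.php\?sid=\d+$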
@@ -118,7 +117,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
 # Find the chapters:
 for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
 # just in case there's tags, like <i> in chapter titles.
-self.add_chapter(chapter,'https://'+self.host+'/archive/'+chapter['href'])
+self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])

 # categories
 for a in soup.select("div#sort a"):

@@ -171,14 +170,14 @@ class FanfictalkComAdapter(BaseSiteAdapter):
 # Site allows stories to be in several series at once. FFF
 # isn't thrilled with that, we have series00, series01, etc.
 # Example:
-# https://fanfictalk.com/archive/viewstory.php?sid=483
+# https://archive.fanfictalk.com/viewstory.php?sid=483

 if self.getConfig("collect_series"):
 seriesspan = soup.find('span',label='Series')
 for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))):
 # logger.debug(seriesa)
 series_name = stripHTML(seriesa)
-series_url = 'https://'+self.host+'/archive/'+seriesa['href']
+series_url = 'https://'+self.host+'/'+seriesa['href']

 seriessoup = self.make_soup(self.get_request(series_url))
 storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))

@@ -205,9 +204,17 @@ class FanfictalkComAdapter(BaseSiteAdapter):
 # grab the text for an individual chapter.
 def getChapterText(self, url):

-logger.debug('Getting chapter text from: %s' % url)
+if self.is_adult or self.getConfig("is_adult"):
+# Weirdly, different sites use different warning numbers.
+# If the title search below fails, there's a good chance
+# you need a different number. print data at that point
+# and see what the 'click here to continue' url says.
+addurl = "&ageconsent=ok&warning=3"
+else:
+addurl=""

-soup = self.make_soup(self.get_request(url))
+logger.debug('Getting chapter text from: %s' % (url+addurl))
+soup = self.make_soup(self.get_request(url+addurl))

 div = soup.find('div', {'id' : 'story'})
@@ -93,6 +93,14 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 # logger.debug("post-url:%s"%url)
 return url

+@classmethod
+def get_url_search(cls,url):
+regexp = super(getClass(), cls).get_url_search(url)
+regexp = re.sub(r"^(?P<keep>.*net/s/\d+/\d+/)(?P<urltitle>[^\$]*)?",
+r"\g<keep>(.*)",regexp)
+logger.debug(regexp)
+return regexp

 def getSiteURLPattern(self):
 return self._get_site_url_pattern()

@@ -102,6 +110,31 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
 r"https://www.\g<keep>",url)+self.urltitle

+def get_request(self,url,usecache=True):
+## use super version if not set or isn't a chapter URL with a
+## title.
+if( not self.getConfig("try_shortened_title_urls") or
+not re.match(r"https?://www\.fanfiction\.net/s/\d+/\d+/(?P<title>[^/]+)$", url) ):
+return super(getClass(), self).get_request(url,usecache)

+## kludgey way to attempt more than one URL variant by
+## removing title one letter at a time. Note that network and
+## open_pages_in_browser retries still happen first.
+titlelen = len(url.split('/')[-1])
+maxcut = min([4,titlelen])
+j = 0
+while j < maxcut: # should actually leave loop either by
+# return or exception raise.
+try:
+useurl = url
+if j: # j==0, full URL, then remove letters.
+useurl = url[:-j]
+return super(getClass(), self).get_request(useurl,usecache)
+except exceptions.HTTPErrorFFF as fffe:
+if j >= maxcut or 'Page not found or expired' not in unicode(fffe):
+raise
+j = j+1

 def doExtractChapterUrlsAndMetadata(self,get_cover=True):

 # fetch the chapter. From that we will get almost all the
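The `try_shortened_title_urls` kludge above is easier to follow in isolation. A minimal sketch of the same retry idea, where `fetch()` is a hypothetical stand-in for the superclass `get_request()` (the real adapter catches `HTTPErrorFFF`, not `IOError`):

# Hedged sketch only, not the FanFicFare API: fetch() stands in for
# the superclass get_request() call.
def fetch_with_trimmed_title(url, fetch, maxcut=4):
    # never trim more letters than the title segment actually has
    maxcut = min(maxcut, len(url.split('/')[-1]))
    for j in range(maxcut):
        useurl = url[:-j] if j else url   # j == 0 tries the full URL first
        try:
            return fetch(useurl)
        except IOError as e:
            # only keep trimming on the specific 'expired page' failure;
            # re-raise on the last attempt or any other error
            if j == maxcut - 1 or 'Page not found or expired' not in str(e):
                raise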
@@ -134,7 +167,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 ## the first chapter. It generates another server request and
 ## doesn't seem to be needed lately, so now default it to off.
 try:
-chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
+chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
 # get chapter part of url.
 except:
 chapcount = 1

@@ -179,7 +212,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 ## For 1, use the second link.
 ## For 2, fetch the crossover page and pull the two categories from there.
 pre_links = soup.find('div',{'id':'pre_story_links'})
-categories = pre_links.findAll('a',{'class':'xcontrast_txt'})
+categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
 #print("xcontrast_txt a:%s"%categories)
 if len(categories) > 1:
 # Strangely, the ones with *two* links are the

@@ -218,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):

 grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
-# for b in grayspan.findAll('button'):
+# for b in grayspan.find_all('button'):
 # b.extract()
 metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
 #logger.debug("metatext:(%s)"%metatext)

@@ -257,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):

 # Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
 # Published: <span data-xutime='1384358726'>8m ago</span>
-dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
+dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
 if len(dates) > 1 :
 # updated get set to the same as published upstream if not found.
 self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))

@@ -308,11 +341,10 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 img = soup.select_one('img.lazy.cimage')
 cover_url=img['data-original']
 except:
-img = soup.select_one('img.cimage:not(.lazy)')
-if img:
-cover_url=img['src']
 ## Nov 19, 2020, ffnet lazy cover images returning 0 byte
 ## files.
+## Nov 2023 - src is always "/static/images/d_60_90.jpg" now
+## Only take cover if there's data-original
+## Primary motivator is to prevent unneeded author page hits.
+pass
 logger.debug("cover_url:%s"%cover_url)

 authimg_url = ""

@@ -363,7 +395,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 # no selector found, so it's a one-chapter story.
 self.add_chapter(self.story.getMetadata('title'),url)
 else:
-allOptions = select.findAll('option')
+allOptions = select.find_all('option')
 for o in allOptions:
 ## title URL will be put back on chapter URL during
 ## normalize_chapterurl() anyway, but also here for
fanficfare/adapters/adapter_fanfictionsfr.py (157 lines, new file)

@@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-

# Copyright 2024 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import io
import logging
import re
import zipfile

from bs4 import BeautifulSoup
# py2 vs py3 transition

from .base_adapter import BaseSiteAdapter, makeDate
from fanficfare.htmlcleanup import stripHTML
from .. import exceptions as exceptions

logger = logging.getLogger(__name__)

def getClass():
return FanfictionsFrSiteAdapter

class FanfictionsFrSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.story.setMetadata('siteabbrev', 'fanfictionsfr')
self.story.setMetadata('langcode','fr')
self.story.setMetadata('language','Français')

# get storyId from url--url validation guarantees query correct
match = re.match(self.getSiteURLPattern(), url)
if not match:
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

story_id = match.group('id')
self.story.setMetadata('storyId', story_id)
fandom_name = match.group('fandom')

self._setURL('https://%s/fanfictions/%s/%s/chapters.html' % (self.getSiteDomain(), fandom_name, story_id))

@staticmethod
def getSiteDomain():
return 'www.fanfictions.fr'

@classmethod
def getSiteExampleURLs(cls):
return 'https://%s/fanfictions/fandom/fanfiction-id/chapters.html' % cls.getSiteDomain()

def getSiteURLPattern(self):
return r'https?://(?:www\.)?fanfictions\.fr/fanfictions/(?P<fandom>[^/]+)/(?P<id>[^/]+)(/chapters.html)?'

def extractChapterUrlsAndMetadata(self):
logger.debug('URL: %s', self.url)

data = self.get_request(self.url)
soup = self.make_soup(data)

# detect if the fanfiction is 'suspended' (chapters unavailable)
alert_div = soup.find('div', id='alertInactiveFic')
if alert_div:
raise exceptions.FailedToDownload("Failed to download the fanfiction, most likely because it is suspended.")

title_element = soup.find('h1', itemprop='name')
self.story.setMetadata('title', stripHTML(title_element))

author_div = soup.find('div', itemprop='author')
author_name = stripHTML(author_div.a)
author_id = author_div.a['href'].split('/')[-1].replace('.html', '')

self.story.setMetadata('author', author_name)
self.story.setMetadata('authorId', author_id)

published_date_element = soup.find('span', class_='date-distance')
published_date_text = published_date_element['data-date']
published_date = makeDate(published_date_text, '%Y-%m-%d %H:%M:%S')
if published_date:
self.story.setMetadata('datePublished', published_date)

status_element = soup.find('p', title="Statut de la fanfiction").find('span', class_='badge')
french_status = stripHTML(status_element)
status_translation = {
"En cours": "In-Progress",
"Terminée": "Completed",
"One-shot": "Completed",
}
self.story.setMetadata('status', status_translation.get(french_status, french_status))

genre_elements = soup.find('div', title="Format et genres").find_all('span', class_="highlightable")
self.story.extendList('genre', [ stripHTML(genre) for genre in genre_elements[1:] ])

category_elements = soup.find_all('li', class_="breadcrumb-item")
self.story.extendList('category', [ stripHTML(category) for category in category_elements[-2].find_all('a') ])

first_description = soup.find('p', itemprop='abstract')
self.setDescription(self.url, first_description)

chapter_cards = soup.find_all(class_=['card', 'chapter'])

for chapter_card in chapter_cards:
chapter_title_tag = chapter_card.find('h2')
if chapter_title_tag:
chapter_title = stripHTML(chapter_title_tag)
chapter_link = 'https://'+self.getSiteDomain()+chapter_title_tag.find('a')['href']

# Clean up the chapter title by replacing multiple spaces and newline characters with a single space
chapter_title = re.sub(r'\s+', ' ', chapter_title)

self.add_chapter(chapter_title, chapter_link)

last_chapter_div = chapter_cards[-1]
updated_date_element = last_chapter_div.find('span', class_='date-distance')
last_chapter_update_date = updated_date_element['data-date']
date = makeDate(last_chapter_update_date, '%Y-%m-%d %H:%M:%S')
if date:
self.story.setMetadata('dateUpdated', date)

def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)

response, redirection_url = self.get_request_redirected(url)

if "telecharger_pdf.html" in redirection_url:
with zipfile.ZipFile(io.BytesIO(response.encode('latin1'))) as z:
# Assuming there's only one text file inside the zip
file_list = z.namelist()
if len(file_list) != 1:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Zip file should contain exactly one text file!" % url)
text_filename = file_list[0]
with z.open(text_filename) as text_file:
# Decode the text file with windows-1252 encoding
text = text_file.read().decode('windows-1252')
return text.replace("\r\n", "<br>\r\n")
else:
soup = self.make_soup(response)

div_content = soup.find('div', id='readarea')
if div_content is None:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

return self.utf8FromSoup(url, div_content)
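The download fallback in `getChapterText()` above pulls a single text file out of a zip and decodes it as windows-1252; the core of that step, in isolation (a sketch assuming `response` holds the raw zip bytes as a latin1-decoded str, matching the adapter's usage):

import io, zipfile

# Minimal sketch of the zip fallback above; not the adapter itself.
def text_from_single_file_zip(response):
    with zipfile.ZipFile(io.BytesIO(response.encode('latin1'))) as z:
        names = z.namelist()
        if len(names) != 1:
            raise ValueError("expected exactly one file in the zip")
        with z.open(names[0]) as f:
            # site serves the chapter text as windows-1252
            return f.read().decode('windows-1252')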
@@ -134,7 +134,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
 self.story.setMetadata('author',stripHTML(a))

 # Find the chapters:
-for chapter in soup.find('select').findAll('option'):
+for chapter in soup.find('select').find_all('option'):
 self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])

 ## title="Wörter" failed with max_zalgo:1

@@ -163,11 +163,11 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
 except e:
 logger.debug("Failed to find native status:%s"%e)

-if head.find('span',title='Fertiggestellt'):
+if head.find('span',title='fertiggestellt'):
 self.story.setMetadata('status', 'Completed')
-elif head.find('span',title='Pausiert'):
+elif head.find('span',title='pausiert'):
 self.story.setMetadata('status', 'Paused')
-elif head.find('span',title='Abgebrochen'):
+elif head.find('span',title='abgebrochen'):
 self.story.setMetadata('status', 'Cancelled')
 else:
 self.story.setMetadata('status', 'In-Progress')

@@ -181,13 +181,13 @@ class FanFiktionDeAdapter(BaseSiteAdapter):

 # #find metadata on the author's page
 # asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
-# tr=asoup.findAll('tr')
+# tr=asoup.find_all('tr')
 # for i in range(1,len(tr)):
 # a = tr[i].find('a')
 # if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
 # break

-# td = tr[i].findAll('td')
+# td = tr[i].find_all('td')
 # self.story.addToList('category',stripHTML(td[2]))
 # self.story.setMetadata('rating', stripHTML(td[5]))
 # self.story.setMetadata('numWords', stripHTML(td[6]))

@@ -204,7 +204,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
 soup = self.make_soup(self.get_request(url))

 div = soup.find('div', {'id' : 'storytext'})
-for a in div.findAll('script'):
+for a in div.find_all('script'):
 a.extract()

 if None == div:
@@ -1,168 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
####################################################################################################
### Adapted by Rikkit on November 7. 2017
###=================================================================================================
### Tested with Calibre
####################################################################################################

from __future__ import absolute_import
import logging
import re
# py2 vs py3 transition

from .base_adapter import BaseSiteAdapter, makeDate

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

logger = logging.getLogger(__name__)

def getClass():
''' Initializing the class '''
return FastNovelNetAdapter

class FastNovelNetAdapter(BaseSiteAdapter):
''' Adapter for FASTNOVEL.net '''
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)

self.story.setMetadata('siteabbrev', 'fstnvl')

self.dateformat = '%d/%m/%Y'

# get storyId from url--url validation guarantees query correct
match = re.match(self.getSiteURLPattern(), url)
if not match:
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

story_id = match.group('id')
self.story.setMetadata('storyId', story_id)
self._setURL('https://%s/%s/' % (self.getSiteDomain(), story_id))

@staticmethod
def getSiteDomain():
return 'fastnovels.net'

@classmethod
def getAcceptDomains(cls):
return [cls.getSiteDomain(),'fastnovel.net']

@classmethod
def getConfigSections(cls):
"Only needs to be overriden if has additional ini sections."
return [cls.getConfigSection(),'fastnovel.net']

@classmethod
def getSiteExampleURLs(cls):
return "https://fastnovels.net/a-story-name-id"

def getSiteURLPattern(self):
# https://fastnovels.net/ultimate-scheming-system-158/
# also accept fastnovel.net
return r"https?://fastnovels?\.net/(?P<id>[^/]+)"

## Normalized chapter URLs by changing old titlenum part to be
## same as storyId.
def normalize_chapterurl(self,url):
# https://fastnovels.net/cultivation-chat-group8-29/chapter-25206.html
return re.sub(r"\.net/.*(?P<keep>/chapter-\d+.html)",
r".net/"+self.story.getMetadata('storyId')+r"\g<keep>",url)

def extractChapterUrlsAndMetadata(self):
logger.debug('URL: %s', self.url)

(data,rurl) = self.get_request_redirected(self.url)
if rurl != self.url:
match = re.match(self.getSiteURLPattern(), rurl)
if not match:
## shouldn't happen, but in case it does...
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

story_id = match.group('id')
self.story.setMetadata('storyId', story_id)
self._setURL('https://%s/%s/' % (self.getSiteDomain(), story_id))
logger.debug("set to redirected url:%s"%self.url)

soup = self.make_soup(data)

self.story.setMetadata('title', soup.find('h1').string)

for li in soup.select('.meta-data li'):
label = li.select_one('label')
if not label:
continue

if label.string == "Author:":
for a in li.select('a'):
self.story.setMetadata('authorId', a["href"].split('/')[2])
self.story.setMetadata('authorUrl','https://'+self.host+a["href"])
self.story.setMetadata('author', a["title"])

if label.string == "Genre:":
for a in li.select('a'):
self.story.addToList('genre',a["title"])

if label.string == "Status:":
if li.select_one('strong').string.strip() == "Completed":
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')

if label.string == "Last updated:":
dateUpd = label.next_sibling.strip()
self.story.setMetadata('dateUpdated', makeDate(stripHTML(dateUpd), self.dateformat))

coverurl = soup.select_one('div.book-cover')["data-original"]
if coverurl != "https://fastnovels.net/images/novel/default.jpg":
self.setCoverImage(self.url, coverurl)

tags = soup.select_one('.tags')
if tags:
for a in tags.select("li.tag-item a"):
self.story.addToList('tags', a["title"])
# extract tags, because it inside description
tags.extract()

self.setDescription(self.url, soup.select_one('div.content p'))

## number from end of storyId, taken this way in case it changes.
# <input id="film_id" type="hidden" value="10667">
film_id = soup.select_one('input#post_id')['value']
ch_data = self.post_request('https://'+self.host+'/',
parameters={'id': film_id,
'list_postdata': '1'})
# logger.debug(ch_data)
ch_soup = self.make_soup(ch_data)
# logger.debug(ch_soup)
# for book in soup.select("#list-chapters .book"):
# volume = book.select_one('.title a').string
for a in ch_soup.select(".list-chapters a.chapter"):
# title = volume + " " + stripHTML(a)
title = stripHTML(a)
self.add_chapter(title, 'https://' + self.host + a["href"])

def getChapterText(self, url):
data = self.get_request(url)
soup = self.make_soup(data)

story = soup.select_one('#chapter-body')
if not story:
raise exceptions.FailedToDownload(
"Error downloading Chapter: %s! Missing required element!" % url)

return self.utf8FromSoup(url, story)
@@ -15,16 +15,16 @@
 # limitations under the License.
 #

-from __future__ import absolute_import
-import datetime
+from __future__ import absolute_import,unicode_literals
+# import datetime
 import logging
 logger = logging.getLogger(__name__)
 import json
 import re
-from .. import translit
+# from .. import translit

 from ..htmlcleanup import stripHTML
-from .. import exceptions as exceptions
+from .. import exceptions# as exceptions

 # py2 vs py3 transition

@@ -58,7 +58,7 @@ class FicBookNetAdapter(BaseSiteAdapter):

 # The date format will vary from site to site.
 # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-self.dateformat = "%d %m %Y"
+self.dateformat = u"%d %m %Y г., %H:%M"

 @staticmethod # must be @staticmethod, don't remove it.
 def getSiteDomain():

@@ -67,17 +67,33 @@ class FicBookNetAdapter(BaseSiteAdapter):

 @classmethod
 def getSiteExampleURLs(cls):
-return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content"
+return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82 https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82/94793742#part_content"

 def getSiteURLPattern(self):
-return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"\d+"
+return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"[\d\-a-zA-Z]+"

+def performLogin(self,url,data):
+params = {}
+if self.password:
+params['login'] = self.username
+params['password'] = self.password
+else:
+params['login'] = self.getConfig("username")
+params['password'] = self.getConfig("password")

+logger.debug("Try to login in as (%s)" % params['login'])
+d = self.post_request('https://' + self.getSiteDomain() + '/login_check_static',params,usecache=False)

+if 'Войти используя аккаунт на сайте' in d:
+raise exceptions.FailedToLogin(url,params['login'])

+return True

 ## Getting the chapter list and the meta data, plus 'is adult' checking.
-def extractChapterUrlsAndMetadata(self):
+def extractChapterUrlsAndMetadata(self,get_cover=True):
 url=self.url
 logger.debug("URL: "+url)
 data = self.get_request(url)

 soup = self.make_soup(data)

 adult_div = soup.find('div',id='adultCoverWarning')

@@ -87,9 +103,11 @@ class FicBookNetAdapter(BaseSiteAdapter):
 else:
 raise exceptions.AdultCheckRequired(self.url)

 ## Title
-a = soup.find('section',{'class':'chapter-info'}).find('h1')
+try:
+a = soup.find('section',{'class':'chapter-info'}).find('h1')
+except AttributeError:
+raise exceptions.FailedToDownload("Error collecting meta: %s! Missing required element!" % url)
 # kill '+' marks if present.
 sup = a.find('sup')
 if sup:

@@ -105,34 +123,6 @@ class FicBookNetAdapter(BaseSiteAdapter):
 self.story.setMetadata('author',a.text)
 logger.debug("Author: (%s)"%self.story.getMetadata('author'))

-# Find the chapters:
-pubdate = None
-chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
-if chapters != None:
-for chapdiv in chapters.findAll('li', {'class':'part'}):
-chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
-churl='https://'+self.host+chapter['href']
-self.add_chapter(chapter,churl)

-datespan = chapdiv.find('span')
-if pubdate == None and datespan:
-pubdate = translit.translit(stripHTML(datespan))
-update = translit.translit(stripHTML(datespan))
-else:
-self.add_chapter(self.story.getMetadata('title'),url)
-self.story.setMetadata('numChapters',1)
-pubdate=translit.translit(stripHTML(soup.find('div',{'class':'title-area'}).find('span')))
-update=pubdate

-logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))

-if not ',' in pubdate:
-pubdate=datetime.date.today().strftime(self.dateformat)
-if not ',' in update:
-update=datetime.date.today().strftime(self.dateformat)
-pubdate=pubdate.split(',')[0]
-update=update.split(',')[0]

 fullmon = {"yanvarya":"01", u"января":"01",
 "fievralya":"02", u"февраля":"02",
 "marta":"03", u"марта":"03",

@@ -146,31 +136,50 @@ class FicBookNetAdapter(BaseSiteAdapter):
 "noyabrya":"11", u"ноября":"11",
 "diekabrya":"12", u"декабря":"12" }

-for (name,num) in fullmon.items():
-if name in pubdate:
-pubdate = pubdate.replace(name,num)
-if name in update:
-update = update.replace(name,num)
+# Find the chapters:
+pubdate = None
+chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
+if chapters is not None:
+for chapdiv in chapters.find_all('li', {'class':'part'}):
+chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
+churl='https://'+self.host+chapter['href']

-self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
-self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
+# Find the chapter dates.
+date_str = chapdiv.find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
+for month_name, month_num in fullmon.items():
+date_str = date_str.replace(month_name, month_num)
+chapterdate = makeDate(date_str,self.dateformat)
+self.add_chapter(chapter,churl,
+{'date':chapterdate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format",self.dateformat)))})

+if pubdate is None and chapterdate:
+pubdate = chapterdate
+update = chapterdate
+else:
+self.add_chapter(self.story.getMetadata('title'),url)
+date_str = soup.find('div', {'class' : 'part-date'}).find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
+for month_name, month_num in fullmon.items():
+date_str = date_str.replace(month_name, month_num)
+pubdate = update = makeDate(date_str,self.dateformat)

 logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))

+self.story.setMetadata('dateUpdated', update)
+self.story.setMetadata('datePublished', pubdate)
 self.story.setMetadata('language','Russian')

 ## after site change, I don't see word count anywhere.
 # pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
 # pr='https://'+self.host+pr['href']
 # pr = self.make_soup(self.get_request(pr))
 # pr=pr.findAll('div', {'class' : 'part_text'})
 # i=0
 # for part in pr:
 # i=i+len(stripHTML(part).split(' '))
 # self.story.setMetadata('numWords', unicode(i))
 dlinfo = soup.select_one('header.d-flex.flex-column.gap-12.word-break')

-dlinfo = soup.find('div',{'class':'fanfic-main-info'})
 series_label = dlinfo.select_one('div.description.word-break').find('strong', string='Серия:')
 logger.debug('Series: %s'%str(series_label))
 if series_label:
 series_div = series_label.find_next_sibling("div")
 # No accurate series number as for that, additional request needs to be made
 self.setSeries(stripHTML(series_div.a), 1)
 self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))

 i=0
-fandoms = dlinfo.find('div').findAll('a', href=re.compile(r'/fanfiction/\w+'))
+fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
 for fandom in fandoms:
 self.story.addToList('category',fandom.string)
 i=i+1
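A worked example of the month-name substitution driving those chapter dates (only three `fullmon` entries shown; the simple "%d %m %Y" format is assumed here, while the adapter's self.dateformat also carries a time part):

from datetime import datetime

# Sketch of the fullmon substitution above, trimmed to three entries.
FULLMON = {u"января": "01", u"февраля": "02", u"марта": "03"}

def parse_ru_date(text, fmt="%d %m %Y"):
    for name, num in FULLMON.items():
        text = text.replace(name, num)
    return datetime.strptime(text, fmt)

print(parse_ru_date(u"5 марта 2023"))  # 2023-03-05 00:00:00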
@@ -179,13 +188,16 @@ class FicBookNetAdapter(BaseSiteAdapter):

 tags = soup.find('div',{'class':'tags'})
 if tags:
-for genre in tags.findAll('a',href=re.compile(r'/tags/')):
+for genre in tags.find_all('a',href=re.compile(r'/tags/')):
 self.story.addToList('genre',stripHTML(genre))

 logger.debug("category: (%s)"%self.story.getMetadata('category'))
 logger.debug("genre: (%s)"%self.story.getMetadata('genre'))

+ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
+self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))

-# meta=table.findAll('a', href=re.compile(r'/ratings/'))
+# meta=table.find_all('a', href=re.compile(r'/ratings/'))
 # i=0
 # for m in meta:
 # if i == 0:

@@ -203,7 +215,12 @@ class FicBookNetAdapter(BaseSiteAdapter):
 else:
 self.story.setMetadata('status', 'In-Progress')

-paircharsdt = soup.find('strong',text='Пэйринг и персонажи:')
+try:
+self.story.setMetadata('universe', stripHTML(dlinfo.find('a', href=re.compile('/fandom_universe/'))))
+except AttributeError:
+pass

+paircharsdt = soup.find('strong',string='Пэйринг и персонажи:')
 # site keeps both ships and indiv chars in /pairings/ links.
 if paircharsdt:
 for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):

@@ -216,8 +233,98 @@ class FicBookNetAdapter(BaseSiteAdapter):
 self.story.addToList('characters',stripHTML(paira))

 summary=soup.find('div', itemprop='description')
-self.setDescription(url,summary)
-#self.story.setMetadata('description', summary.text)
+if summary:
+# Fix for the text not displaying properly
+summary['class'].append('part_text')
+self.setDescription(url,summary)
+#self.story.setMetadata('description', summary.text)

+stats = soup.find('div', {'class':'hat-actions-container'})
+targetdata = stats.find_all('span', {'class' : 'main-info'})
+for data in targetdata:
+svg_class = data.find('svg')['class'][1] if data.find('svg') else None
+value = int(stripHTML(data)) if stripHTML(data).isdigit() else 0

+if svg_class == 'ic_thumbs-up' and value > 0:
+self.story.setMetadata('likes', value)
+#logger.debug("likes: (%s)"%self.story.getMetadata('likes'))
+elif svg_class == 'ic_bubble-dark' and value > 0:
+self.story.setMetadata('reviews', value)
+#logger.debug("reviews: (%s)"%self.story.getMetadata('reviews'))
+elif svg_class == 'ic_bookmark' and value > 0:
+self.story.setMetadata('numCollections', value)
+logger.debug("numCollections: (%s)"%self.story.getMetadata('numCollections'))

+# Grab the amount of pages and words
+targetpages = soup.find('strong',string='Размер:').find_next('div')
+if targetpages:
+targetpages_text = re.sub(r"(?<!\,)\s| ", "", targetpages.text, flags=re.UNICODE | re.MULTILINE)

+pages_raw = re.search(r'(\d+)(?:страницы|страниц)', targetpages_text, re.UNICODE)
+pages = int(pages_raw.group(1))
+if pages > 0:
+self.story.setMetadata('pages', pages)
+logger.debug("pages: (%s)"%self.story.getMetadata('pages'))

+numWords_raw = re.search(r"(\d+)(?:слова|слов)", targetpages_text, re.UNICODE)
+numWords = int(numWords_raw.group(1))
+if numWords > 0:
+self.story.setMetadata('numWords', numWords)
+logger.debug("numWords: (%s)"%self.story.getMetadata('numWords'))

+# Grab FBN Category
+class_tag = soup.select_one('div[class^="badge-with-icon direction"]').find('span', {'class' : 'badge-text'}).text
+if class_tag:
+self.story.setMetadata('classification',class_tag)
+#logger.debug("classification: (%s)"%self.story.getMetadata('classification'))

+# Find dedication.
+ded = soup.find('div', {'class' : 'js-public-beta-dedication'})
+if ded:
+ded['class'].append('part_text')
+self.story.setMetadata('dedication',ded)

+# Find author comment
+comm = soup.find('div', {'class' : 'js-public-beta-author-comment'})
+if comm:
+comm['class'].append('part_text')
+self.story.setMetadata('authorcomment',comm)

+follows = stats.find('fanfic-follow-button')[':follow-count']
+if int(follows) > 0:
+self.story.setMetadata('follows', int(follows))
+logger.debug("follows: (%s)"%self.story.getMetadata('follows'))

+# Grab the amount of awards
+numAwards = 0
+try:
+awards = soup.find('fanfic-reward-list')[':initial-fic-rewards-list']
+award_list = json.loads(awards)
+numAwards = int(len(award_list))
+# Grab the awards, but if multiple awards have the same name, only one will be kept; only an issue with hundreds of them.
+self.story.extendList('awards', [str(award['user_text']) for award in award_list])
+#logger.debug("awards (%s)"%self.story.getMetadata('awards'))
+except (TypeError, KeyError):
+logger.debug("Could not grab the awards")

+if numAwards > 0:
+self.story.setMetadata('numAwards', numAwards)
+logger.debug("Num Awards (%s)"%self.story.getMetadata('numAwards'))

+if get_cover:
+cover = soup.find('fanfic-cover', {'class':"jsVueComponent"})
+if cover is not None:
+self.setCoverImage(url,cover['src-original'])

+def replace_formatting(self,tag):
+tname = tag.name
+## operating on plain text because BS4 is hard to work on
+## text with.
+## stripHTML() discards whitespace around other tags, like <i>
+txt = tag.get_text()
+txt = txt.replace("\n","<br/>")
+soup = self.make_soup("<"+tname+">"+txt+"</"+tname+">")
+return soup.find(tname)

 # grab the text for an individual chapter.
 def getChapterText(self, url):

@@ -227,10 +334,60 @@ class FicBookNetAdapter(BaseSiteAdapter):
 soup = self.make_soup(self.get_request(url))

 chapter = soup.find('div', {'id' : 'content'})
-if chapter == None: ## still needed?
+if chapter is None: ## still needed?
 chapter = soup.find('div', {'class' : 'public_beta_disabled'})

-if None == chapter:
+if chapter is None:
 raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

+## ficbook uses weird CSS white-space: pre-wrap; for
+## paragraphing. Doesn't work with txt output
+if 'part_text' in chapter['class'] and self.getConfig('replace_text_formatting'):
+## copy classes, except part_text
+divclasses = chapter['class']
+divclasses.remove('part_text')
+chapter = self.replace_formatting(chapter)
+chapter['class'] = divclasses

+exclude_notes=self.getConfigList('exclude_notes')
+if 'headnotes' not in exclude_notes:
+# Find the headnote
+head_note = soup.select_one("div.part-comment-top div.js-public-beta-comment-before")
+if head_note:
+# Create the structure for the headnote
+head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
+head_b_tag = soup.new_tag('b')
+head_b_tag.string = 'Примечания:'
+if 'text-preline' in head_note['class'] and self.getConfig('replace_text_formatting'):
+head_blockquote_tag = self.replace_formatting(head_note)
+head_blockquote_tag.name = 'blockquote'
+else:
+head_blockquote_tag = soup.new_tag('blockquote')
+head_blockquote_tag.string = stripHTML(head_note)
+head_notes_div_tag.append(head_b_tag)
+head_notes_div_tag.append(head_blockquote_tag)
+# Prepend the headnotes to the chapter, <hr> to mimic the site
+chapter.insert(0, head_notes_div_tag)
+chapter.insert(1, soup.new_tag('hr'))

+if 'footnotes' not in exclude_notes:
+# Find the endnote
+end_note = soup.select_one("div.part-comment-bottom div.js-public-beta-comment-after")
+if end_note:
+# Create the structure for the footnote
+end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
+end_b_tag = soup.new_tag('b')
+end_b_tag.string = 'Примечания:'
+if 'text-preline' in end_note['class'] and self.getConfig('replace_text_formatting'):
+end_blockquote_tag = self.replace_formatting(end_note)
+end_blockquote_tag.name = 'blockquote'
+else:
+end_blockquote_tag = soup.new_tag('blockquote')
+end_blockquote_tag.string = stripHTML(end_note)
+end_notes_div_tag.append(end_b_tag)
+end_notes_div_tag.append(end_blockquote_tag)
+# Append the endnotes to the chapter, <hr> to mimic the site
+chapter.append(soup.new_tag('hr'))
+chapter.append(end_notes_div_tag)

 return self.utf8FromSoup(url,chapter)
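The awards scrape above trusts a JSON blob stored in a Vue component attribute; stripped down, the parsing step is just a `json.loads()` plus a list comprehension. A sketch with an invented payload (only the 'user_text' field is attested by the adapter code; everything else about the real blob is unknown):

import json

# Invented example payload; shape assumed from the adapter code above.
awards = '[{"user_text": "За идею"}, {"user_text": "За слог"}]'
award_list = json.loads(awards)
print(len(award_list))                       # 2
print([a['user_text'] for a in award_list])  # ['За идею', 'За слог']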
@@ -177,7 +177,7 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
 elif key == 'Words':
 self.story.setMetadata('numWords',val)

-summary = soup.find('dt',text='Story Summary:')
+summary = soup.find('dt',string='Story Summary:')
 if summary:
 summary = summary.find_next_sibling('dd')
 summary.name='div'

@@ -201,16 +201,16 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
 # epubutils.py
 # Yes, this still applies to fictionalley-archive.

-for tag in chaptext.findAll('head') + chaptext.findAll('meta') + chaptext.findAll('script'):
+for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
 tag.extract()

-for tag in chaptext.findAll('body') + chaptext.findAll('html'):
+for tag in chaptext.find_all('body') + chaptext.find_all('html'):
 tag.name = 'div'

 if self.getConfig('include_author_notes'):
 row = chaptext.find_previous_sibling('div',class_='row')
 logger.debug(row)
-andt = row.find('dt',text="Author's Note:")
+andt = row.find('dt',string="Author's Note:")
 logger.debug(andt)
 if andt:
 chaptext.insert(0,andt.parent.extract())
@@ -235,7 +235,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
 # logger.debug(data)
 self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))

-summhead = soup.find('h5',text='Summary')
+summhead = soup.find('h5',string='Summary')
 self.setDescription(url,summhead.find_next('div'))

 ## author:

@@ -244,12 +244,12 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
 self.story.setMetadata('authorUrl',autha['href'])
 self.story.setMetadata('author',autha.string)

-updlab = soup.find('label',text='Last Updated:')
+updlab = soup.find('label',string='Last Updated:')
 if updlab:
 update = updlab.find_next('time')['datetime']
 self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))

-publab = soup.find('label',text='Published:')
+publab = soup.find('label',string='Published:')
 if publab:
 pubdate = publab.find_next('time')['datetime']
 self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))

@@ -280,7 +280,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
 # logger.debug(meta)

 # Find original ffnet URL
-a = soup.find('a', text="Source")
+a = soup.find('a', string="Source")
 self.story.setMetadata('origin',stripHTML(a))
 self.story.setMetadata('originUrl',a['href'])
@ -55,6 +55,8 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
|||
self.story_id = self.parsedUrl.path.split('/')[3]
|
||||
self.story.setMetadata('storyId', self.story_id)
|
||||
|
||||
self.chapter_id_to_api = {}
|
||||
|
||||
# normalize URL. omits title in the url
|
||||
self._setURL("https://fiction.live/stories//{s_id}".format(s_id = self.story_id));
|
||||
|
||||
|
|
@@ -91,6 +93,15 @@ class FictionLiveAdapter(BaseSiteAdapter):

# doesn't use a timezone object and returns tz-naive datetimes. I *think* I can leave the rest to fanficfare
return datetime.fromtimestamp(timestamp / 1000.0, None)

def img_url_trans(self,imgurl):
"Apparently site changed cdn URLs for images more than once."
# logger.debug("pre--imgurl:%s"%imgurl)
imgurl = re.sub(r'(\w+)\.cloudfront\.net',r'cdn6.fiction.live/file/fictionlive',imgurl)
imgurl = re.sub(r'www\.filepicker\.io/api/file/(\w+)',r'cdn4.fiction.live/fp/\1',imgurl)
imgurl = re.sub(r'cdn[34].fiction.live/(.+)',r'cdn6.fiction.live/file/fictionlive/\1',imgurl)
# logger.debug("post-imgurl:%s"%imgurl)
return imgurl

def doExtractChapterUrlsAndMetadata(self, get_cover=True):

metadata_url = "https://fiction.live/api/node/{s_id}/"

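The new `img_url_trans` hook funnels every historical image host into the current cdn6 endpoint by chaining substitutions, so a URL can be rewritten twice (filepicker to cdn4, then cdn4 into cdn6). A standalone trace of the same three rewrites, with a made-up file id:

    import re

    def img_url_trans(imgurl):
        # same rewrite chain as the adapter method above
        imgurl = re.sub(r'(\w+)\.cloudfront\.net', r'cdn6.fiction.live/file/fictionlive', imgurl)
        imgurl = re.sub(r'www\.filepicker\.io/api/file/(\w+)', r'cdn4.fiction.live/fp/\1', imgurl)
        imgurl = re.sub(r'cdn[34].fiction.live/(.+)', r'cdn6.fiction.live/file/fictionlive/\1', imgurl)
        return imgurl

    print(img_url_trans('https://www.filepicker.io/api/file/abc123'))
    # https://cdn6.fiction.live/file/fictionlive/fp/abc123
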
@@ -162,7 +173,7 @@ class FictionLiveAdapter(BaseSiteAdapter):

tags = data['ta'] if 'ta' in data else []

if (self.story.getMetadata('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
if (self.story.getMetadataRaw('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url)

@@ -230,6 +241,17 @@ class FictionLiveAdapter(BaseSiteAdapter):

a, b = itertools.tee(iterable, 2)
next(b, None)
return list(zip(a, b))

def map_chap_ids_to_api(chapter_ids, route_ids, times):
for index, bounds in enumerate(times):
start, end = bounds
end -= 1
chapter_url = chunkrange_url.format(s_id = data['_id'], start = start, end = end)
self.chapter_id_to_api[chapter_ids[index]] = chapter_url

for route_id in route_ids:
chapter_url = route_chunkrange_url.format(c_id = route_id)
self.chapter_id_to_api[route_id] = chapter_url

## first thing to do is seperate out the appendices
appendices, maintext, routes = [], [], []

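The `pair` helper shown in context above is the classic pairwise recipe: `itertools.tee` duplicates the iterator, `next(b, None)` advances one copy, and zipping yields each chunk timestamp with its successor, which is exactly the (start, end) bound that `map_chap_ids_to_api` unpacks. Standalone:

    import itertools

    def pair(iterable):
        # s -> [(s0, s1), (s1, s2), (s2, s3), ...]
        a, b = itertools.tee(iterable, 2)
        next(b, None)
        return list(zip(a, b))

    print(pair([100, 250, 900, 1400]))
    # [(100, 250), (250, 900), (900, 1400)]
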
@@ -251,22 +273,25 @@ class FictionLiveAdapter(BaseSiteAdapter):

## main-text chapter extraction processing. *should* now handle all the edge cases.
## relies on fanficfare ignoring empty chapters!

titles = [c['title'] for c in maintext]
titles = ["Home"] + titles
titles = ["Home"] + [c['title'] for c in maintext]
chapter_ids = ['home'] + [c['id'] for c in maintext]
times = [data['ct']] + [c['ct'] for c in maintext] + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1
times = pair(times)

times = [c['ct'] for c in maintext]
times = [data['ct']] + times + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1
if self.getConfig('include_appendices', True): # Add appendices after main text if desired
titles = titles + ["Appendix: " + a['title'][9:] for a in appendices]
chapter_ids = chapter_ids + [a['id'] for a in appendices]
times = times + [(a['ct'], a['ct'] + 2) for a in appendices]

route_ids = [r['id'] for r in routes]

map_chap_ids_to_api(chapter_ids, route_ids, times) # Map chapter ids to API URLs for use when comparing the two

# doesn't actually run without the call to list.
list(map(add_chapter_url, titles, pair(times)))

for a in appendices: # add appendices afterwards
chapter_start = a['ct']
chapter_title = "Appendix: " + a['title'][9:] # 'Appendix: ' rather than '#special' at beginning of name
add_chapter_url(chapter_title, (chapter_start, chapter_start + 2)) # 1 msec range = this one chunk only
list(map(add_chapter_url, titles, times))

for r in routes: # add route at the end, after appendices
route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp
route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp
chapter_title = "Route: " + r['title'] # 'Route: ' at beginning of name, since it's a multiroute chapter
add_route_chapter_url(chapter_title, route_id)

@@ -296,7 +321,7 @@ class FictionLiveAdapter(BaseSiteAdapter):

text += "<div>" # chapter chunks aren't always well-delimited in their contents

# appendix chunks are mixed in with other things
# appendix chunks are mixed in with other things
if not getting_appendix and 't' in chunk and chunk['t'].startswith("#special"): # t = title = bookmark
continue

@@ -314,7 +339,7 @@ class FictionLiveAdapter(BaseSiteAdapter):

## soup to repair the most egregious HTML errors.
return self.utf8FromSoup(url,self.make_soup(text))

### everything from here out is chunk data handling.
### everything from here out is chunk data handling.

def format_chapter(self, chunk):
"""Handles any formatting in the chapter body text for text chapters.

@@ -329,7 +354,7 @@ class FictionLiveAdapter(BaseSiteAdapter):

if self.achievements:
soup = self.append_achievments(soup)

return str(soup)
return str(soup)

def add_spoiler_legends(self, soup):
# find spoiler links and change link-anchor block to legend block

@@ -409,7 +434,7 @@ class FictionLiveAdapter(BaseSiteAdapter):

# so let's just ignore non-int values here
if not isinstance(v, int):
continue
if 0 <= v <= len(choices):
if 0 <= v < len(choices):
output[v] += 1
return output

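The bounds fix above matters because `output` is indexed by choice number: with `<=`, a vote equal to `len(choices)` passes the check and `output[v] += 1` raises IndexError. A toy tally under that assumption (list names taken from the hunk):

    choices = ['yes', 'no']
    output = [0] * len(choices)
    for v in [0, 1, 2, 'junk']:        # raw vote values, possibly malformed
        if not isinstance(v, int):
            continue
        if 0 <= v < len(choices):      # '<', not '<=': output[2] would be out of range
            output[v] += 1
    print(output)                      # [1, 1]; the out-of-range vote is dropped
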
@@ -493,8 +518,10 @@ class FictionLiveAdapter(BaseSiteAdapter):

# now matches the site and does *not* include dicerolls as posts!
num_votes = str(len(posts)) + " posts" if len(posts) != 0 else "be the first to post."

posts_title = chunk['b'] if 'b' in chunk else "Reader Posts"

output = ""
output += u"<h4><span>Reader Posts — <small> Posting " + closed
output += u"<h4><span>" + posts_title + " — <small> Posting " + closed
output += u" — " + num_votes + "</small></span></h4>\n"

## so. a voter can roll with their post. these rolls are in a seperate dict, but have the **same uid**.

@@ -520,6 +547,35 @@ class FictionLiveAdapter(BaseSiteAdapter):

return output

def normalize_chapterurl(self, url):
if url.startswith(r'https://fiction.live/api/anonkun/chapters'):
return url

pattern = None

if url.startswith(r'https://fiction.live/api/anonkun/route'):
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/routes/([a-zA-Z0-9]+)"
elif url.startswith(r'https://fiction.live/'):
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/[^/]*(/[a-zA-Z0-9]+|home)"
# regex101 rocks

if not pattern:
return url

match = re.match(pattern, url)
if not match:
return url

chapter_id = match.group(1)

if chapter_id.startswith('/'):
chapter_id = chapter_id[1:]

if chapter_id and chapter_id in self.chapter_id_to_api:
return self.chapter_id_to_api[chapter_id]

return url

def format_unknown(self, chunk):
raise NotImplementedError("Unknown chunk type ({}) in fiction.live story.".format(chunk))

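`normalize_chapterurl` maps a public chapter URL back to the API URL recorded earlier in `chapter_id_to_api`; the second regex captures either a trailing chapter id segment or the literal `home`. A standalone check of just the capture, against a made-up story URL:

    import re

    pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/[^/]*(/[a-zA-Z0-9]+|home)"
    url = "https://fiction.live/stories/Some-Story/aBc123XyZ/chapter-title/dEf456"  # hypothetical URL
    m = re.match(pattern, url)
    print(m.group(1).lstrip('/'))   # dEf456, the key looked up in chapter_id_to_api
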
@@ -23,7 +23,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):

SITE_ABBREVIATION = 'fmt'
SITE_DOMAIN = 'fictionmania.tv'

BASE_URL = 'http://' + SITE_DOMAIN + '/stories/'
BASE_URL = 'https://' + SITE_DOMAIN + '/stories/'
READ_TEXT_STORY_URL_TEMPLATE = BASE_URL + 'readtextstory.html?storyID=%s'
DETAILS_URL_TEMPLATE = BASE_URL + 'details.html?storyID=%s'

@@ -40,10 +40,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):

self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)

# Always single chapters, probably should use the Anthology feature to
# merge chapters of a story
self.story.setMetadata('numChapters', 1)

@staticmethod
def getSiteDomain():
return FictionManiaTVAdapter.SITE_DOMAIN

@@ -53,7 +49,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):

return cls.READ_TEXT_STORY_URL_TEMPLATE % 1234

def getSiteURLPattern(self):
return r'https?' + re.escape(self.BASE_URL[len('http'):]) + r'(readtextstory|readhtmlstory|readxstory|details)\.html\?storyID=\d+$'
return r'https?' + re.escape(self.BASE_URL[len('https'):]) + r'(readtextstory|readhtmlstory|readxstory|details)\.html\?storyID=\d+$'

def extractChapterUrlsAndMetadata(self):
url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')

@@ -167,14 +163,30 @@ class FictionManiaTVAdapter(BaseSiteAdapter):

# <div style="margin-left:10ex;margin-right:10ex">
## fetching SWI version now instead of text.
htmlurl = url.replace('readtextstory','readhtmlstory')
soup = self.make_soup(self.get_request(htmlurl))
div = soup.find('div',style="margin-left:10ex;margin-right:10ex")
if div:
return self.utf8FromSoup(htmlurl,div)
else:
## Used to find by style, but it's inconsistent now. we've seen:
## margin-left:10ex;margin-right:10ex
## margin-right: 5%; margin-left: 5%
## margin-left:5%; margin-right:5%
## margin-left:5%; margin-right:5%; background: white
## And there's some without a <div> tag (or an unclosed div)
## Only the comments appear to be consistent.
beginmarker='<!--Read or display the file-->'
endmarker='''<hr size=1 noshade>
<!--review add read, top and bottom-->
'''
data = self.get_request(htmlurl)
try:
## if both markers are found, assume whatever is in between
## is the chapter text.
soup = self.make_soup(data[data.index(beginmarker):data.index(endmarker)])
return self.utf8FromSoup(htmlurl,soup)
except Exception as e:
# logger.debug(e)
# logger.debug(soup)
logger.debug("Story With Images(SWI) not found, falling back to HTML.")

## fetching html version now instead of text.
## Note that html and SWI pages are *not* formatted the same.
soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
# logger.debug(soup)

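The replacement logic above stops trusting the `<div>`'s style attribute and instead slices the raw page between the two HTML comments, which is why a missing marker must raise: `str.index` throws ValueError, landing in the `except` that falls back to the plain HTML version. A toy run of the slice:

    beginmarker = '<!--Read or display the file-->'
    endmarker = '<hr size=1 noshade>\n<!--review add read, top and bottom-->\n'

    page = '<html>junk' + beginmarker + '<p>chapter text</p>' + endmarker + 'more junk'
    body = page[page.index(beginmarker):page.index(endmarker)]
    print(body)   # <!--Read or display the file--><p>chapter text</p>
    # page.index(...) raises ValueError when a marker is missing, triggering the fallback path
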
@@ -66,7 +66,8 @@ class FicwadComSiteAdapter(BaseSiteAdapter):

params['username']))
d = self.post_request(loginUrl,params,usecache=False)

if "Login attempt failed..." in d:
if "Login attempt failed..." in d or \
'<div id="error">Please enter your username and password.</div>' in d:
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['username']))
raise exceptions.FailedToLogin(url,params['username'])

@@ -114,7 +115,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):

titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
self.story.setMetadata('title', stripHTML(titleh4.a))

if 'Deleted story' in self.story.getMetadata('title'):
if 'Deleted story' in self.story.getMetadataRaw('title'):
raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)

# Find authorid and URL from... author url.

@@ -129,14 +130,14 @@ class FicwadComSiteAdapter(BaseSiteAdapter):

#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)

# most of the meta data is here:
metap = storydiv.find("p",{"class":"meta"})
metap = storydiv.find("div",{"class":"meta"})
self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)

# warnings
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
spanreq = metap.find("span",{"class":"story-warnings"})
if spanreq: # can be no warnings.
for a in spanreq.findAll("a"):
for a in spanreq.find_all("a"):
self.story.addToList('warnings',a['title'])

## perhaps not the most efficient way to parse this, using

@@ -186,7 +187,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):

# no list found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url)
else:
chapterlistlis = storylistul.findAll('li')
chapterlistlis = storylistul.find_all('li')
for chapterli in chapterlistlis:
if "blocked" in chapterli['class']:
# paranoia check. We should already be logged in by now.

@@ -99,6 +99,17 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

params['username']))
raise exceptions.FailedToLogin(url,params['username'])

def make_soup(self,data):
soup = super(FimFictionNetSiteAdapter, self).make_soup(data)
for img in soup.select('img.lazy-img, img.user_image'):
## FimF has started a 'camo' mechanism for images that
## gets block by CF. attr data-source is original source.
if img.has_attr('data-source'):
img['src'] = img['data-source']
elif img.has_attr('data-src'):
img['src'] = img['data-src']
return soup

def doExtractChapterUrlsAndMetadata(self,get_cover=True):

if self.is_adult or self.getConfig("is_adult"):

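The `make_soup` override rewrites every lazy-loaded `<img>` so the rest of the adapter sees the original source URL rather than the camo'd one. Standalone, with invented image URLs:

    from bs4 import BeautifulSoup

    html = '<img class="lazy-img" src="https://camo.example/x" data-source="https://example.com/orig.png">'
    soup = BeautifulSoup(html, 'html.parser')
    for img in soup.select('img.lazy-img, img.user_image'):
        if img.has_attr('data-source'):       # prefer the declared original source
            img['src'] = img['data-source']
        elif img.has_attr('data-src'):        # fall back to the lazy-load attribute
            img['src'] = img['data-src']
    print(soup.img['src'])                    # https://example.com/orig.png
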
@@ -106,7 +117,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

## Only needed with password protected stories, which you have
## to have logged into in the website using this account.
self.performLogin(self.url)
if self.getConfig("always_login"):
self.performLogin(self.url)

##---------------------------------------------------------------------------------------------------
## Get the story's title page. Check if it exists.

@@ -139,7 +151,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

self.story.setMetadata("authorId", author['href'].split('/')[2])
self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
self.story.getMetadata('authorId'),
self.story.getMetadata('author')))
# meta entry author can be changed by the user.
stripHTML(author)))

#Rating text is replaced with full words for historical compatibility after the site changed
#on 2014-10-27

@@ -167,12 +180,13 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

# Cover image
if get_cover:
storyImage = storyContentBox.find('img', {'class':'lazy-img'})
storyImage = soup.select_one('div.story_container__story_image img')
if storyImage:
coverurl = storyImage['data-fullsize']
# try setting from data-fullsize, if fails, try using data-src
if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
coverurl = storyImage['data-src']
cover_set = self.setCoverImage(self.url,coverurl)[0]
if not cover_set or cover_set.startswith("failedtoload"):
coverurl = storyImage['src']
self.setCoverImage(self.url,coverurl)

coverSource = storyImage.parent.find('a', {'class':'source'})

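The cover logic above is a three-step fallback: try `data-fullsize`, then `data-src`, then plain `src`, stopping at the first URL that loads. A runnable sketch of the same idea with a stubbed-out loader (not the adapter's literal code; `set_cover` stands in for `self.setCoverImage(...)[0]` and the URLs are invented):

    def set_cover(coverurl):
        # stub: pretend the first two URLs fail to load
        return "failedtoload" if coverurl.endswith(('fullsize.png', 'lazy.png')) else "success"

    story_image = {'data-fullsize': 'https://example.com/fullsize.png',
                   'data-src': 'https://example.com/lazy.png',
                   'src': 'https://example.com/small.png'}

    for attr in ('data-fullsize', 'data-src', 'src'):
        cover_set = set_cover(story_image[attr])
        if cover_set and not cover_set.startswith("failedtoload"):
            print("cover set from", attr)   # -> cover set from src
            break
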
@@ -284,16 +298,26 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

descriptionMeta = soup.find('meta', {'property':'og:description'})
self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))

#groups
# groups.
# If there are more than X groups, there's a 'Show all' button
# that calls for a JSON containing HTML with the full list.
# But it doesn't work reliably with FlareSolverr.
groupList = None
groupButton = soup.find('button', {'data-click':'showAll'})
if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
groupData = json.loads(groupResponse)
groupList = self.make_soup(groupData["content"])
else:
try:
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
groupData = json.loads(groupResponse)
groupList = self.make_soup(groupData["content"])
except Exception as e:
logger.warning("Collecting 'groups' (AKA 'Featured In') from JSON failed:%s"%e)
logger.warning("Only 'groups' initially shown on the page will be collected.")
logger.warning("This is a known issue with JSON and FlareSolverr. See #1122")

if not groupList:
groupList = soup.find('ul', {'id':'story-groups-list'})

if not (groupList == None):
if groupList:
for groupContent in groupList.find_all('a'):
self.story.addToList("groupsUrl", 'https://'+self.host+groupContent["href"])
groupName = groupContent.find('span', {"class":"group-name"})

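Whichever path runs, the ajax endpoint answers with JSON whose `content` field is an HTML fragment, which is then souped like any page. A toy decode of that shape (payload invented, same access path):

    import json
    from bs4 import BeautifulSoup

    groupResponse = '{"content": "<ul><li><a href=\\"/group/42\\"><span class=\\"group-name\\">Example Group</span></a></li></ul>"}'
    groupData = json.loads(groupResponse)
    groupList = BeautifulSoup(groupData["content"], 'html.parser')
    print([a['href'] for a in groupList.find_all('a')])   # ['/group/42']
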
@@ -304,7 +328,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

#sequels
for header in soup.find_all('h1', {'class':'header-stories'}):
# I don't know why using text=re.compile with find() wouldn't work, but it didn't.
# I don't know why using string=re.compile with find() wouldn't work, but it didn't.
if header.text.startswith('Sequels'):
sequelContainer = header.parent
for sequel in sequelContainer.find_all('a', {'class':'story_link'}):

@@ -384,3 +408,33 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

# data = self.get_request(url)
if self.getConfig("is_adult"):
self.set_adult_cookie()

def get_urls_from_page(self,url,normalize):
iterate = self.getConfig('scrape_bookshelf', default=False)
if not re.search(r'fimfiction\.net/bookshelf/(?P<listid>.+?)/',url) or iterate == 'legacy':
return super().get_urls_from_page(url,normalize)

self.before_get_urls_from_page(url,normalize)

final_urls = list()
while True:
data = self.get_request(url,usecache=True)
soup = self.make_soup(data)
paginator = soup.select_one('div.paginator-container > div.page_list > ul').find_all('li')
logger.debug("Paginator: " + str(len(paginator)))
stories_container = soup.select_one('div.content > div.two-columns > div.left').find_all('article', recursive=False)
x = 0
logger.debug("Container "+str(len(stories_container)))
for story_raw in stories_container:
x += 1
story_url = story_raw.select_one('div.story_content_box > header.title > div > a.story_name').get('href')
url_story = ('https://' + self.getSiteDomain() + story_url)
#logger.debug(url_story)
final_urls.append(url_story)
logger.debug("Discovered %s new stories."%str(x))

next_button = paginator[-1].select_one('a')
logger.debug("Next button: " + next_button.get_text())
if next_button.get_text() or not iterate:
return {'urllist': final_urls}
url = ('https://' + self.getSiteDomain() + next_button.get('href'))

@@ -93,6 +93,9 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):

a = soup.find('a', href=re.compile(r"profileshow.aspx\?u="))
self.story.setMetadata('authorId', a['href'].split('=')[1])
if not self.story.getMetadata('authorId'):
logger.warning("Site authorUrl missing authorId, using SiteMissingAuthorId")
self.story.setMetadata('authorId', 'SiteMissingAuthorId')
self.story.setMetadata('authorUrl', 'http://' +
self.host + '/' + a['href'])
self.story.setMetadata('author', a.string)

@@ -102,7 +105,6 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):

# to download them one at a time yourself. I'm also setting the status to
# complete
self.add_chapter(self.story.getMetadata('title'), self.url)
self.story.setMetadata('numChapters', 1)
self.story.setMetadata('status', 'Completed')

## some stories do not have a summary listed, so I'm setting it here.

@@ -1,6 +1,6 @@

# -*- coding: utf-8 -*-

# Copyright 2018 FanFicFare team
# Copyright 2024 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

@@ -18,15 +18,15 @@

from __future__ import absolute_import
import re

from .base_xenforoforum_adapter import BaseXenForoForumAdapter
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

def getClass():
return QuestionablequestingComAdapter

class QuestionablequestingComAdapter(BaseXenForoForumAdapter):
class QuestionablequestingComAdapter(BaseXenForo2ForumAdapter):

def __init__(self, config, url):
BaseXenForoForumAdapter.__init__(self, config, url)
BaseXenForo2ForumAdapter.__init__(self, config, url)

# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','qq')

@@ -1,216 +0,0 @@

# -*- coding: utf-8 -*-

# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
return HLFictionNetAdapter

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class HLFictionNetAdapter(BaseSiteAdapter):

def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)

self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False

# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

# normalized story URL.
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','hlf')

# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%m/%d/%y"

@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'hlfiction.net'

@classmethod
def getSiteExampleURLs(cls):
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

def getSiteURLPattern(self):
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):

# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logger.debug("URL: "+url)

data = self.get_request(url)

if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

soup = self.make_soup(data)
# print data

## Title and author
a = soup.find('div', {'id' : 'pagetitle'})

aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',aut['href'].split('=')[1])
self.story.setMetadata('authorUrl','https://'+self.host+'/'+aut['href'])
self.story.setMetadata('author',aut.string)
aut.extract()

self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])

# Find the chapters:
chapters=soup.find('select')
if chapters != None:
for chapter in chapters.findAll('option'):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])
else:
self.add_chapter(self.story.getMetadata('title'),url)

asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

for list in asoup.findAll('div', {'class' : re.compile('listbox')}):
a = list.find('a')
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
break

# eFiction sites don't help us out a lot with their meta data
# formating, so it's a little ugly.

# utility method
def defaultGetattr(d,k):
try:
return d[k]
except:
return ""

# <span class="label">Rated:</span> NC-17<br /> etc
labels = list.findAll('span', {'class' : 'classification'})
for labelspan in labels:
label = labelspan.string
value = labelspan.nextSibling

if 'Summary' in label:
## Everything until the next span class='label'
svalue = ""
while 'classification' not in defaultGetattr(value,'class'):
svalue += unicode(value)
value = value.nextSibling
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))

if 'Rated' in label:
self.story.setMetadata('rating', value[:len(value)-2])

if 'Word count' in label:
self.story.setMetadata('numWords', value)

if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+'))
for cat in cats:
self.story.addToList('category',cat.string)

if 'Characters' in label:
for char in value.string.split(', '):
if not 'None' in char:
self.story.addToList('characters',char)

if 'Genre' in label:
for genre in value.string.split(', '):
if not 'None' in genre:
self.story.addToList('genre',genre)

if 'Warnings' in label:
for warning in value.string.split(', '):
if not 'None' in warning:
self.story.addToList('warnings',warning)

if 'Completed' in label:
if 'Yes' in value:
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')

if 'Published' in label:
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

if 'Updated' in label:
# there's a stray [ at the end.
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

try:
# Find Series name from series URL.
a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+"))
series_name = a.string
series_url = 'https://'+self.host+'/'+a['href']

seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links
if 'contact.php' not in a['href'] and 'index' not in a['href']:
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
self.setSeries(series_name, i)
self.story.setMetadata('seriesUrl',series_url)
break
i+=1

except:
# I find it hard to care if the series parsing fails
pass

# grab the text for an individual chapter.
def getChapterText(self, url):

logger.debug('Getting chapter text from: %s' % url)

soup = self.make_soup(self.get_request(url))

div = soup.find('div', {'id' : 'story'})

if None == div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

return self.utf8FromSoup(url,div)

@@ -161,7 +161,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):

self.story.setMetadata('author',a.string)

# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

@@ -178,7 +178,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):

# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string

@@ -199,22 +199,22 @@ class ImagineEFicComAdapter(BaseSiteAdapter):

self.story.setMetadata('numWords', value)

if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)

if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)

if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)

if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)

@@ -238,7 +238,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):

seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

28 fanficfare/adapters/adapter_inkbunnynet.py (Normal file → Executable file)

@@ -125,7 +125,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):

soup = self.make_soup(self.get_request(url,usecache=False))

# removing all of the scripts
for tag in soup.findAll('script'):
for tag in soup.find_all('script'):
tag.extract()

@@ -134,7 +134,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):

self.story.setMetadata('title', stripHTML(title))

# Get Author
authortag = soup.find('table',{'class':'pooltable'}).find('a',href=re.compile(r'/gallery/'))
authortag = soup.find('table',{'class':'pooltable'}).find('a',href=re.compile(r'/gallery/|/scraps/'))
author = authortag['href'].split('/')[-1] # no separate ID
self.story.setMetadata('author', author)
self.story.setMetadata('authorId', author)

@@ -149,7 +149,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):

if not self.getConfig('keep_summary_html'):
synopsis = stripHTML(synopsis)

self.setDescription(url, stripHTML(synopsis))
self.setDescription(url, synopsis)

#Getting Keywords/Genres
keywords = bookdetails.find('div', {'id':'kw_scroll'}).find_next_siblings('div')[0].div.div.find_all('a')

@@ -157,10 +157,11 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):

self.story.addToList('genre', stripHTML(kword))

# Getting the Category
category = bookdetails.findChildren('div', recursive=False)[2].find('span', string='Type:').parent
category.find('span').decompose()
self.story.setMetadata('category', stripHTML(category))
for div in bookdetails.find_all('div'):
if 'Details' == stripHTML(div).strip():
self.story.setMetadata('category', div.find_next_siblings('div')[0].span.next_sibling.strip())
elif 'Rating:' == stripHTML(div).strip()[:7]:
if 'Rating:' == stripHTML(div)[:7]:
rating = div.span.next_sibling.strip()
self.story.setMetadata('rating', rating)
break

@@ -178,7 +179,14 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):

if get_cover:
cover_img = soup.find('img', {'id':'magicbox'})
if cover_img:
# image content is treated like a normal image submission
self.setCoverImage(url, cover_img['src'])
else:
# image content is present, but secondary to text file
cover_div = soup.find('div', {'class': 'content magicboxParent'})
cover_img = cover_div.find('img', {'class':'shadowedimage'}) if cover_div else None
if cover_img:
self.setCoverImage(url, cover_img['src'])

## Save for use below
self.soup = soup

@@ -192,3 +200,11 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):

raise exceptions.FailedToDownload("Error downloading Chapter: %s No text block found -- non-story URL?" % url)

return self.utf8FromSoup(url, story)

def before_get_urls_from_page(self,url,normalize):
# To display the links to stories that are not available to guests.
if self.getConfig("username") and self.getConfig("always_login"):
# performLogin extracts token from the soup
soup = self.make_soup(self.get_request(url))

self.performLogin(url, soup)

213 fanficfare/adapters/adapter_kakuyomujp.py (Normal file)

@@ -0,0 +1,213 @@

# -*- coding: utf-8 -*-

# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging, time
logger = logging.getLogger(__name__)
import re, json

from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six.moves import http_cookiejar as cl

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
return KakuyomuJpAdapter

genres = {
'FANTASY': '異世界ファンタジー',
'ACTION': '現代ファンタジー',
'SF': 'SF',
'LOVE_STORY': '恋愛',
'ROMANCE': 'ラブコメ',
'DRAMA': '現代ドラマ',
'HORROR': 'ホラー',
'MYSTERY': 'ミステリー',
'NONFICTION': 'エッセイ・ノンフィクション',
'HISTORY': '歴史・時代・伝奇',
'CRITICISM': '創作論・評論',
'OTHERS': '詩・童話・その他',
'FAN_FICTION': '二次創作',
}

class KakuyomuJpAdapter(BaseSiteAdapter):

def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)

self.story.setMetadata('siteabbrev', 'kakuyomu')
self.story.setMetadata('language', 'Japanese')

self.storyId = self.path.split('/')[-1]
self.story.setMetadata('storyId', self.storyId)

@staticmethod
def getSiteDomain():
return 'kakuyomu.jp'

@classmethod
def getSiteExampleURLs(cls):
return ("https://kakuyomu.jp/works/12341234123412341234")

def getSiteURLPattern(self):
return r"^https?://kakuyomu\.jp/works/[0-9]+$"

def extractChapterUrlsAndMetadata(self):
data = self.get_request(self.url)

# Page could not be found
if 'お探しのページは見つかりませんでした' in data:
raise exceptions.StoryDoesNotExist(self.url)

soup = self.make_soup(data)
info = json.loads(soup.find(id='__NEXT_DATA__').contents[0])['props']['pageProps']['__APOLLO_STATE__']

workKey = 'Work:%s' % self.storyId

# Title
self.story.setMetadata('title', info[workKey]['title'])

# Author
authorKey = info[workKey]['author']['__ref']
self.story.setMetadata('authorId', authorKey.split(':')[1])
self.story.setMetadata('authorUrl', 'https://kakuyomu.jp/users/%s' % info[authorKey]['name'])
self.story.setMetadata('author', info[authorKey]['activityName'])

# Description
self.setDescription(self.url, info[workKey]['introduction'])
self.story.setMetadata('catchphrase', info[workKey]['catchphrase'])

# Date Published and Updated
# 2024-01-01T03:00:12Z
self.story.setMetadata('datePublished',
makeDate(info[workKey]['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'))
self.story.setMetadata('dateUpdated',
makeDate(info[workKey]['editedAt'], '%Y-%m-%dT%H:%M:%SZ'))

# Character count
self.story.setMetadata('numWords', info[workKey]['totalCharacterCount'])

# Status
completed = info[workKey]['serialStatus'] == 'COMPLETED'
self.story.setMetadata('status', 'Completed' if completed else 'In-Progress')

# Warnings
rating = 'G'
if info[workKey]['isCruel']:
rating = 'R15'
self.story.addToList('warnings', '残酷描写有り')
if info[workKey]['isViolent']:
rating = 'R15'
self.story.addToList('warnings', '暴力描写有り')
if info[workKey]['isSexual']:
rating = 'R15'
self.story.addToList('warnings', '性描写有り')

# Tags
for tag in info[workKey]['tagLabels']:
if re.match(r'[RrR].?[11][55]', tag) is None:
self.story.addToList('freeformtags', tag)
else:
rating = 'R15'

# Rating
self.story.setMetadata('rating', rating)

# Genre
self.story.setMetadata('genre', genres[info[workKey]['genre']])

if info[workKey]['genre'] == 'FAN_FICTION':
fandomKey = info[workKey]['fanFictionSource']['__ref']
self.story.addToList('fandoms', info[fandomKey]['title'])

# Ratings, Comments, Etc.
self.story.setMetadata('reviews', info[workKey]['reviewCount'])
self.story.setMetadata('points', info[workKey]['totalReviewPoint'])
self.story.setMetadata('comments', info[workKey]['totalPublicEpisodeCommentCount'])
self.story.setMetadata('views', info[workKey]['totalReadCount'])
self.story.setMetadata('follows', info[workKey]['totalFollowers'])
self.story.setMetadata('collections', len(info[workKey]['publicWorkCollections']))
self.story.setMetadata('events', info[workKey]['totalWorkContestCount'] + info[workKey]['totalUserEventCount'])
self.story.setMetadata('published', info[workKey]['hasPublication'])

# visitorWorkFollowing
# workReviewByVisitor

# Chapters, Episodes

# TOC nodes are in a list
# each have a list of named episodes
# each can have a named chapter
# named chapters can be at depth 1 or 2
# episodes might be empty (premium subscription)

prependSectionTitles = self.getConfig('prepend_section_titles', 'firstepisode')

numEpisodes = 0
titles = []
nestingLevel = 0
newSection = False
for tocNodeRef in info[workKey]['tableOfContentsV2']:
tocNode = info[tocNodeRef['__ref']]

if tocNode['chapter'] is not None:
chapter = info[tocNode['chapter']['__ref']]
while chapter['level'] <= nestingLevel:
titles.pop()
nestingLevel -= 1
titles.append(chapter['title'])
nestingLevel = chapter['level']
newSection = True
else:
titles = []
nestingLevel = 0
newSection = False

for episodeRef in tocNode['episodeUnions']:
if not episodeRef['__ref'].startswith('EmptyEpisode'):
numEpisodes += 1
episode = info[episodeRef['__ref']]
epUrl = 'https://kakuyomu.jp/works/' + self.storyId + '/episodes/' + episode['id']
epTitle = episode['title']

if ((len(titles) > 0) and
((newSection and prependSectionTitles == 'firstepisode') or
prependSectionTitles == 'true')):
titles.append(epTitle)
# bracket with ZWSP to mark presence of section titles
epTitle = u'\u200b' + u'\u3000\u200b'.join(titles)
titles.pop()

self.add_chapter(epTitle, epUrl)
newSection = False

logger.debug("Story: <%s>", self.story)
return

def getChapterText(self, url):
logger.debug('Getting chapter text from <%s>' % url)

soup = self.make_soup(self.get_request(url))
soup = soup.find('div', {'class':'widget-episodeBody js-episode-body'})
if soup is None:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
soup.attrs = {'class':'episode-body'}

return self.utf8FromSoup(url, soup)

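The new kakuyomu.jp adapter never scrapes visible HTML for metadata; everything comes from the Next.js state blob the site embeds in a script tag with id `__NEXT_DATA__`, keyed Apollo-style as `Work:<id>`. A toy page with the same shape (markup invented, same access path as the adapter):

    import json
    from bs4 import BeautifulSoup

    html = ('<script id="__NEXT_DATA__" type="application/json">'
            '{"props":{"pageProps":{"__APOLLO_STATE__":'
            '{"Work:123":{"title":"Sample Title"}}}}}</script>')
    soup = BeautifulSoup(html, 'html.parser')
    info = json.loads(soup.find(id='__NEXT_DATA__').contents[0])['props']['pageProps']['__APOLLO_STATE__']
    print(info['Work:123']['title'])   # Sample Title
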
@@ -144,13 +144,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

# Find authorid and URL from... author urls.
pagetitle = soup.find('div',id='pagetitle')
for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")):
for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
self.story.addToList('author',stripHTML(a))

# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

@@ -166,7 +166,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

return ""

# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = stripHTML(labelspan)

@@ -193,7 +193,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

self.story.setMetadata('numWords', value)

if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [stripHTML(cat) for cat in cats]
for cat in catstext:
# ran across one story with an empty <a href="browse.php?type=categories&catid=1"></a>

@@ -204,7 +204,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

if 'Characters' in label:
self.story.addToList('characters','Kirk')
self.story.addToList('characters','Spock')
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [stripHTML(char) for char in chars]
for char in charstext:
self.story.addToList('characters',stripHTML(char))

@@ -213,7 +213,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:

@@ -223,7 +223,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

## has 'Story Type', which is much more what most sites
## call genre.
if 'Story Type' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:

@@ -233,21 +233,21 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [stripHTML(warning) for warning in warnings]
self.warning = ', '.join(warningstext)
for warning in warningstext:
self.story.addToList('warnings',stripHTML(warning))

if 'Universe' in label:
universes = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
universes = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
universestext = [stripHTML(universe) for universe in universes]
self.universe = ', '.join(universestext)
for universe in universestext:
self.story.addToList('universe',stripHTML(universe))

if 'Crossover Fandom' in label:
crossoverfandoms = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
crossoverfandoms = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms]
self.crossoverfandom = ', '.join(crossoverfandomstext)
for crossoverfandom in crossoverfandomstext:

@@ -274,7 +274,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

series_url = 'https://'+self.host+'/'+a['href']

seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

@@ -19,6 +19,7 @@ from __future__ import absolute_import

import logging
logger = logging.getLogger(__name__)
import re
import json

from bs4.element import Comment
from ..htmlcleanup import stripHTML

@@ -37,7 +38,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):

def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)
#logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)

# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','litero')

@@ -47,16 +48,15 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):

# where first chapter doesn't have '-ch-'.
# Now just rely on extractChapterUrlsAndMetadata to reset
# storyId to first chapter link.
storyId = self.parsedUrl.path.split('/',)[2]

## DON'T normalize to www.literotica.com--keep for language,
## which will be set in _setURL(url). Also, multi-chapter
## have been keeping the language when 'normalizing' to first
## chapter.
url = re.sub(r"^(https?://)"+LANG_RE+r"(\.i)?",
r"\1\2",
r"https://\2",
url)
url = url.replace('/beta/s/','/s/') # to allow beta site URLs.
url = url.replace('/beta/','/') # to allow beta site URLs.

## strip ?page=...
url = re.sub(r"\?page=.*$", "", url)

@@ -66,7 +66,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):

# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%m/%d/%y"
self.dateformat = "%m/%d/%Y"

@staticmethod
def getSiteDomain():

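The format change is two-digit versus four-digit year: `%y` parses '24' where `%Y` parses '2024', and strptime raises on a mismatch rather than guessing:

    from datetime import datetime

    print(datetime.strptime('04/27/2024', '%m/%d/%Y'))   # 2024-04-27 00:00:00
    # datetime.strptime('04/27/2024', '%m/%d/%y') raises
    # ValueError: unconverted data remains: 24
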
@@ -78,10 +78,12 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):

@classmethod
def getSiteExampleURLs(cls):
return "http://www.literotica.com/s/story-title https://www.literotica.com/s/story-title http://portuguese.literotica.com/s/story-title http://german.literotica.com/s/story-title"
return "https://www.literotica.com/s/story-title https://www.literotica.com/series/se/9999999 https://www.literotica.com/s/story-title https://www.literotica.com/i/image-or-comic-title https://www.literotica.com/p/poem-title https://portuguese.literotica.com/s/story-title https://german.literotica.com/s/story-title"

def getSiteURLPattern(self):
return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/(beta/)?s/([a-zA-Z0-9_-]+)"
# also https://www.literotica.com/series/se/80075773
# /s/ for story, /i/ for image/comic, /p/ for poem
return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/((beta/)?[sip]/([a-zA-Z0-9_-]+)|series/se/(?P<storyseriesid>[a-zA-Z0-9_-]+))"

def _setURL(self,url):
# logger.debug("set URL:%s"%url)

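The widened pattern accepts /s/, /i/ and /p/ submissions plus the new /series/se/ story pages, capturing the series id in a named group that extractChapterUrlsAndMetadata later compares against the page's JSON state. A standalone match (LANG_RE is defined elsewhere in the adapter; the subdomain list below is an assumed stand-in for the demo):

    import re

    LANG_RE = r"(?P<lang>www|german|spanish|french|dutch|italian|romanian|portuguese|other)"  # assumption
    pattern = r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/((beta/)?[sip]/([a-zA-Z0-9_-]+)|series/se/(?P<storyseriesid>[a-zA-Z0-9_-]+))"

    m = re.match(pattern, "https://www.literotica.com/series/se/80075773")
    print(m.group('storyseriesid'))   # 80075773
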
@ -90,259 +92,337 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
lang = m.group('lang')
|
||||
if lang not in ('www','other'):
|
||||
self.story.setMetadata('language',lang.capitalize())
|
||||
# reset storyId
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[-1])
|
||||
# logger.debug("language:%s"%self.story.getMetadata('language'))
|
||||
|
||||
def getCategories(self, soup):
|
||||
if self.getConfig("use_meta_keywords"):
|
||||
categories = soup.find("meta", {"name":"keywords"})['content'].split(',')
|
||||
categories = [c for c in categories if not self.story.getMetadata('title') in c]
|
||||
if self.story.getMetadata('author') in categories:
|
||||
categories.remove(self.story.getMetadata('author'))
|
||||
# logger.debug("Meta = %s" % categories)
|
||||
for category in categories:
|
||||
# logger.debug("\tCategory=%s" % category)
|
||||
# self.story.addToList('category', category.title())
|
||||
self.story.addToList('eroticatags', category.title())
|
||||
## apply clean_chapter_titles
|
||||
def add_chapter(self,chapter_title,url,othermeta={}):
|
||||
if self.getConfig("clean_chapter_titles"):
|
||||
storytitle = self.story.getMetadataRaw('title').lower()
|
||||
chapter_name_type = None
|
||||
# strip trailing ch or pt before doing the chapter clean.
|
||||
# doesn't remove from story title metadata
|
||||
storytitle = re.sub(r'^(.*?)( (ch|pt))?$',r'\1',storytitle)
|
||||
if chapter_title.lower().startswith(storytitle):
|
||||
chapter = chapter_title[len(storytitle):].strip()
|
||||
# logger.debug('\tChapter: "%s"' % chapter)
|
||||
if chapter == '':
|
||||
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
|
||||
# Sometimes the first chapter does not have type of chapter
|
||||
if self.num_chapters() == 0:
|
||||
# logger.debug('\tChapter: first chapter without chapter type')
|
||||
chapter_name_type = None
|
||||
else:
|
||||
separater_char = chapter[0]
|
||||
# logger.debug('\tseparater_char: "%s"' % separater_char)
|
||||
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
|
||||
# logger.debug('\tChapter: "%s"' % chapter)
|
||||
if chapter.lower().startswith('ch.'):
|
||||
chapter = chapter[len('ch.'):].strip()
|
||||
try:
|
||||
chapter_title = 'Chapter %d' % int(chapter)
|
||||
except:
|
||||
chapter_title = 'Chapter %s' % chapter
|
||||
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
|
||||
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
||||
elif chapter.lower().startswith('pt.'):
|
||||
chapter = chapter[len('pt.'):].strip()
|
||||
try:
|
||||
chapter_title = 'Part %d' % int(chapter)
|
||||
except:
|
||||
chapter_title = 'Part %s' % chapter
|
||||
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
|
||||
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
||||
elif separater_char in [":", "-"]:
|
||||
chapter_title = chapter
|
||||
# logger.debug('\tChapter: taking chapter text as whole')
|
||||
super(LiteroticaSiteAdapter, self).add_chapter(chapter_title,url,othermeta)

    def extractChapterUrlsAndMetadata(self):
        """
        NOTE: Some stories can have versions,
              e.g. /my-story-ch-05-version-10
        NOTE: If two stories share the same title, a running index is added,
              e.g. /my-story-ch-02-1

        Strategy:
        * Go to the author's page and search for the current story link.
        * If it's in a tr.root-story => one-part story
          * get metadata and be done
        * If it's in a tr.sl => chapter in a series
          * Search up from there until we find a tr.ser-ttl (this is the
            story)
          * Gather metadata
          * Search down from there for all tr.sl until the next
            tr.ser-ttl; for each one
            * the chapter link is there

        In April 2024, the site introduced significant changes, including
        adding a 'Story Series' page and a link to it in each chapter.
        But not all stories have one: one-shots don't have 'Story Series'.

        literotica has 'Story Series' & 'Story'. FFF calls them 'Story' & 'Chapters'.
        See https://github.com/JimmXinu/FanFicFare/issues/1058#issuecomment-2078490037

        So /series/se/ will be the story URL for multi-chapter stories, but
        one-shots keep their individual 'chapter' URL.
        """
        logger.debug("Chapter/Story URL: <%s> " % self.url)

        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

        (data,rurl) = self.get_request_redirected(self.url)
        # logger.debug(data)
        ## for language domains
        self._setURL(rurl)
        logger.debug("set opened url:%s"%self.url)
        soup = self.make_soup(data)
        #strip comments from soup
        [comment.extract() for comment in soup.findAll(text=lambda text:isinstance(text, Comment))]

        if "This submission is awaiting moderator's approval" in data:
            raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url)

        ## 2025Feb - domains other than www now use different HTML.
        ## Need to look for two different versions of basically
        ## everything.

        ## not series URL, assumed to be a chapter. Look for the Story
        ## Info block of the post-beta page. I don't think it should happen?
        if '/series/se' not in self.url:
            #logger.debug(data)
            ## looking for /series/se URL to indicate this is a
            ## chapter.
            if not soup.select_one('div.page__aside') and not soup.select_one('div.sidebar') and not soup.select_one('div[class^="_sidebar_"]'):
                raise exceptions.FailedToDownload("Missing Story Info block, Beta turned off?")

            storyseriestag = soup.select_one('a.bn_av')
            if not storyseriestag:
                storyseriestag = soup.select_one('a[class^="_files__link_"]')
            # logger.debug("Story Series Tag:%s"%storyseriestag)

            if storyseriestag:
                self._setURL(storyseriestag['href'])
                data = self.get_request(storyseriestag['href'])
                # logger.debug(data)
                soup = self.make_soup(data)
                # logger.debug(soup)
            else:
                logger.debug("One-shot")

        isSingleStory = '/series/se' not in self.url

        if not isSingleStory:
            # Normalize the url?
            state = re.findall(r"prefix\=\"/series/\",state='(.+?)'</script>", data)
            json_state = json.loads(state[0].replace("\\'","'").replace("\\\\","\\"))
            url_series_id = unicode(re.match(self.getSiteURLPattern(),self.url).group('storyseriesid'))
            json_series_id = unicode(json_state['series']['data']['id'])
            if json_series_id != url_series_id:
                res = re.sub(url_series_id, json_series_id, unicode(self.url))
                logger.debug("Normalized url: %s"%res)
                self._setURL(res)
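                ## Illustrative example (assumed ids): a saved URL of
                ## /series/se/11111 whose embedded state reports id 22222
                ## becomes /series/se/22222.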

        ## common between one-shots and multi-chapters
        # title
        self.story.setMetadata('title', stripHTML(soup.select_one('h1')))
        # logger.debug(self.story.getMetadata('title'))

        # author
        ## XXX This is still the author URL like:
        ## https://www.literotica.com/stories/memberpage.php?uid=999999&page=submissions
        ## because that's what's on the page. It redirects to the /authors/ page.
        ## Only way I know right now to get the /authors/ is to make
        ## the req and look at the redirect.
        ## Should change to /authors/ if/when it starts appearing.
        ## Assuming it's in the same place.
        authora = soup.find("a", class_="y_eU")
        if not authora:
            authora = soup.select_one('a[class^="_author__title"]')
        authorurl = authora['href']
        # logger.debug(authora)
        # logger.debug(authorurl)
        if authorurl.startswith('//'):
            authorurl = self.parsedUrl.scheme+':'+authorurl
        self.story.setMetadata('author', stripHTML(authora))
        self.story.setMetadata('authorUrl', authorurl)
        if '?' in authorurl:
            self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0])
        elif '/authors/' in authorurl:
            self.story.setMetadata('authorId', authorurl.split('/')[-1])
        else: # if all else fails
            self.story.setMetadata('authorId', stripHTML(authora))

        # get the author page
        dataAuth = self.get_request(authorurl)
        soupAuth = self.make_soup(dataAuth)
        #strip comments from soup
        [comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, Comment))]
        # logger.debug(soupAuth)

        if soup.select('div#tabpanel-tags'):
            # logger.debug("tags1")
            self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div#tabpanel-tags a.av_as') ])
        if soup.select('div[class^="_widget__tags_"]'):
            # logger.debug("tags2")
            self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div[class^="_widget__tags_"] a[class^="_tag_item_"]') ])
        # logger.debug(self.story.getList('eroticatags'))

        ## Find link to url in author's page
        ## site has started using //domain.name/asdf urls; remove https?: from front
        ## site has started putting https back on again.
        ## site is now using language specific german.lit... etc on author pages.
        ## site is now back to using www.lit... etc on author pages.
        search_url_re = r"https?://"+LANG_RE+r"(\.i)?\." + re.escape(self.getSiteDomain()) + self.url[self.url.index('/s/'):]+r"$"
        logger.debug(search_url_re)
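        ## Illustrative examples (assumed URLs): the pattern matches
        ## author-page hrefs like
        ##   https://www.literotica.com/s/my-story-ch-01
        ##   https://german.literotica.com/s/my-story-ch-01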
        storyLink = soupAuth.find('a', href=re.compile(search_url_re))
        # storyLink = soupAuth.find('a', href=re.compile(r'.*literotica.com/s/'+re.escape(self.story.getMetadata('storyId')) ))
        # storyLink = soupAuth.find('a', href=re.compile(r'(https?:)?'+re.escape(self.url[self.url.index(':')+1:]).replace(r'www',r'[^\.]+') ))
        # storyLink = soupAuth.find('a', href=self.url)#[self.url.index(':')+1:])

        ## look first for 'Series Introduction', then Info panel short desc
        ## series can have either, so put in common code.
        desc = []
        introtag = soup.select_one('div.bp_rh')
        descdiv = soup.select_one('div#tabpanel-info div.bn_B') or \
                  soup.select_one('div[class^="_tab__pane_"] div[class^="_widget__info_"]')
        if introtag and stripHTML(introtag):
            # make sure there's something in the tag.
            # logger.debug("intro %s"%introtag)
            desc.append(unicode(introtag))
        elif descdiv and stripHTML(descdiv):
            # make sure there's something in the tag.
            # logger.debug("desc %s"%descdiv)
            desc.append(unicode(descdiv))
        if not desc or self.getConfig("include_chapter_descriptions_in_summary"):
            ## Only for backward compatibility with 'stories' that
            ## don't have an intro or short desc.
            descriptions = []
            for i, chapterdesctag in enumerate(soup.select('p.br_rk')):
                # remove category link, but only temporarily
                a = chapterdesctag.a.extract()
                descriptions.append("%d. %s" % (i + 1, stripHTML(chapterdesctag)))
                # now put it back--it's used below
                chapterdesctag.append(a)
            desc.append(unicode("<p>"+"</p>\n<p>".join(descriptions)+"</p>"))

        if storyLink is not None:
            # pull the published date from the author page
            # default values from single link. Updated below if multiple chapter.
            # logger.debug("Found story on the author page.")
            date = storyLink.parent.parent.findAll('td')[-1].text
            self.story.setMetadata('datePublished', makeDate(date, self.dateformat))
            self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat))
            urlTr = storyLink.parent.parent
            if "sl" in urlTr['class']:
                isSingleStory = False
            else:
                isSingleStory = True
        else:
            raise exceptions.FailedToDownload("Couldn't find story <%s> on author's page <%s>" % (self.url, authorurl))
        self.setDescription(self.url,u''.join(desc))

        if isSingleStory:
            self.story.setMetadata('title', storyLink.text.strip('/'))
            # logger.debug('Title: "%s"' % storyLink.text.strip('/'))
            self.setDescription(authorurl, urlTr.findAll("td")[1].text)
            self.story.addToList('category', urlTr.findAll("td")[2].text)
            # self.story.addToList('eroticatags', urlTr.findAll("td")[2].text)
            date = urlTr.findAll('td')[-1].text
            self.story.setMetadata('datePublished', makeDate(date, self.dateformat))
            self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat))
            averrating = stripHTML(storyLink.parent)
            ## title (0.00)
            averrating = averrating[averrating.rfind('(')+1:averrating.rfind(')')]
            try:
                self.story.setMetadata('averrating', float(averrating))
            except:
                pass
            # self.story.setMetadata('averrating',averrating)

            ## one-shots don't *display* date info, but they have it
            ## hidden in <script>
            ## shows _date_approve "date_approve":"01/31/2024"

            ## multichap also have "date_approve", but they have
            ## several and they're more than just the story chapters.
            date = re.search(r'"date_approve":"(\d\d/\d\d/\d\d\d\d)"',data)
            if not date:
                date = re.search(r'date_approve:"(\d\d/\d\d/\d\d\d\d)"',data)
            if date:
                dateval = makeDate(date.group(1), self.dateformat)
                self.story.setMetadata('datePublished', dateval)
                self.story.setMetadata('dateUpdated', dateval)

            ## one-shots don't have the same json data to get aver_rating
            ## from below. This kludge matches the date_approve
            rateall = re.search(r'rate_all:([\d\.]+)',data)
            if rateall:
                self.story.setMetadata('averrating', '%4.2f' % float(rateall.group(1)))

            ## one-shots assumed completed.
            self.story.setMetadata('status','Completed')

            # Add the category from the breadcrumb.
            breadcrumbs = soup.find('div', id='BreadCrumbComponent')
            if not breadcrumbs:
                breadcrumbs = soup.select_one('ul[class^="_breadcrumbs_list_"]')
            if not breadcrumbs:
                # _breadcrumbs_18u7l_1
                breadcrumbs = soup.select_one('nav[class^="_breadcrumbs_"]')
            self.story.addToList('category', breadcrumbs.find_all('a')[1].string)

            ## one-shot chapter
            self.add_chapter(self.story.getMetadata('title'), self.url)

        else:
            seriesTr = urlTr.previousSibling
            while 'ser-ttl' not in seriesTr['class']:
                seriesTr = seriesTr.previousSibling
            m = re.match(r"^(?P<title>.*?):\s(?P<numChapters>\d+)\sPart\sSeries$", seriesTr.find("strong").text)
            self.story.setMetadata('title', m.group('title'))
            seriesTitle = m.group('title')

            ## Multi-chapter stories. AKA multi-part 'Story Series'.
            bn_antags = soup.select('div#tabpanel-info p.bn_an')
            # logger.debug(bn_antags)
            if bn_antags and not self.getConfig("dates_from_chapters"):
                ## Use dates from series metadata unless dates_from_chapters is enabled
                dates = []
                for datetag in bn_antags[:2]:
                    datetxt = stripHTML(datetag)
                    # remove 'Started:' 'Updated:'
                    # Assume can't use 'Started:' 'Updated:' (vs [0] or [1]) because of lang localization
                    datetxt = datetxt[datetxt.index(':')+1:]
                    dates.append(datetxt)
                # logger.debug(dates)
                self.story.setMetadata('datePublished', makeDate(dates[0], self.dateformat))
                self.story.setMetadata('dateUpdated', makeDate(dates[1], self.dateformat))

            ## Walk the chapters
            chapterTr = seriesTr.nextSibling
            dates = []
            descriptions = []
            ratings = []
            chapters = []
            chapter_name_type = None
            while chapterTr is not None and 'sl' in chapterTr['class']:
                description = stripHTML(chapterTr.findAll("td")[1])
                chapterLink = chapterTr.find("td", "fc").find("a")
                if self.getConfig('chapter_categories_use_all'):
                    self.story.addToList('category', chapterTr.findAll("td")[2].text)
                    # self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
                pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)
                dates.append(pub_date)

                chapter_title = chapterLink.text
                if self.getConfig("clean_chapter_titles"):
                    # logger.debug('\tChapter Name: "%s"' % chapterLink.text)
                    if chapterLink.text.lower().startswith(seriesTitle.lower()):
                        chapter = chapterLink.text[len(seriesTitle):].strip()
                        # logger.debug('\tChapter: "%s"' % chapter)
                        if chapter == '':
                            chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
                            # Sometimes the first chapter does not have a chapter type
                            if self.num_chapters() == 0:
                                logger.debug('\tChapter: first chapter without chapter type')
                                chapter_name_type = None
                        else:
                            separater_char = chapter[0]
                            # logger.debug('\tseparater_char: "%s"' % separater_char)
                            chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
                            # logger.debug('\tChapter: "%s"' % chapter)
                            if chapter.lower().startswith('ch.'):
                                chapter = chapter[len('ch.'):].strip()
                                try:
                                    chapter_title = 'Chapter %d' % int(chapter)
                                except:
                                    chapter_title = 'Chapter %s' % chapter
                                chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
                                logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
                            elif chapter.lower().startswith('pt.'):
                                chapter = chapter[len('pt.'):]
                                try:
                                    chapter_title = 'Part %d' % int(chapter)
                                except:
                                    chapter_title = 'Part %s' % chapter
                                chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
                                logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
                            elif separater_char in [":", "-"]:
                                chapter_title = chapter
                                logger.debug('\tChapter: taking chapter text as whole')

                # pages include full URLs.
                chapurl = chapterLink['href']
                if chapurl.startswith('//'):
                    chapurl = self.parsedUrl.scheme + ':' + chapurl
                # logger.debug("Chapter URL: " + chapurl)
                # logger.debug("Chapter Title: " + chapter_title)
                # logger.debug("Chapter description: " + description)
                chapters.append((chapter_title, chapurl, description, pub_date))
                # self.add_chapter(chapter_title, chapurl)
                numrating = stripHTML(chapterLink.parent)
                ## title (0.00)
                numrating = numrating[numrating.rfind('(')+1:numrating.rfind(')')]
                try:
                    ratings.append(float(numrating))
                except:
                    pass
                chapterTr = chapterTr.nextSibling

            ## bn_antags[-1] contains "The author has completed this series." or "The author is still actively writing this series."
            ## I won't be surprised if this breaks later because of lang localization
            if "completed" in stripHTML(bn_antags[-1]):
                self.story.setMetadata('status','Completed')
            else:
                self.story.setMetadata('status','In-Progress')

            ## category from chapter list
            self.story.extendList('category',[ stripHTML(t) for t in soup.select('a.br_rl') ])

            ## chapter links from the /series/se page; it includes full URLs.
            for chapteratag in soup.select('a.br_rj'):
                chapter_title = stripHTML(chapteratag)
                # logger.debug('\tChapter: "%s"' % chapteratag)
                chapurl = chapteratag['href']
                self.add_chapter(chapter_title, chapurl)

            if self.getConfig("clean_chapter_titles") \
               and chapter_name_type is not None \
               and not chapters[0][0].startswith(chapter_name_type):
                logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
                logger.debug('\tChapter: first chapter="%s"' % chapters[0][0])
                logger.debug('\tChapter: first chapter number="%s"' % chapters[0][0][len('Chapter'):])
                chapters[0] = ("%s %s" % (chapter_name_type, chapters[0][0][len('Chapter'):].strip()),
                               chapters[0][1],
                               chapters[0][2],
                               chapters[0][3]
                               )

            # <img src="https://uploads.literotica.com/series/cover/813-1695143444-desktop-x1.jpg" alt="Series cover">
            coverimg = soup.select_one('img[alt="Series cover"]')
            if coverimg:
                self.setCoverImage(self.url,coverimg['src'])

            if self.getConfig("order_chapters_by_date"):
                chapters = sorted(chapters, key=lambda chapter: chapter[3])
            for i, chapter in enumerate(chapters):
                self.add_chapter(chapter[0], chapter[1])
                descriptions.append("%d. %s" % (i + 1, chapter[2]))
            ## Set the oldest date as publication date, the newest as update date
            dates.sort()
            self.story.setMetadata('datePublished', dates[0])
            self.story.setMetadata('dateUpdated', dates[-1])
            ## Set description to the joined chapter descriptions
            self.setDescription(authorurl,"<p>"+"</p>\n<p>".join(descriptions)+"</p>")
            #### Attempting averrating from JS metadata.
            #### also alternate chapters from json
            try:
                state_start="state='"
                state_end="'</script>"
                i = data.index(state_start)
                if i:
                    state = data[i+len(state_start):data.index(state_end,i)].replace("\\'","'").replace("\\\\","\\")
                    if state:
                        # logger.debug(state)
                        json_state = json.loads(state)
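                        ## Assumed shape of the embedded state (illustrative,
                        ## trimmed, not verified against the live site):
                        ##   {"series":{"data":{"id":12345},
                        ##              "works":[{"rate_all":"4.50",
                        ##                        "date_approve":"01/31/2024"}, ...]}}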
                        # logger.debug(json.dumps(json_state, sort_keys=True,indent=2, separators=(',', ':')))
                        all_rates = []
                        if 'series' in json_state:
                            all_rates = [ float(x['rate_all']) for x in json_state['series']['works'] ]

                        if len(ratings) > 0:
                            self.story.setMetadata('averrating','%4.2f' % (sum(ratings) / float(len(ratings))))

                        ## Extract dates from chapter approval dates if dates_from_chapters is enabled
                        if self.getConfig("dates_from_chapters"):
                            date_approvals = []
                            for work in json_state['series']['works']:
                                if 'date_approve' in work:
                                    try:
                                        date_approvals.append(makeDate(work['date_approve'], self.dateformat))
                                    except:
                                        pass
                            if date_approvals:
                                # Oldest date is published, newest is updated
                                date_approvals.sort()
                                self.story.setMetadata('datePublished', date_approvals[0])
                                self.story.setMetadata('dateUpdated', date_approvals[-1])

                        if all_rates:
                            self.story.setMetadata('averrating', '%4.2f' % (sum(all_rates) / float(len(all_rates))))

                # normalize on first chapter URL.
                self._setURL(self.get_chapter(0,'url'))

                ## alternate chapters from JSON
                if self.num_chapters() < 1:
                    logger.debug("Getting Chapters from series JSON")
                    seriesid = json_state.get('series',{}).get('data',{}).get('id',None)
                    if seriesid:
                        logger.info("Fetching chapter data from JSON")
                        logger.debug(seriesid)
                        series_json = json.loads(self.get_request('https://literotica.com/api/3/series/%s/works'%seriesid))
                        # logger.debug(json.dumps(series_json, sort_keys=True,indent=2, separators=(',', ':')))
                        for chap in series_json:
                            self.add_chapter(chap['title'], 'https://www.literotica.com/s/'+chap['url'])
                            ## Collect tags from the chapter JSON if tags_from_chapters is enabled
                            if self.getConfig("tags_from_chapters"):
                                self.story.extendList('eroticatags', [ unicode(t['tag']).title() for t in chap['tags'] ])

                # reset storyId to first chapter.
                self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])

                # Add the category from the breadcrumb. This might duplicate a category already added.
                self.story.addToList('category', soup.find('div', id='BreadCrumbComponent').findAll('a')[1].string)
                self.getCategories(soup)
            except Exception as e:
                logger.warning("Processing JSON failed. (%s)"%e)

        ## Features removed because not supportable by new site form:
        ##   averrating metadata entry
        ##   order_chapters_by_date option
        ##   use_meta_keywords option
        return


    def getPageText(self, raw_page, url):
        logger.debug('Getting page text')
        raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
        # logger.debug("\tChapter text: %s" % raw_page)
        page_soup = self.make_soup(raw_page)
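        ## Assumed chapter markup (illustrative): the story text sits in
        ##   <div class="aa_ht"><div><p>...</p></div></div>
        ## on www, or in <div class="_article__content_..."> on other domains.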
        #strip comments from soup
        [comment.extract() for comment in page_soup.find_all(string=lambda text:isinstance(text, Comment))]

        fullhtml = ""
        for aa_ht_div in page_soup.find_all('div', 'aa_ht') + page_soup.select('div[class^="_article__content_"]'):
            if aa_ht_div.div:
                html = unicode(aa_ht_div.div)
                # Strip some starting and ending tags,
                html = re.sub(r'^<div.*?>', r'', html)
                html = re.sub(r'</div>$', r'', html)
                html = re.sub(r'<p></p>$', r'', html)
                fullhtml = fullhtml + html
        # logger.debug('getPageText - fullhtml: %s' % fullhtml)
        return fullhtml

    def getChapterText(self, url):

@@ -352,9 +432,15 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
        raw_page = self.get_request(url)
        page_soup = self.make_soup(raw_page)
        pages = page_soup.find('div',class_='l_bH')
        if not pages:
            pages = page_soup.select_one('div._pagination_h0sum_1')
        if not pages:
            pages = page_soup.select_one('div.clearfix.panel._pagination_1400x_1')
        if not pages:
            pages = page_soup.select_one('div[class^="panel clearfix _pagination_"]')
        # logger.debug(pages)

        fullhtml = ""
        self.getCategories(page_soup)
        chapter_description = ''
        if self.getConfig("description_in_chapter"):
            chapter_description = page_soup.find("meta", {"name" : "description"})['content']

@@ -365,7 +451,10 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
        ## look for highest numbered page, they're not all listed
        ## when there are many.
        last_page_links = pages.find_all('a', class_='l_bJ')
        if not last_page_links:
            last_page_links = pages.select('a[class^="_pagination__item_"]')
        last_page_link = last_page_links[-1]
        last_page_no = int(urlparse.parse_qs(last_page_link['href'].split('?')[1])['page'][0])
        # logger.debug(last_page_no)
        for page_no in range(2, last_page_no+1):

@@ -374,7 +463,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
            raw_page = self.get_request(page_url)
            fullhtml += self.getPageText(raw_page, url)

        # logger.debug(fullhtml)
        fullhtml = self.utf8FromSoup(url, self.make_soup(fullhtml))
        fullhtml = chapter_description + fullhtml

@@ -382,6 +471,123 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):

        return fullhtml

    def get_urls_from_page(self,url,normalize):
        from ..geturls import get_urls_from_html

        ## hook for logins, etc.
        self.before_get_urls_from_page(url,normalize)

        # this way it uses User-Agent or other special settings.
        data = self.get_request(url,usecache=False)
        soup = self.make_soup(data)

        page_urls = get_urls_from_html(soup, url, configuration=self.configuration, normalize=normalize)

        if not self.getConfig("fetch_stories_from_api",True):
            logger.debug('fetch_stories_from_api Not enabled')
            return {'urllist': page_urls}

        user_story_list = re.search(r'literotica\.com/authors/.+?/lists\?listid=(?P<list_id>\d+)', url)
        fav_authors = re.search(r'literotica\.com/authors/.+?/favorites', url)
        written = re.search(r'literotica\.com/authors/.+?/works/', url)
        logger.debug((bool(user_story_list), bool(fav_authors), bool(written)))

        # If the url is not supported
        if not user_story_list and not fav_authors and not written:
            logger.debug('No supported link. %s', url)
            return {'urllist':page_urls}

        # Grab the main list where the chapters are contained.
        if user_story_list:
            js_story_list = re.search(r';\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]\);\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]=\{success:!\d,current_page:(?P<current_page>\d+?),last_page:(?P<last_page>\d+?),total:\d+?,per_page:\d+,(has_series:!\d)?data:\$R\[\d+?\]=\[\$R\[\d+?\]=(?P<data>.+)\}\]\}\);', data)
            logger.debug('user_story_list ID [%s]'%user_story_list.group('list_id'))
        else:
            js_story_list = re.search(r'\$R\[\d+?\]\(\$R\[\d+?\],\$R\[\d+?\]={current_page:(?P<current_page>\d+?),last_page:(?P<last_page>\d+?),total:\d+?,per_page:\d+,(has_series:!\d,)?data:\$R\[\d+\]=\[\$R\[\d+\]=\{(?!aim)(?P<data>.+)\}\);_\$HY\.r\[', data)

        # In case the regex becomes outdated
        if not js_story_list:
            logger.debug('Failed to grab data from the js.')
            return {'urllist':page_urls}

        user = None
        script_tags = soup.find_all('script')
        for script in script_tags:
            if not script.string:
                continue
            # Getting author from the js.
            user = re.search(r'_\$HY\.r\[\"AuthorQuery\[\\\"(?P<author>.+?)\\\"\]\"\]', script.string)
            if user != None:
                logger.debug("User: [%s]"%user.group('author'))
                break
        else:
            logger.debug('Failed to get a username')
            return {'urllist': page_urls}

        # Extract the current (should be 1) and last page numbers from the js.
        logger.debug("Pages %s/%s"%(js_story_list.group('current_page'), js_story_list.group('last_page')))

        urls = []
        # Necessary to format a proper link, as no visible data specifies what kind of link it should be.
        cat_to_link = {'adult-comics': 'i', 'erotic-art': 'i', 'illustrated-poetry': 'p', 'erotic-audio-poetry': 'p', 'erotic-poetry': 'p', 'non-erotic-poetry': 'p'}
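        ## Illustrative (assumed) use of the mapping: a work whose category
        ## pageUrl is 'adult-comics' becomes an /i/ link, e.g.
        ##   https://www.literotica.com/i/some-comic-title
        ## while anything not listed falls back to the usual /s/ story link.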
        stories_found = re.findall(r"category_info:\$R\[.*?type:\".+?\",pageUrl:\"(.+?)\"}.+?,type:\"(.+?)\",url:\"(.+?)\",", js_story_list.group('data'))
        for story in stories_found:
            story_category, story_type, story_url = story
            urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(story_category, 's'), story_url))

        # Remove the duplicates, preserving order
        seen = set()
        urls = [x for x in (page_urls + urls) if not (x in seen or seen.add(x))]
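        ## The one-liner above is an order-preserving dedupe: set.add()
        ## returns None (falsey), so each x is kept only the first time it
        ## is seen, e.g. [a, b, a, c] -> [a, b, c].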
        logger.debug("Found [%s] stories so far."%len(urls))

        # Sometimes the rest of the stories are buried in the js, so no fetching is necessary.
        if js_story_list.group('last_page') == js_story_list.group('current_page'):
            return {'urllist': urls}

        user = urlparse.quote(user.group(1))
        logger.debug("Escaped user: [%s]"%user)

        if written:
            category = re.search(r"_\$HY\.r\[\"AuthorSeriesAndWorksQuery\[\\\".+?\\\",\\\"\D+?\\\",\\\"(?P<type>\D+?)\\\"\]\"\]=\$R\[\d+?\]=\$R\[\d+?\]\(\$R\[\d+?\]=\{", data)
        elif fav_authors:
            category = re.search(r"_\$HY\.r\[\"AuthorFavoriteWorksQuery\[\\\".+?\\\",\\\"(?P<type>\D+?)\\\",\d\]\"\]=\$R\[\d+?\]=\$R\[\d+?\]\(\$R\[\d+?\]={", data)

        if not user_story_list and not category:
            logger.debug("Type of works not found")
            return {'urllist': urls}

        last_page = int(js_story_list.group('last_page'))
        current_page = int(js_story_list.group('current_page')) + 1
        # Fetch the remaining urls from the api. Can't trust the page count from the website; sometimes even the api returns an outdated number of pages.
        while current_page <= last_page:
            i = len(urls)
            logger.debug("Pages %s/%s"%(current_page, int(last_page)))
            if fav_authors:
                jsn = self.get_request('https://literotica.com/api/3/users/{}/favorite/works?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22type%22%3A%22{}%22%2C%22withSeriesDetails%22%3Atrue%7D'.format(user, current_page, category.group('type')))
            elif user_story_list:
                jsn = self.get_request('https://literotica.com/api/3/users/{}/list/{}?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22withSeriesDetails%22%3Atrue%7D'.format(user, user_story_list.group('list_id'), current_page))
            else:
                jsn = self.get_request('https://literotica.com/api/3/users/{}/series_and_works?params=%7B%22page%22%3A{}%2C%22pageSize%22%3A50%2C%22sort%22%3A%22date%22%2C%22type%22%3A%22{}%22%2C%22listType%22%3A%22expanded%22%7D'.format(user, current_page, category.group('type')))
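                ## The %7B...%7D blobs above are just URL-encoded JSON; the
                ## last one, decoded, reads (illustrative):
                ##   {"page":2,"pageSize":50,"sort":"date","type":"...","listType":"expanded"}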

            urls_data = json.loads(jsn)
            last_page = urls_data["last_page"]
            current_page = int(urls_data["current_page"]) + 1
            for story in urls_data['data']:
                #logger.debug('parts' in story)
                if story['url'] and story.get('work_count') == None:
                    urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(story["category_info"]["pageUrl"], 's'), str(story['url'])))
                    continue
                # Most of the time a series has no url specified and contains all of the story links belonging to the series
                urls.append('https://www.literotica.com/series/se/%s'%str(story['id']))
                for series_story in story['parts']:
                    urls.append('https://www.literotica.com/%s/%s'%(cat_to_link.get(series_story["category_info"]["pageUrl"], 's'), str(series_story['url'])))
            logger.debug("Found [%s] stories."%(len(urls) - i))

        # Again, remove duplicates.
        seen = set()
        urls = [x for x in urls if not (x in seen or seen.add(x))]

        logger.debug("Found total of [%s] stories"%len(urls))
        return {'urllist':urls}

def getClass():
    return LiteroticaSiteAdapter

@@ -1,347 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
### Adapted by GComyn
### Completed on November 22, 2016
##############################################################################
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode

from .base_adapter import BaseSiteAdapter, makeDate

class LOTRgficComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.story.setMetadata('siteabbrev','lotrgfic')

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

    @staticmethod
    def getSiteDomain():
        return 'www.lotrgfic.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            addurl = "&warning=3"
        else:
            addurl=""

        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        data = self.get_request(url)

        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
            raise exceptions.AdultCheckRequired(self.url)
        elif "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        soup = self.make_soup(data)

        ### Main Content for the Table Of Contents page.
        div = soup.find('div',{'id':'maincontent'})

        divfooter = div.find('div',{'id':'footer'})
        if divfooter != None:
            divfooter.extract()

        ## Title
        a = div.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = div.find('a', href=re.compile(r"viewuser.php"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in div.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

        ### Metadata is contained

        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        ### This site has the metadata formatted all over the place,
        ### so we have to do some very kludgy programming to get it.
        ### If someone can do it better, please do so, and let us know.
        ## I'm going to leave this section in, so we can get those
        ## elements that are "formatted correctly".
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## the summary is not encased in a span label... so we can't do anything here.
                ## I'm going to leave it here just in case.
                ## Everything until the next span class='label'
                svalue = ''
                while value and 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                # sometimes a poorly formatted desc (<p> w/o </p>) leads
                # to all labels being included.
                svalue=svalue[:svalue.find('<span class="label">')]
                self.setDescription(url,svalue)

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:
                    self.story.addToList('genre',genre)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:
                    self.story.addToList('warnings',warning)

            if 'Places' in label:
                places = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                placestext = [place.string for place in places]
                self.warning = ', '.join(placestext)
                for place in placestext:
                    self.story.addToList('places',place)

            if 'Times' in label:
                times = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
                timestext = [time.string for time in times]
                self.warning = ', '.join(timestext)
                for time in timestext:
                    self.story.addToList('times',time)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

        ## Now we are going to kludge together the rest of the metadata
        metad = soup.findAll('p',{'class':'smaller'})
        ## Categories don't have a proper label, but do use links, so...
        cats = soup.findAll('a',href=re.compile(r'browse.php\?type=categories'))
        catstext = [cat.string for cat in cats]
        for cat in catstext:
            if cat != None:
                self.story.addToList('category',cat)

        ## Characters don't have a proper label, but do use links, so...
        chars = soup.findAll('a',href=re.compile(r'browse.php\?type=characters'))
        charstext = [char.string for char in chars]
        for char in charstext:
            if char != None:
                self.story.addToList('characters',char)

        ### Rating is not enclosed in a label, only in a p tag classed 'smaller' so...
        ratng = metad[0].find('strong').get_text().replace('Rated','').strip()
        self.story.setMetadata('rating', ratng)

        ## Now we try to get the summary... it's not within its own
        ## dedicated tag, so we have to split some hairs...
        ## This may not work every time... but I tested it with 6 stories...
        mdata = metad[0]
        while '<hr/>' not in unicode(mdata.nextSibling):
            mdata = mdata.nextSibling
        self.setDescription(url,mdata.previousSibling.previousSibling.get_text())

        ### the rest of the metadata are not in tags at all... so we have to be really kludgy.
        ## we don't need the rest of them, so we get rid of all but the last one
        metad = metad[-1]
        ## we also don't need any of the links in here, so we'll get rid of them as well.
        links = metad.findAll('a')
        for link in links:
            link.extract()
        ## and we've already done the labels, so let's remove them
        labels = metad.findAll('span',{'class':'label'})
        for label in labels:
            label.extract()
        ## now we should only have text and <br>'s... something like this:
        #<p class="smaller">Categories:
        #<br/>
        #Characters: , , ,
        #<br/>
        # , <br/> <br/> <br/> None<br/>
        #Challenges: None
        #<br/>
        #Series: None
        #<br/>
        #Chapters: 1    |    Word count: 200    |    Read Count: 767
        #<br/>
        #Completed: Yes    |    Updated: 04/27/13    |    Published: 04/27/13
        #<br/>
        #</p>

        ## we'll have to remove the non-breaking spaces to get this to work.
        metad = unicode(metad).replace(u"\xa0",'').replace('\n','')
        for txt in metad.split('<br/>'):
            if 'Challenges:' in txt:
                txt = txt.replace('Challenges:','').strip()
                self.story.setMetadata('challenges', txt)
            elif 'Series:' in txt:
                txt = txt.replace('Series:','').strip()
                self.story.setMetadata('challenges', txt)
            elif 'Chapters:' in txt:
                for txt2 in txt.split('|'):
                    txt2 = txt2.replace('\n','').strip()
                    if 'Word count:' in txt2:
                        txt2 = txt2.replace('Word count:','').strip()
                        self.story.setMetadata('numWords', txt2)
                    elif 'Read Count:' in txt2:
                        txt2= txt2.replace('Read Count:','').strip()
                        self.story.setMetadata('readings', txt2)
            elif 'Completed:' in txt:
                for txt2 in txt.split('|'):
                    txt2 = txt2.strip()
                    if 'Completed:' in txt2:
                        if 'Yes' in txt2:
                            self.story.setMetadata('status', 'Completed')
                        else:
                            self.story.setMetadata('status', 'In-Progress')
                    elif 'Updated:' in txt2:
                        txt2= txt2.replace('Updated:','').strip()
                        self.story.setMetadata('dateUpdated', makeDate(txt2.strip(), "%m/%d/%y"))
                    elif 'Published:' in txt2:
                        txt2= txt2.replace('Published:','').strip()
                        self.story.setMetadata('datePublished', makeDate(txt2.strip(), "%m/%d/%y"))

    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        data = self.get_request(url)
        # problems with some stories, but only in calibre. I suspect
        # issues with different SGML parsers in python. This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        soup = self.make_soup(data)

        span = soup.find('div', {'id' : 'maincontent'})

        if None == span:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        # Everything is encased in the maincontent section, so we have
        # to remove as much as we can systematically
        tables = span.findAll('table')
        for table in tables:
            table.extract()

        headings = span.findAll('h3')
        for heading in headings:
            heading.extract()

        links = span.findAll('a')
        for link in links:
            link.extract()

        forms = span.findAll('form')
        for form in forms:
            form.extract()

        divs = span.findAll('div')
        for div in divs:
            div.extract()

        return self.utf8FromSoup(url,span)

def getClass():
    return LOTRgficComAdapter

@@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
        self.story.setMetadata('rating', rating)

        # Find the chapters:
        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -134,7 +134,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):

        # <span class="label">Rated:</span> NC-17<br /> etc

        labels = soup.find_all('span',{'class':'label'})

        value = labels[0].previousSibling
        svalue = ""

@@ -154,22 +154,22 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
            self.story.setMetadata('numWords', value.split(' -')[0])

            if 'Categories' in label:
                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

@@ -194,7 +194,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
        series_url = 'http://'+self.host+'/'+a['href']

        seriessoup = self.make_soup(self.get_request(series_url))
        storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
        i=1
        for a in storyas:
            if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

@@ -162,7 +162,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
        self.story.extendList('authorId', [authorId])
        self.story.extendList('authorUrl', [authorUrl])

        if not self.story.getMetadataRaw('rating'):
            ratingTitle = chapter.getRatingTitle()
            if ratingTitle:
                self.story.setMetadata('rating', ratingTitle)

@@ -204,7 +204,6 @@ class MassEffect2InAdapter(BaseSiteAdapter):
        self.story.setMetadata('datePublished', datePublished)
        self.story.setMetadata('dateUpdated', dateUpdated)
        self.story.setMetadata('numWords', unicode(wordCount))

        # Site-specific metadata.
        self.story.setMetadata('language', self.SITE_LANGUAGE)

@@ -678,7 +677,7 @@ class Chapter(object):

    def _excludeEditorSignature(self, root):
        """Exclude editor signature from within `root' element."""
        for textNode in root.find_all(string=True):
            if re.match(self.SIGNED_PATTERN, textNode.string):
                editorLink = textNode.findNext('a')
                if editorLink:

@@ -64,7 +64,9 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
        return "https://mcstories.com/StoryTitle/ https://mcstories.com/StoryTitle/index.html https://mcstories.com/StoryTitle/StoryTitle1.html"

    def getSiteURLPattern(self):
        ## Note that this uses a regular expression *negative*
        ## lookahead--story URLs *can't* have /Titles/ /Authors/ etc.
        return r"https?://(www\.)?mcstories\.com(?!/(Titles|Authors|Tags|ReadersPicks)/)/[a-zA-Z0-9_-]+/"

    def extractChapterUrlsAndMetadata(self):
        """

@@ -83,7 +85,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
        data1 = self.get_request(self.url)
        soup1 = self.make_soup(data1)
        #strip comments from soup
        [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]

        if 'Page Not Found.' in data1:
            raise exceptions.StoryDoesNotExist(self.url)

@@ -161,7 +163,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
        soup1 = self.make_soup(data1)

        #strip comments from soup
        [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]

        # get story text
        story1 = soup1.find('article', id='mcstories')

@@ -148,12 +148,12 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):

        # category
        # <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
        for a in soup.find_all('a',href=re.compile(r"^/fanfic/a/")):
            self.story.addToList('category',a.string)

        # genre
        # <a href="/fanfic/src.php/g/567">Ranma 1/2</a>
        for a in soup.find_all('a',href=re.compile(r"^/fanfic/src.php/g/")):
            self.story.addToList('genre',a.string)

        metasoup = soup.find("div",{"class":"post-meta"})

@@ -1,54 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
from .base_efiction_adapter import BaseEfictionAdapter

class MerengoHuAdapter(BaseEfictionAdapter):

    @classmethod
    def getProtocol(self):
        return "https"

    @staticmethod
    def getSiteDomain():
        return 'merengo.hu'

    @classmethod
    def getSiteAbbrev(self):
        return 'merengo'

    @classmethod
    def getDateFormat(self):
        return "%Y.%m.%d"

    @classmethod
    def getBacktoIndex(self):
        return 'Vissza az indexhez'

    def extractChapterUrlsAndMetadata(self):
        ## merengo.hu has a custom 18+ consent click-through
        self.get_request(self.getUrlForPhp('tizennyolc.php')+'?consent=true')

        ## Call super of extractChapterUrlsAndMetadata().
        ## base_efiction leaves the soup in self.html.
        return super(MerengoHuAdapter, self).extractChapterUrlsAndMetadata()

def getClass():
    return MerengoHuAdapter

@@ -154,7 +154,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

@@ -170,7 +170,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
            return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

@@ -191,13 +191,13 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
            self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat)

            if 'Characters' in label:
                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char)

@@ -206,7 +206,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
            ## leaving it in. Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Genre' in label:
                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:

@@ -216,7 +216,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
            ## leaving it in. Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Warnings' in label:
                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:

@@ -243,7 +243,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
        series_url = 'https://'+self.host+'/'+a['href']

        seriessoup = self.make_soup(self.get_request(series_url))
        storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
        i=1
        for a in storyas:
            # skip 'report this' and 'TOC' links

@@ -40,7 +40,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter):

        self.story.setMetadata("title", soup.select_one("h3.title").text)

        for author in soup.find("h3", string="Author:").fetchNextSiblings(
            "a", href=re.compile("/author/")
        ):
            self.story.addToList("authorId", author.text)

@@ -91,7 +91,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter):
        content = soup.find(id="chapter-content")

        # Remove chapter header if present
        chapter_header = content.find(["p", "h3"], string=re.compile(r"Chapter \d+:"))

        if chapter_header:
            chapter_header.decompose()
|
||||
|
|
|
|||
|
|
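The NovelFull hunks are the companion migration: bs4 deprecated the `text=` search argument in favor of `string=`. A small sketch, assuming bs4 >= 4.4 (where `string=` was introduced); both spellings match on a tag's string content:

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup('<h3>Author:</h3><a href="/author/x">X</a>', 'html.parser')
# identical results; text= just emits a DeprecationWarning on recent bs4
assert soup.find('h3', string='Author:') == soup.find('h3', text='Author:')
```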
@@ -189,13 +189,13 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
                "Error downloading Chapter: %s! Missing required element!" % url)

        # Some comments we will get is invalid. Remove them all.
-        [comment.extract() for comment in story.find_all(text=lambda text:isinstance(text, Comment))]
+        [comment.extract() for comment in story.find_all(string=lambda text:isinstance(text, Comment))]

        # We don't need links. They have a bad css and they are not working most of times.
        [a.extract() for a in story.find_all('a')]

        # Some tags have non-standard tag name.
-        for tag in story.findAll(recursive=True):
+        for tag in story.find_all(recursive=True):
            if tag.name not in HTML_TAGS:
                tag.name = 'span'
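The LightNovelGate cleanup loop above, as a self-contained sketch; the real HTML_TAGS constant lives elsewhere in FanFicFare, so the abbreviated set here is a stand-in assumption:

```python
from bs4 import BeautifulSoup, Comment

HTML_TAGS = {'p', 'div', 'span', 'a', 'i', 'b'}  # abbreviated stand-in
story = BeautifulSoup('<p>hi<!-- junk --><foo>x</foo></p>', 'html.parser')
for comment in story.find_all(string=lambda t: isinstance(t, Comment)):
    comment.extract()              # drop stray HTML comments
for tag in story.find_all(recursive=True):
    if tag.name not in HTML_TAGS:  # e.g. <foo> from sloppy site markup
        tag.name = 'span'          # demote unknown tags to neutral spans
print(story)  # <p>hi<span>x</span></p>
```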
@@ -1,149 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-####################################################################################################
-## Adapted by GComyn on April 22, 2017
-####################################################################################################
-
-from __future__ import absolute_import
-import logging
-import re
-import sys # ## used for debug purposes
-
-# py2 vs py3 transition
-
-from .base_adapter import BaseSiteAdapter, makeDate
-
-from .. import exceptions as exceptions
-from ..htmlcleanup import stripHTML
-
-logger = logging.getLogger(__name__)
-
-####################################################################################################
-def getClass():
-    return NovelTroveComSiteAdapter
-
-
-####################################################################################################
-class NovelTroveComSiteAdapter(BaseSiteAdapter):
-    ''' This is a site with 1 story per page, so no multiple chapter stories
-    The date is listed (on the newer stories) as a month and a year, so I'll be adding that
-    to the summary, instead of trying to transform it to a date. '''
-    def __init__(self, config, url):
-        BaseSiteAdapter.__init__(self, config, url)
-
-        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
-        self.password = ""
-        self.is_adult = False
-
-        # get storyId from url
-        # https://noveltrove.com/story/983/put-that-big-cock-in-me
-        self.story.setMetadata('storyId', self.parsedUrl.path.split('/')[2] + '_' + self.parsedUrl.path.split('/')[3])
-
-        # Each adapter needs to have a unique site abbreviation.
-        self.story.setMetadata('siteabbrev','ntcom')
-
-        # This is a 1 story/page site, so we will initialize the variable to keep the soup
-        self.html = ''
-        self.endindex = []
-
-        # The date format will vary from site to site.
-        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-        self.dateformat = "%d %b. '%y"
-
-    ####################################################################################################
-    @staticmethod # must be @staticmethod, don't remove it.
-    def getSiteDomain():
-        # The site domain. Does have www here, if it uses it.
-        return 'noveltrove.com'
-
-    ####################################################################################################
-    @classmethod
-    def getSiteExampleURLs(cls):
-        return "https://"+cls.getSiteDomain()+"/story/12345/astoryname"
-
-    ####################################################################################################
-    def getSiteURLPattern(self):
-        return r"https://"+re.escape(self.getSiteDomain())+r"/story/([0-9])+/*(?P<id>[^/]+)"
-
-    ####################################################################################################
-    ## Getting the chapter list and the meta data, plus 'is adult' checking.
-    def doExtractChapterUrlsAndMetadata(self, get_cover=True):
-
-        url = self.url
-
-        data = self.get_request(url)
-
-        soup = self.make_soup(data)
-
-        # Now go hunting for all the meta data we can get
-        metablock = soup.find('div', {'class': 'title-infos'})
-
-        ## Getting Title
-        title = stripHTML(metablock.find('h1'))
-        self.story.setMetadata('title', title)
-
-        ## Getting author
-        author = metablock.find('a', {'class':'author'})
-        self.story.setMetadata('authorId',author['href'].split('/')[1])
-        self.story.setMetadata('authorUrl','https://'+self.host+author['href'])
-        self.story.setMetadata('author',author.string)
-
-        ## Get the categories
-        for tag in metablock.find_all('a', {'class':'story-category'}):
-            self.story.addToList('category',stripHTML(tag))
-
-        ## There is no summary for these stories, so I'm going to take the first
-        ## 250 characters.
-        synopsis = ''
-        pcount = 0
-        for para in soup.find('div', {'class':'body'}).find_all('p'):
-            synopsis += para.get_text() + ' '
-            pcount += 1
-            if pcount > 10:
-                break
-
-        synopsis = synopsis.strip()[:250] + '...'
-
-        self.setDescription(url, synopsis)
-
-        ## Since this is a 1 story/page site, the published and updated dates are the same.
-        dateposted = stripHTML(metablock.find('div', {'class':'date'}))
-        self.story.setMetadata('datePublished', makeDate(dateposted, self.dateformat))
-        self.story.setMetadata('dateUpdated', makeDate(dateposted, self.dateformat))
-
-        ## This is a 1 story/page site, so we'll keep the soup for the getChapterText function
-        ## the chapterUrl and numChapters need to be set as well
-        self.html = soup
-        self.add_chapter(self.story.getMetadata('title'), url)
-        self.story.setMetadata('status', 'Completed')
-
-        ## Getting the non-standard title page entries
-        copyrt = soup.find('div', {'class':'copyright'}).get_text()
-        self.story.setMetadata('copyright', copyrt)
-
-
-    # grab the text for an individual chapter.
-    def getChapterText(self, url):
-        logger.debug('Using data that we got from: %s' % url)
-
-        soup = self.html
-        story = soup.find('div', {'class':'body'})
-
-        if story == None:
-            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
-
-        return self.utf8FromSoup(url,story)
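Illustrative check (not part of the diff) of how the deleted NovelTrove adapter built its storyId from path segments [2] and [3] of the story URL:

```python
from urllib.parse import urlparse

path = urlparse('https://noveltrove.com/story/983/put-that-big-cock-in-me').path
print(path.split('/')[2] + '_' + path.split('/')[3])  # 983_put-that-big-cock-in-me
```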
@@ -137,14 +137,14 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):

        try:
            # in case link points somewhere other than the first chapter
-            a = soup.findAll('option')[1]['value']
+            a = soup.find_all('option')[1]['value']
            self.story.setMetadata('storyId',a.split('=',)[1])
            url = 'http://'+self.host+'/'+a
            soup = self.make_soup(self.get_request(url))
        except:
            pass

-        for info in asoup.findAll('table', {'class' : 'border'}):
+        for info in asoup.find_all('table', {'class' : 'border'}):
            a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
            if a != None:
                self.story.setMetadata('title',stripHTML(a))
@@ -152,7 +152,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):


        # Find the chapters:
-        chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
+        chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
        if len(chapters) == 0:
            self.add_chapter(self.story.getMetadata('title'),url)
        else:
@@ -171,7 +171,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
        except:
            return ""

-        cats = info.findAll('a',href=re.compile('categories.php'))
+        cats = info.find_all('a',href=re.compile('categories.php'))
        for cat in cats:
            self.story.addToList('category',cat.string)
@@ -188,7 +188,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
            self.setDescription(url,svalue)

        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = info.findAll('b')
+        labels = info.find_all('b')
        for labelspan in labels:
            value = labelspan.nextSibling
            label = stripHTML(labelspan)
@@ -93,26 +93,26 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
        chapters = soup.find('select')
        if chapters == None:
            self.add_chapter(self.story.getMetadata('title'),url)
-            for b in soup.findAll('b'):
+            for b in soup.find_all('b'):
                if b.text == "Updated":
                    date = b.nextSibling.string.split(': ')[1].split(',')
                    self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
                    self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat))
        else:
            i = 0
-            chapters = chapters.findAll('option')
+            chapters = chapters.find_all('option')
            for chapter in chapters:
                self.add_chapter(chapter,'https://'+self.host+chapter['value'])
                if i == 0:
                    self.story.setMetadata('storyId',chapter['value'].split('/')[3])
-                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
+                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
                    for b in head:
                        if b.text == "Updated":
                            date = b.nextSibling.string.split(': ')[1].split(',')
                            self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))

                if i == (len(chapters)-1):
-                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
+                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
                    for b in head:
                        if b.text == "Updated":
                            date = b.nextSibling.string.split(': ')[1].split(',')
@@ -160,20 +160,20 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
        soup = self.make_soup(self.get_request(url))

        chapter=self.make_soup('<div class="story"></div>')
-        for p in soup.findAll(['p','blockquote']):
+        for p in soup.find_all(['p','blockquote']):
            if "This is for problems with the formatting or the layout of the chapter." in stripHTML(p):
                break
            chapter.append(p)

-        for a in chapter.findAll('div'):
+        for a in chapter.find_all('div'):
            a.extract()
-        for a in chapter.findAll('table'):
+        for a in chapter.find_all('table'):
            a.extract()
-        for a in chapter.findAll('script'):
+        for a in chapter.find_all('script'):
            a.extract()
-        for a in chapter.findAll('form'):
+        for a in chapter.find_all('form'):
            a.extract()
-        for a in chapter.findAll('textarea'):
+        for a in chapter.find_all('textarea'):
            a.extract()
@@ -1,241 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Software: eFiction
-from __future__ import absolute_import
-import logging
-logger = logging.getLogger(__name__)
-import re
-from ..htmlcleanup import stripHTML
-from .. import exceptions as exceptions
-
-# py2 vs py3 transition
-from ..six import text_type as unicode
-
-from .base_adapter import BaseSiteAdapter, makeDate
-
-def getClass():
-    return PonyFictionArchiveNetAdapter
-
-# Class name has to be unique. Our convention is camel case the
-# sitename with Adapter at the end. www is skipped.
-class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
-
-    def __init__(self, config, url):
-        BaseSiteAdapter.__init__(self, config, url)
-
-        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
-        self.password = ""
-        self.is_adult=False
-
-        # get storyId from url--url validation guarantees query is only sid=1234
-        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-
-        # normalized story URL.
-        if "explicit" in self.parsedUrl.netloc:
-            self._setURL('https://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
-            self.dateformat = "%d/%b/%y"
-        else:
-            self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
-            self.dateformat = "%d %b %Y"
-
-        # Each adapter needs to have a unique site abbreviation.
-        self.story.setMetadata('siteabbrev','pffa')
-
-    @staticmethod # must be @staticmethod, don't remove it.
-    def getSiteDomain():
-        # The site domain. Does have www here, if it uses it.
-        return 'ponyfictionarchive.net'
-
-    @classmethod
-    def getAcceptDomains(cls):
-        return ['www.ponyfictionarchive.net','ponyfictionarchive.net','explicit.ponyfictionarchive.net']
-
-    @classmethod
-    def getSiteExampleURLs(cls):
-        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234 https://explicit."+cls.getSiteDomain()+"/viewstory.php?sid=1234"
-
-    def getSiteURLPattern(self):
-        return r"https?://(www\.|explicit\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
-
-
-    ## Getting the chapter list and the meta data, plus 'is adult' checking.
-    def extractChapterUrlsAndMetadata(self):
-
-        if self.is_adult or self.getConfig("is_adult"):
-            # Weirdly, different sites use different warning numbers.
-            # If the title search below fails, there's a good chance
-            # you need a different number. print data at that point
-            # and see what the 'click here to continue' url says.
-            addurl = "&warning=9"
-        else:
-            addurl=""
-
-        # index=1 makes sure we see the story chapter index. Some
-        # sites skip that for one-chapter stories.
-        url = self.url+'&index=1'+addurl
-        logger.debug("URL: "+url)
-
-        data = self.get_request(url)
-
-        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
-        if m != None:
-            if self.is_adult or self.getConfig("is_adult"):
-                # We tried the default and still got a warning, so
-                # let's pull the warning number from the 'continue'
-                # link and reload data.
-                addurl = m.group(1)
-                # correct stupid &amp; error in url.
-                addurl = addurl.replace("&amp;","&")
-                url = self.url+'&index=1'+addurl
-                logger.debug("URL 2nd try: "+url)
-
-                data = self.get_request(url)
-            else:
-                raise exceptions.AdultCheckRequired(self.url)
-
-        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
-            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
-
-        soup = self.make_soup(data)
-        # print data
-
-
-        ## Title
-        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
-        self.story.setMetadata('title',stripHTML(a))
-
-        # Find authorid and URL from... author url.
-        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
-        self.story.setMetadata('authorId',a['href'].split('=')[1])
-        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
-        self.story.setMetadata('author',a.string)
-
-        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
-            # just in case there's tags, like <i> in chapter titles.
-            self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
-
-
-        # eFiction sites don't help us out a lot with their meta data
-        # formating, so it's a little ugly.
-
-        # utility method
-        def defaultGetattr(d,k):
-            try:
-                return d[k]
-            except:
-                return ""
-
-        genres = soup.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
-        for genre in genres:
-            self.story.addToList('genre',genre.string)
-
-        warnings = soup.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
-        for warning in warnings:
-            self.story.addToList('warnings',warning.string)
-
-        status = soup.find('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
-        if status: # apparently this site can have stories with neither In-Progress or Complete.
-            self.story.setMetadata('status',status.string)
-
-        try:
-            # explicit.site and .site have some differences now...
-            section = soup.findAll('span', {'class' : 'General'})[1]
-            self.story.setMetadata('rating', section.previousSibling.previousSibling.string)
-
-            value = section.nextSibling
-            svalue = ""
-            while 'label' not in defaultGetattr(value,'class'):
-                svalue += unicode(value)
-                value = value.nextSibling
-            self.setDescription(url,svalue)
-
-        except:
-            # find rating in data
-            # <br /> • Mature • <br />
-            lead = "<br /> • "
-            trail = " • <br />"
-            rating = data[data.index(lead)+len(lead):data.index(trail)]
-            if len(rating)<20: # minor sanity check.
-                self.story.setMetadata('rating',rating)
-            descstr = data[data.index(trail)+len(trail):] # from desc on
-            descstr = descstr[:descstr.index('<span class="label">')] # remove after desc.
-            self.setDescription(url,descstr)
-
-        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
-        for labelspan in labels:
-            value = labelspan.nextSibling
-            label = labelspan.string
-
-            if 'Word count' in label:
-                self.story.setMetadata('numWords', value)
-
-            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
-                for char in chars:
-                    self.story.addToList('characters',char.string)
-
-            if 'Completed' in label:
-                if 'Yes' in value:
-                    self.story.setMetadata('status', 'Completed')
-                else:
-                    self.story.setMetadata('status', 'In-Progress')
-
-            if 'Published' in label:
-                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
-
-            if 'Updated' in label:
-                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
-
-        try:
-            # Find Series name from series URL.
-            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
-            series_name = a.string
-            series_url = 'https://'+self.host+'/'+a['href']
-
-            seriessoup = self.make_soup(self.get_request(series_url))
-            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
-            i=1
-            for a in storyas:
-                # skip 'report this' and 'TOC' links
-                if 'contact.php' not in a['href'] and 'index' not in a['href']:
-                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
-                        self.setSeries(series_name, i)
-                        self.story.setMetadata('seriesUrl',series_url)
-                        break
-                    i+=1
-
-        except:
-            # I find it hard to care if the series parsing fails
-            pass
-
-    # grab the text for an individual chapter.
-    def getChapterText(self, url):
-
-        logger.debug('Getting chapter text from: %s' % url)
-
-        soup = self.make_soup(self.get_request(url))
-
-        div = soup.find('div', {'id' : 'story'})
-
-        if None == div:
-            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

-        return self.utf8FromSoup(url,div)
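One subtlety in the deleted adapter worth preserving: the 'click here to continue' link is scraped from raw HTML, so its ampersands arrive entity-escaped and must be unescaped before reuse — that is what `replace("&amp;","&")` does above. For example:

```python
# warning id is a made-up value; the pattern is what matters
print("&amp;ageconsent=ok&amp;warning=4".replace("&amp;", "&"))
# -> &ageconsent=ok&warning=4
```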
@@ -80,7 +80,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
        self.story.setMetadata('author',a.string)

        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/fanfiction/'+chapter['href'])
@@ -92,7 +92,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string
@@ -116,13 +116,13 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
                self.story.setMetadata('reads', value)

            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    if "Snape and Harry (required)" in char:
@@ -132,27 +132,27 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
                    self.story.addToList('characters',char.string)

            if 'Warning' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                for warning in warnings:
                    self.story.addToList('warnings',stripHTML(warning))

            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                for genre in genres:
                    self.story.addToList('genre',stripHTML(genre))

            if 'Takes Place' in label:
-                takesplaces = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                takesplaces = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                for takesplace in takesplaces:
                    self.story.addToList('takesplaces',stripHTML(takesplace))

            if 'Snape flavour' in label:
-                snapeflavours = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                snapeflavours = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                for snapeflavour in snapeflavours:
                    self.story.addToList('snapeflavours',stripHTML(snapeflavour))

            if 'Tags' in label:
-                sitetags = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                sitetags = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                for sitetag in sitetags:
                    self.story.addToList('sitetags',stripHTML(sitetag))
@@ -176,7 +176,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
            series_url = 'http://'+self.host+'/fanfiction/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -121,7 +121,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
        self.story.setMetadata('author',a.string)

        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/missingpieces/'+chapter['href']+addurl)
@@ -138,7 +138,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):


        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string
@@ -159,22 +159,22 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)
@@ -198,7 +198,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):

            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
@@ -111,7 +111,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
        self.story.setMetadata('author',a.string)

        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@@ -126,7 +126,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
            except:
                return ""

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string
@@ -147,22 +147,22 @@ class PsychFicComAdapter(BaseSiteAdapter):
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)
@@ -186,7 +186,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
            series_url = 'http://'+self.host+'/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -182,7 +182,7 @@ class ReadOnlyMindComAdapter(BaseSiteAdapter):
        soup = self.make_soup(self.get_request(url))

        #strip comments from soup
-        # [comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
+        # [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]

        # story text
        story = soup.find('section', id='chapter-content')
@@ -104,6 +104,42 @@ class RoyalRoadAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return "https?"+re.escape("://")+r"(www\.|)royalroadl?\.com/fiction/\d+(/.*)?$"

+    # rr won't send you future updates if you aren't 'caught up'
+    # on the story. Login isn't required but logging in will
+    # mark stories you've downloaded as 'read' on rr.
+    def performLogin(self):
+        params = {}
+
+        if self.password:
+            params['Email'] = self.username
+            params['password'] = self.password
+        else:
+            params['Email'] = self.getConfig("username")
+            params['password'] = self.getConfig("password")
+
+        if not params['password']:
+            return
+
+        loginUrl = 'https://' + self.getSiteDomain() + '/account/login'
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+                                                             params['Email']))
+
+        ## need to pull empty login page first to get request token
+        soup = self.make_soup(self.get_request(loginUrl))
+        ## FYI, this will fail if cookiejar is shared, but
+        ## use_basic_cache is false.
+        params['__RequestVerificationToken']=soup.find('input', {'name':'__RequestVerificationToken'})['value']
+
+        d = self.post_request(loginUrl, params)
+        if "Sign in" in d : #Member Account
+            logger.info("Failed to login to URL %s as %s (requires Email not name)" % (loginUrl,
+                                                                                       params['Email']))
+            raise exceptions.FailedToLogin(self.url,"Failed to login as %s (RoyalRoad requires Email not name)" % params['Email'])
+            return False
+        else:
+            return True
+
    ## RR chapter URL only requires the chapter ID number field to be correct, story ID and title values are ignored
    ## URL format after the domain /fiction/ is long form, storyID/storyTitle/chapter/chapterID/chapterTitle
    ## short form has /fiction/chapter/chapterID both forms have optional final /
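A rough stand-alone sketch of the login flow added above, using the `requests` library instead of FanFicFare's get_request/post_request plumbing; the credentials are placeholders:

```python
import requests
from bs4 import BeautifulSoup

session = requests.Session()
login_url = 'https://www.royalroad.com/account/login'

# pull the empty login form first to obtain the anti-forgery token
soup = BeautifulSoup(session.get(login_url).text, 'html.parser')
token = soup.find('input', {'name': '__RequestVerificationToken'})['value']

resp = session.post(login_url, data={
    'Email': 'reader@example.com',        # placeholder credentials
    'password': 'correct-horse',          # placeholder credentials
    '__RequestVerificationToken': token,  # token scraped from the form
})
# same success heuristic as the adapter: the form is gone once logged in
logged_in = 'Sign in' not in resp.text
```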
@@ -119,8 +155,18 @@ class RoyalRoadAdapter(BaseSiteAdapter):
                return self.chapterUrls[chapter_url_index]['url']
        return url

-    def make_soup(self,data):
+    def make_soup(self, data):
        soup = super(RoyalRoadAdapter, self).make_soup(data)
+        # Parse and store styles in a set
+        self.styles_to_ignore = set()
+        style_elements = soup.find_all('style')
+        for style_element in style_elements:
+            class_matches = re.findall(r'\.(\S+)\s*\{[^\}]*display\s*:\s*none\s*;[^\}]*\}', style_element.string, flags=re.IGNORECASE)
+            if class_matches:
+                self.styles_to_ignore.update(class_matches)
+            del class_matches
        self.handle_spoilers(soup)
        return soup
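What the style-scraping regex above actually captures, on a toy stylesheet (the assumption, supported by the later hunk that strips these classes from chapters, is that RoyalRoad hides injected text behind display:none classes):

```python
import re

style_text = '.xUzq1 { display: none; } .body { color: #222; }'
hidden = re.findall(r'\.(\S+)\s*\{[^\}]*display\s*:\s*none\s*;[^\}]*\}',
                    style_text, flags=re.IGNORECASE)
print(hidden)  # ['xUzq1'] -- later stripped from chapter content
```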
@@ -150,6 +196,9 @@ class RoyalRoadAdapter(BaseSiteAdapter):
        url = self.url
        logger.debug("URL: "+url)

+        # Log in so site will mark the chapers as read
+        self.performLogin()
+
        data = self.get_request(url)

        soup = self.make_soup(data)
@@ -177,7 +226,12 @@ class RoyalRoadAdapter(BaseSiteAdapter):

        chapters = soup.find('table',{'id':'chapters'}).find('tbody')
-        tds = [tr.findAll('td') for tr in chapters.findAll('tr')]
+        tds = [tr.find_all('td') for tr in chapters.find_all('tr')]

+        if not tds:
+            raise exceptions.FailedToDownload(
+                "Story has no chapters: %s" % url)
+
+        # Links in the RR ToC page are in the normalized long form, so match is simpler than in normalize_chapterurl()
+        chap_pattern_long = r"https?://(?:www\.)?royalroadl?\.com/fiction/\d+/[^/]+/chapter/(\d+)/[^/]+/?$"
        for chapter,date in tds:
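A quick check of `chap_pattern_long` against a made-up ToC link — only the chapter id is captured, matching the earlier comment that RR ignores the story-id and title fields of chapter URLs:

```python
import re

chap_pattern_long = r"https?://(?:www\.)?royalroadl?\.com/fiction/\d+/[^/]+/chapter/(\d+)/[^/]+/?$"
m = re.match(chap_pattern_long,
             'https://www.royalroad.com/fiction/1234/some-story/chapter/56789/a-chapter')
print(m.group(1))  # 56789
```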
@@ -212,6 +266,8 @@ class RoyalRoadAdapter(BaseSiteAdapter):
                self.story.setMetadata('status', 'Stub')
            elif 'DROPPED' == label:
                self.story.setMetadata('status', 'Dropped')
+            elif 'INACTIVE' == label:
+                self.story.setMetadata('status', 'Inactive')
            elif 'Fan Fiction' == label:
                self.story.addToList('category', 'FanFiction')
            elif 'Original' == label:
@@ -223,7 +279,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
            self.story.setMetadata('stars',stars)
            logger.debug("stars:(%s)"%self.story.getMetadata('stars'))

-        warning = soup.find('strong',text='Warning')
+        warning = soup.find('strong',string='Warning')
        if warning != None:
            for li in warning.find_next('ul').find_all('li'):
                self.story.addToList('warnings',stripHTML(li))
@@ -233,7 +289,8 @@ class RoyalRoadAdapter(BaseSiteAdapter):
        if img:
            cover_url = img['src']
            # usually URL is for thumbnail. Try expected URL for larger image, if fails fall back to the original URL
-            if self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0] == "failedtoload":
+            cover_set = self.setCoverImage(url,cover_url.replace('/covers-full/', '/covers-large/'))[0]
+            if not cover_set or cover_set.startswith("failedtoload"):
                self.setCoverImage(url,cover_url)
        # some content is show as tables, this will preserve them
@@ -258,7 +315,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
        div = soup.find('div',{'class':"chapter-inner chapter-content"})

        # TODO: these stories often have tables in, but these wont render correctly
-            # defaults.ini output CSS now outlines/pads the tables, at least.
+        # defaults.ini output CSS now outlines/pads the tables, at least.

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
@@ -274,5 +331,10 @@ class RoyalRoadAdapter(BaseSiteAdapter):
        if endnote:
            # move endnote into chapter text div.
            div.append(endnote.extract())
+        def has_display_none_style(tag):
+            tag_class = tag.get('class', '')
+            return any(style in tag_class for style in self.styles_to_ignore)
+
+        for element in div.find_all(has_display_none_style):
+            element.extract()
        return self.utf8FromSoup(url,div)
@@ -193,7 +193,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX

        # Find authorid and URL from... author url.
        # (fetch multiple authors)
-        alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        for a in alist:
            self.story.addToList('authorId',a['href'].split('=')[1])
            self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href'])
@@ -201,11 +201,11 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX

        # Reviews
        reviewdata = soup.find('div', {'id' : 'sort'})
-        a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
+        a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
        self.story.setMetadata('reviews',stripHTML(a))

        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/fanfics/'+chapter['href']+addurl)
@@ -222,7 +222,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX


        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string
@@ -237,13 +237,13 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char.string)
@@ -252,7 +252,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
            ## leaving it in. Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:

@@ -262,7 +262,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
            ## leaving it in. Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:
@@ -291,7 +291,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
            series_url = 'http://'+self.host+'/fanfics/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -1,242 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2012 Fanficdownloader team, 2020 FanFicFare team
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Software: eFiction
-from __future__ import absolute_import
-import logging
-logger = logging.getLogger(__name__)
-import re
-from ..htmlcleanup import stripHTML
-from .. import exceptions as exceptions
-
-# py2 vs py3 transition
-from ..six import text_type as unicode
-
-from .base_adapter import BaseSiteAdapter, makeDate
-
-def getClass():
-    return ScarvesAndCoffeeNetAdapter
-
-# Class name has to be unique. Our convention is camel case the
-# sitename with Adapter at the end. www is skipped.
-class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
-
-    def __init__(self, config, url):
-        BaseSiteAdapter.__init__(self, config, url)
-
-        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
-        self.password = ""
-        self.is_adult=False
-
-        # get storyId from url--url validation guarantees query is only sid=1234
-        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-
-
-        # normalized story URL.
-        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
-
-        # Each adapter needs to have a unique site abbreviation.
-        self.story.setMetadata('siteabbrev','scacf')
-
-        # The date format will vary from site to site.
-        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-        self.dateformat = "%m/%d/%Y"
-
-    @staticmethod # must be @staticmethod, don't remove it.
-    def getSiteDomain():
-        # The site domain. Does have www here, if it uses it.
-        return 'www.scarvesandcoffee.net'
-
-    @classmethod
-    def getSiteExampleURLs(cls):
-        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
-
-    def getSiteURLPattern(self):
-        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
-
-    ## Login seems to be reasonably standard across eFiction sites.
-    def needToLoginCheck(self, data):
-        if 'Registered Users Only' in data \
-                or 'There is no such account on our website' in data \
-                or "That password doesn't match the one in our database" in data:
-            return True
-        else:
-            return False
-
-    ## Getting the chapter list and the meta data, plus 'is adult' checking.
-    def extractChapterUrlsAndMetadata(self):
-
-        if self.is_adult or self.getConfig("is_adult"):
-            # Weirdly, different sites use different warning numbers.
-            # If the title search below fails, there's a good chance
-            # you need a different number. print data at that point
-            # and see what the 'click here to continue' url says.
-            addurl = "&ageconsent=ok&warning=20"
-        else:
-            addurl=""
-
-        # index=1 makes sure we see the story chapter index. Some
-        # sites skip that for one-chapter stories.
-        url = self.url+'&index=1'+addurl
-        logger.debug("URL: "+url)
-
-        data = self.get_request(url)
-
-        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
-        if m != None:
-            if self.is_adult or self.getConfig("is_adult"):
-                # We tried the default and still got a warning, so
-                # let's pull the warning number from the 'continue'
-                # link and reload data.
-                addurl = m.group(1)
-                # correct stupid &amp; error in url.
-                addurl = addurl.replace("&amp;","&")
-                url = self.url+'&index=1'+addurl
-                logger.debug("URL 2nd try: "+url)
-
-                data = self.get_request(url)
-            else:
-                raise exceptions.AdultCheckRequired(self.url)
-
-        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
-            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
-
-        soup = self.make_soup(data)
-        # print data
-
-
-        ## Title
-        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
-        self.story.setMetadata('title',stripHTML(a))
-
-        # Find authorid and URL from... author url.
-        a = soup.find('div',{"id":"pagetitle"}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
-        self.story.setMetadata('authorId',a['href'].split('=')[1])
-        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
-        self.story.setMetadata('author',a.string)
-
-        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
-            # just in case there's tags, like <i> in chapter titles.
-            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
-
-
-        # eFiction sites don't help us out a lot with their meta data
-        # formating, so it's a little ugly.
-
-        # utility method
-        def defaultGetattr(d,k):
-            try:
-                return d[k]
-            except:
-                return ""
-
-        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
-        for labelspan in labels:
-            value = labelspan.nextSibling
-            label = labelspan.string
-
-            if 'Summary' in label:
-                ## Everything until the next span class='label'
-                svalue = ""
-                while 'label' not in defaultGetattr(value,'class'):
-                    svalue += unicode(value)
-                    value = value.nextSibling
-                self.setDescription(url,svalue)
-                #self.story.setMetadata('description',stripHTML(svalue))
-
-            if 'Rated' in label:
-                self.story.setMetadata('rating', value)
-
-            if 'Word count' in label:
-                self.story.setMetadata('numWords', value)
-
-            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
-                for cat in cats:
-                    self.story.addToList('genre',cat.string)
-
-            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
-                for char in chars:
-                    self.story.addToList('characters',char.string)
-
-            if 'Completed' in label:
-                if 'Yes' in value:
-                    self.story.setMetadata('status', 'Completed')
-                else:
-                    self.story.setMetadata('status', 'In-Progress')
-
-            if 'Published' in label:
-                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
-
-            if 'Updated' in label:
-                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
-
-        try:
-            # Find Series name from series URL.
-            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
-            series_name = a.string
-            series_url = 'http://'+self.host+'/'+a['href']
-
-            seriessoup = self.make_soup(self.get_request(series_url))
-            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
-            i=1
-            for a in storyas:
-                # skip 'report this' and 'TOC' links
-                if 'contact.php' not in a['href'] and 'index' not in a['href']:
-                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
-                        self.setSeries(series_name, i)
-                        self.story.setMetadata('seriesUrl',series_url)
-                        break
-                    i+=1
-
-        except:
-            # I find it hard to care if the series parsing fails
-            pass
-
-    # grab the text for an individual chapter.
-    def getChapterText(self, url):
-
-        logger.debug('Getting chapter text from: %s' % url)
-
-        soup = self.make_soup(self.get_request(url))
-
-        div = soup.find('div', {'id' : 'story'})
-
-        if None == div:
-            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
-
-        return self.utf8FromSoup(url,div)
-
-    def get_urls_from_page(self,url,normalize):
-        from ..geturls import get_urls_from_html
-        # this way it uses User-Agent or other special settings.
-        data = self.get_request(url,usecache=False)
-        ## I can't find when or why exactly this was added, but it was
-        ## in the old code, so here it remains.
-        soup = self.make_soup(data)
-        series = self.get_series_from_page(url,data)
-        if series:
-            return series
-        else:
-            return {'urllist':get_urls_from_html(soup.find('div',{'id':'mainpage'}),
-                                                 url,
-                                                 configuration=self.configuration,
-                                                 normalize=normalize)}
@@ -57,16 +57,9 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False
+        self.urltitle = "some-title"

-        m = re.match(self.getSiteURLPattern(),url)
-        # logger.debug("id:%s"%m.group('id'))
-        # logger.debug("title:%s"%m.group('title'))
-
-        # get storyId from url
-        self.story.setMetadata('storyId', m.group('id'))
-
-        # normalized story URL.
-        self._setURL('https://' + self.getSiteDomain() + '/series/' + self.story.getMetadata('storyId') + '/' + m.group('title') + '/')
+        self.set_story_idurl(url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','scrhub') # XXX
@@ -75,6 +68,19 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y" # XXX

+    def set_story_idurl(self,url):
+        m = re.match(self.getSiteURLPattern(),url)
+        # logger.debug("id:%s"%m.group('id'))
+        # logger.debug("urltitle:%s"%m.group('urltitle'))
+
+        # get storyId from url
+        self.story.setMetadata('storyId', m.group('id'))
+        if m.group('urltitle'):
+            self.urltitle = m.group('urltitle')
+        # logger.debug("urltitle:%s"%self.urltitle)
+
+        # normalized story URL.
+        self._setURL('https://' + self.getSiteDomain() + '/series/' + self.story.getMetadata('storyId') + '/' + self.urltitle + '/')
+
    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
@@ -88,8 +94,44 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
        return "https://"+cls.getSiteDomain()+"/series/1234/storyname/"

    def getSiteURLPattern(self):
-        return re.escape("https://"+self.getSiteDomain())+r"/(series|read)/(?P<id>\d+)[/-](?P<title>[^/]+)"
+        return self._get_site_url_pattern()
+
+    ## here so getSiteURLPattern and get_section_url(class method) can
+    ## both use it. Note adapter_fictionpresscom has one too.
+    @classmethod
+    def _get_site_url_pattern(cls):
+        return re.escape("https://"+cls.getSiteDomain())+r"/(series|read)/(?P<id>\d+)([/-](?P<urltitle>[^/]+))?"
+
+    @classmethod
+    def get_section_url(cls,url):
+        ## minimal URL used for section names in INI and reject list
+        ## for comparison
+        # logger.debug("pre section--url:%s"%url)
+        m = re.match(cls._get_site_url_pattern(),url)
+        if m:
+            url = "https://"+cls.getSiteDomain()\
+                +"/series/"+m.group('id')+"/a-title/"
+        # logger.debug("post-section url:%s"%url)
+        return url
+
+    @classmethod
+    def get_url_search(cls,url):
+        regexp = super(getClass(), cls).get_url_search(url)
+        regexp = re.sub(r"^(?P<keep>.*com/series/\d+/)(?P<urltitle>[^$]*)?",
+                        r"\g<keep>(.*)",regexp)
+        logger.debug(regexp)
+        return regexp
+
+    ## normalized chapter URLs DO contain the story title now, but
+    ## normalized to current urltitle in case of title changes.
+    def normalize_chapterurl(self,url):
+        # https://www.scribblehub.com/read/862913-hp-the-arcane-thief-litrpg/chapter/1175961/
+        # logger.debug("pre normal chapter--url:%s"%url)
+        url = re.sub(r"https?://(?P<keep>www\.scribblehub\.com/read/\d+-).*(?P<chapter>/chapter/\d+/)",
+                     (r"https://\g<keep>"+self.urltitle+r"\g<chapter>"),url)
+        # logger.debug("post normal chapter-url:%s"%url)
+        return url
+
    def post_request(self, url,
                     parameters=None,
                     usecache=True):
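How the new `normalize_chapterurl()` rewrite behaves on a stale chapter link (slugs are made up); the chapter id survives and the old slug is swapped for the adapter's current one:

```python
import re

urltitle = 'hp-the-arcane-thief-litrpg'  # the adapter's current slug
url = 'https://www.scribblehub.com/read/862913-old-slug/chapter/1175961/'
url = re.sub(r"https?://(?P<keep>www\.scribblehub\.com/read/\d+-).*(?P<chapter>/chapter/\d+/)",
             r"https://\g<keep>" + urltitle + r"\g<chapter>", url)
print(url)
# https://www.scribblehub.com/read/862913-hp-the-arcane-thief-litrpg/chapter/1175961/
```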
@@ -97,8 +139,8 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
            return super(getClass(), self).post_request(url, parameters, usecache)
        except exceptions.HTTPErrorFFF as e:
            ## this is a fix for the scribblehub ajax request sometimes returning
-        # a 400 but only with flaresolverr. Have not been able to reproduce
-        # in curl/firefox. See: https://github.com/JimmXinu/FanFicFare/pull/900
+            # a 400 but only with flaresolverr. Have not been able to reproduce
+            # in curl/firefox. See: https://github.com/JimmXinu/FanFicFare/pull/900
            logger.debug("HTTPErrorFFF/Scribblehub: " + str(e.status_code))
            if e.status_code == 400 and self.getConfig('use_flaresolverr_proxy'):
                return self.decode_data(e.data)
@@ -136,11 +178,15 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX

        soup = self.make_soup(data)


        ## Title
        pagetitle = soup.find('div',{'class':'fic_title'})
        self.story.setMetadata('title',stripHTML(pagetitle))

+        ## <link rel="canonical" href="https://www.scribblehub.com/series/862913/hp-the-arcane-thief-litrpg/" />
+        canonicalurl = soup.select_one('link[rel=canonical]')['href']
+        self.set_story_idurl(canonicalurl)
+        url = canonicalurl
+
        # Find authorid and URL from main story page
        self.story.setMetadata('authorId',stripHTML(soup.find('span',{'class':'auth_name_fic'})))
        self.story.setMetadata('authorUrl',soup.find('div',{'class':'author'}).find('div',{'property':'author'}).find('span',{'property':'name'}).find('a').get('href'))
@ -151,33 +197,20 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# Get the contents list from scribblehub, iterate through and add to chapters
|
||||
# Can be fairly certain this will not 404 - we know the story id is valid
|
||||
contents_payload = {"action": "wi_gettocchp",
|
||||
"strSID": self.story.getMetadata('storyId'),
|
||||
"strmypostid": 0,
|
||||
"strFic": "yes"}
|
||||
|
||||
# 14/12/22 - Looks like it should follow this format now (below), but still returns a 400
|
||||
# but not a 403. tested in browser getting rid of all other cookies to try and get a 400 and nopes.
|
||||
|
||||
# contents_payload = {"action": "wi_getreleases_pagination",
|
||||
# "pagenum": 1,
|
||||
# "mypostid": 421879}
|
||||
# contents_payload = "action=wi_getreleases_pagination&pagenum=1&mypostid=421879"
|
||||
contents_payload = {"action": "wi_getreleases_pagination",
|
||||
"pagenum": -1,
|
||||
"mypostid": self.story.getMetadata('storyId')}
|
||||
|
||||
contents_data = self.post_request("https://www.scribblehub.com/wp-admin/admin-ajax.php", contents_payload)
|
||||
|
||||
# logger.debug(contents_data)
|
||||
contents_soup = self.make_soup(contents_data)
|
||||
|
||||
for i in range(1, int(contents_soup.find('ol',{'id':'ol_toc'}).get('count')) + 1):
|
||||
chapter_url = contents_soup.find('li',{'cnt':str(i)}).find('a').get('href')
|
||||
chapter_name = contents_soup.find('li',{'cnt':str(i)}).find('a').get('title')
|
||||
# logger.debug("Found Chapter " + str(i) + ", name: " + chapter_name + ", url: " + chapter_url)
|
||||
for toca in contents_soup.select('a.toc_a'):
|
||||
chapter_url = toca['href']
|
||||
chapter_name = stripHTML(toca)
|
||||
# logger.debug("Found Chapter: " + chapter_name + ", url: " + chapter_url)
|
||||
self.add_chapter(chapter_name, chapter_url)
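
For context (an editor's sketch, not from the diff): the admin-ajax response is an HTML fragment containing the table of contents, and the new a.toc_a selector picks chapter links out of it directly. The fragment below is invented to show the general shape being parsed.

    from bs4 import BeautifulSoup

    # Hypothetical wi_getreleases_pagination fragment; real responses are larger.
    html = '''<ol id="ol_toc">
      <li><a class="toc_a" href="https://www.scribblehub.com/read/862913-x/chapter/1/">Chapter 1</a></li>
      <li><a class="toc_a" href="https://www.scribblehub.com/read/862913-x/chapter/2/">Chapter 2</a></li>
    </ol>'''
    contents_soup = BeautifulSoup(html, 'html.parser')
    for toca in contents_soup.select('a.toc_a'):
        print(toca['href'], toca.get_text())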

    # eFiction sites don't help us out a lot with their meta data
    # formatting, so it's a little ugly.

    # utility method
    def defaultGetattr(d,k):
        try:

@@ -194,13 +227,13 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX

        # Categories
        if soup.find('span',{'class': 'wi_fic_showtags_inner'}):
-            categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).findAll('a')
+            categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).find_all('a')
            for category in categories:
                self.story.addToList('category', stripHTML(category))

        # Genres
        if soup.find('a',{'class': 'fic_genre'}):
-            genres = soup.findAll('a',{'class': 'fic_genre'})
+            genres = soup.find_all('a',{'class': 'fic_genre'})
            for genre in genres:
                self.story.addToList('genre', stripHTML(genre))
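
A side note on the findAll -> find_all renames that recur throughout this diff: in BeautifulSoup 4, find_all() is the canonical name and findAll() survives only as a backwards-compatibility alias, so behaviour is unchanged. A minimal check:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<a class="fic_genre">Fantasy</a><a class="fic_genre">LitRPG</a>', 'html.parser')
    # Same method under two names; the rename is purely modernization.
    assert soup.find_all('a', {'class': 'fic_genre'}) == soup.findAll('a', {'class': 'fic_genre'})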

@@ -212,7 +245,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX

        # Content Warnings
        if soup.find('ul',{'class': 'ul_rate_expand'}):
-            warnings = soup.find('ul',{'class': 'ul_rate_expand'}).findAll('a')
+            warnings = soup.find('ul',{'class': 'ul_rate_expand'}).find_all('a')
            for warn in warnings:
                self.story.addToList('warnings', stripHTML(warn))

@@ -266,7 +299,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
            self.story.setMetadata(metadata, stripHTML(row.find('td')))

        if soup.find('table',{'class': 'table_pro_overview'}):
-            stats_table = soup.find('table',{'class': 'table_pro_overview'}).findAll('tr')
+            stats_table = soup.find('table',{'class': 'table_pro_overview'}).find_all('tr')
            for row in stats_table:
                find_stats_data("Total Views (All)", row, "views")
                find_stats_data("Word Count", row, "numWords")

@@ -171,7 +171,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX

        # Find authorid and URL from... author url.
        # (fetch multiple authors)
-        alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        for a in alist:
            self.story.addToList('authorId',a['href'].split('=')[1])
            self.story.addToList('authorUrl','https://'+self.host+'/fanfics/'+a['href'])

@@ -180,12 +180,12 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX

        # Reviews
        reviewdata = soup.find('div', {'id' : 'sort'})
-        a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
+        a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
        self.story.setMetadata('reviews',stripHTML(a))


        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/fanfics/'+chapter['href']+addurl)

@@ -208,7 +208,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
        self.setDescription(url,self.make_soup(summarydata))

        # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

@@ -220,13 +220,13 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char.string)

@@ -235,7 +235,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
            ## leaving it in. Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:

@@ -245,7 +245,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
            ## leaving it in. Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:

@@ -273,7 +273,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
            series_url = 'https://'+self.host+'/fanfics/'+a['href']

            seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -1,272 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from bs4.element import Tag
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return SilmarillionWritersGuildOrgAdapter

# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/archive/home/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','swg')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'www.silmarillionwritersguild.org'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/archive/home/viewstory.php?sid=123"

    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain()+"/archive/home/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data
    def extractChapterUrlsAndMetadata(self):

        url = self.url
        logger.debug("URL: "+url)

        data = self.get_request(url)

        soup = self.make_soup(data)

        ## Title and author

        # find story header
        a = soup.find('h6')

        titleLinks = a.find_all('a')
        authorLink= titleLinks[1]

        self.story.setMetadata('authorId',authorLink['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/archive/home/'+authorLink['href'])
        self.story.setMetadata('author',authorLink.string)

        self.story.setMetadata('title',a.find('strong').find('a').get_text())

        # Site does some weird stuff with pagination on series view and will only display 25 stories per page of results.
        # Therefore, to get an accurate series index, we fetch all sub-pages of the series, parse them for valid story urls and add those to a list,
        # then find the first instance of the current story url and use the number of the loop iteration as the index.

        # This is pretty slow but ehh it works

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            if a:
                seriesName = a.string
                seriesUrl = 'https://'+self.host+'/archive/home/'+a['href']

                self.story.setMetadata('seriesUrl',seriesUrl)

                #logger.debug("Series Url: "+seriesUrl)

                # Get Series page and convert to soup
                seriesPageSoup = self.make_soup(self.get_request(seriesUrl+"&offset=0"))
                ## &offset=0 is the same as the first page, by adding
                ## that, the page cache will save us from fetching it
                ## twice in the loop below.

                # Find Series page sub-pages
                seriesPageUrlList = []
                seriesStoryList = []
                for i in seriesPageSoup.findAll('a', href=re.compile(r"viewseries.php\?seriesid=\d+&offset=\d+$")):
                    # Don't include url from next button, is another http request and parse + could cause more bugs!
                    if i.string != '[Next]':
                        seriesPageUrlList.append(i)

                #get urls from all subpages and append to list
                i=1
                for seriesPagePageUrl in seriesPageUrlList:
                    seriesPagePageSoup = self.make_soup(self.get_request('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
                    storyHeaders = seriesPagePageSoup.findAll('h5')
                    ## can't just search for story URLs, some story
                    ## descs also contain story URLs.  Looks like only
                    ## story titles are <h5>.
                    for storyHeader in storyHeaders:
                        seriesPagePageStoryUrl = storyHeader.find('a',href=re.compile(r'^viewstory.php\?sid=\d+$'))
                        if seriesPagePageStoryUrl['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                            #logger.debug("Series Name: "+ seriesName)
                            #logger.debug("Series Index: "+i)
                            self.setSeries(seriesName, i)
                            raise StopIteration("Break out of series parsing loops")
                        i+=1

        except StopIteration:
            # break out of both loops, don't need to fetch further
            # pages after story found.
            pass
        except Exception as e:
            logger.warning("series parsing failed(%s)"%e)

        # Find the chapters by regexing urls
        chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$"))

        if len(chapters)==1:
            self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+'/archive/home/'+chapters[0]['href'])
        else:
            for chapter in chapters:
                # logger.debug("Added Chapter: "+chapter.string)
                self.add_chapter(chapter,'https://'+self.host+'/archive/home/'+chapter['href'])

        # find the details section for the work, will hopefully make parsing metadata a bit easier

        workDetails = soup.find('div', {'id' : 'general'}).find('div', {'id' : 'general'})

        # some metadata can be retrieved through regexes so will do that to try and avoid a janky mess.

        #get characters
        try:
            charList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=characters'+r"&charid=\d+$"))
            for char in charList:
                self.story.addToList('characters',char.string)

        except Exception as e:
            logger.warning("character parsing failed(%s)"%e)

        #get warnings
        try:
            warnList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=class&type_id=2'+r"&classid=\d+$"))
            for warn in warnList:
                self.story.addToList('warnings', warn.string)

        except Exception as e:
            logger.warning("warning parsing failed(%s)"%e)

        #get genres
        try:
            genresList = workDetails.findAll('a', href=re.compile(r'browse.php\?type=class&type_id=1'+r"&classid=\d+$"))
            for genre in genresList:
                self.story.addToList('genre', genre.string)

        except Exception as e:
            logger.warning("genre parsing failed(%s)"%e)

        # no convenient way to extract the remaining metadata, so bodge it by finding the relevant identifier string and using the next element as the data source

        #get summary by finding its identifier, then iterating until the next identifier is found and using the data between the two as the summary
        try:
            summaryStart = workDetails.find('strong',text='Summary: ')
            currentElement = summaryStart.parent.next_sibling
            summaryValue = ""
            while not isinstance(currentElement,Tag) or currentElement.name != 'strong':
                summaryValue += unicode(currentElement)
                currentElement = currentElement.next_sibling
            #logger.debug(summaryValue)
            self.setDescription(url,summaryValue)
        except Exception as e:
            logger.warning("summary parsing failed(%s) -- This can be caused by bad HTML in story description."%e)

        #get rating
        try:
            rating = workDetails.find('strong',text='Rated:').next_sibling.string
            self.story.setMetadata('rating', rating)
        except Exception as e:
            logger.warning("rating parsing failed(%s) -- This can be caused by bad HTML in story description."%e)

        #get completion status and correct for consistency with other adapters
        try:
            if (workDetails.find('strong',text='Completed:').next_sibling.string).lower() == "yes":
                status="Completed"
            else:
                status="In-Progress"

            self.story.setMetadata('status', status)
        except Exception as e:
            logger.warning("status parsing failed(%s) -- This can be caused by bad HTML in story description."%e)

        #get wordcount
        try:
            wordCount = workDetails.find('strong',text='Word count:').next_sibling.string
            self.story.setMetadata('numWords', wordCount)
        except Exception as e:
            logger.warning("wordcount parsing failed(%s) -- This can be caused by bad HTML in story description."%e)

        #get published date; for some reason this only matches with the spaces around 'Published:' kept in
        try:
            datePublished = workDetails.find('strong',text=' Published: ').next_sibling.string
            self.story.setMetadata('datePublished', makeDate(datePublished, self.dateformat))

        except Exception as e:
            logger.warning("datePublished parsing failed(%s) -- This can be caused by bad HTML in story description."%e)

        #get updated date
        try:
            dateUpdated = workDetails.find('strong',text='Updated:').next_sibling.string
            self.story.setMetadata('dateUpdated', makeDate(dateUpdated, self.dateformat))

        except Exception as e:
            logger.warning("dateUpdated parsing failed(%s) -- This can be caused by bad HTML in story description."%e)

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        data = self.get_request(url)
        soup = self.make_soup(data)

        # No convenient way to get the story without the rest of the page, so get the whole page and strip unneeded sections

        contentParent = soup.find('div', {'id' : 'maincontent'}).find('div', {'id' : 'general'})

        contentParent.find('p').decompose() # remove page header
        contentParent.find_all('div',id='general')[2].decompose() #remove page footer
        contentParent.find_all('div',id='general')[0].decompose() #remove chapter select etc.

        contentParent.name='div'

        #error on failure
        if None == contentParent:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,contentParent)
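
The decompose() calls above prune fixed page chrome out of the shared 'general' container before the remainder is handed to utf8FromSoup(). A toy version with invented markup (note that find_all() searches descendants only, so the outer container itself never appears in its own result list):

    from bs4 import BeautifulSoup

    html = '<div id="general"><p>site header</p><div>story text</div><div id="general">footer</div></div>'
    content = BeautifulSoup(html, 'html.parser').find('div', {'id': 'general'})
    content.find('p').decompose()                         # drop the header
    content.find_all('div', id='general')[0].decompose()  # drop the footer
    print(content)  # <div id="general"><div>story text</div></div>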

@@ -1,47 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
from .base_efiction_adapter import BaseEfictionAdapter

class SinfulDreamsComWhisperedMuse(BaseEfictionAdapter):

    @staticmethod
    def getSiteDomain():
        return 'sinful-dreams.com'

    @classmethod
    def getPathToArchive(self):
        return '/whispered/muse'

    @classmethod
    def getConfigSection(cls):
        "Overridden because [domain/path] section for multiple-adapter domain."
        return cls.getSiteDomain()+cls.getPathToArchive()

    @classmethod
    def getSiteAbbrev(self):
        return 'snfldrms-wm'

    @classmethod
    def getDateFormat(self):
        return "%m/%d/%Y"

def getClass():
    return SinfulDreamsComWhisperedMuse