Mirror of https://github.com/JimmXinu/FanFicFare.git, synced 2026-05-09 05:21:13 +02:00
Compare commits
2737 commits
Commits: a172a7bd2b (newest) through e53e2bfbe5 (oldest); the author, date, and message columns are empty for all 2737 rows.
413 changed files with 143898 additions and 32957 deletions
.gitignore (vendored): 7 changes

@@ -17,6 +17,12 @@
 *.back
 *.bak
 
+# pycharm project specific settings files
+.idea
+
+# vscode project specific settings file
+.vscode
+
 cleanup.sh
 FanFictionDownLoader.zip
 *.epub
@@ -28,3 +34,4 @@ dist
 FanFicFare.egg-info
 personal.ini
 appcfg_oauth2_tokens
+venv/
README.md: 78 changes

@@ -1,19 +1,71 @@
-FanFicFare
+[FanFicFare](https://github.com/JimmXinu/FanFicFare)
 ==========
 
-[This is the repository for the FanFicFare project.](https://github.com/JimmXinu/FanFicFare)
+FanFicFare makes reading stories from various websites much easier by helping
+you download them to EBook files.
 
-FanFicFare is the rename and move of the previous FanFictionDownLoader (AKA
-FFDL, AKA fanficdownloader) project.
+FanFicFare was previously known as FanFictionDownLoader (AKA
+FFDL, AKA fanficdownloader).
 
-This program is available as a [calibre
-plugin](http://www.mobileread.com/forums/showthread.php?p=3084025), a
-[command-line interface](https://pypi.python.org/pypi/FanFicFare) (via
-pip), and a [web service](http://fanficfare.appspot.com/).
+Main features:
 
-There's additional info in the project
-[wiki](https://github.com/JimmXinu/FanFicFare/wiki) pages.
+- Download FanFiction stories from over [100 different sites](https://github.com/JimmXinu/FanFicFare/wiki/SupportedSites). into ebooks.
 
-There's also a [FanFicFare
-maillist](https://groups.google.com/group/fanfic-downloader) for
-discussion and announcements.
+- Update previously downloaded EPUB format ebooks, downloading only new chapters.
+
+- Get Story URLs from Web Pages.
+
+- Support for downloading images in the story text. (EPUB and HTML
+only -- download EPUB and convert to AZW3 for Kindle) More details on
+configuring images in stories and cover images can be found in the
+[FAQs] or [this post in the old FFDL thread].
+
+- Support for cover image. (EPUB only)
+
+- Optionally keep an Update Log of past updates (EPUB only).
+
+There's additional info in the project [wiki] pages.
+
+There's also a [FanFicFare maillist] for discussion and announcements and a [discussion thread] for the Calibre plugin.
+
+Getting FanFicFare
+==================
+
+### Official Releases
+
+This program is available as:
+
+- A Calibre plugin from within Calibre or directly from the plugin [discussion thread], or;
+- A Command Line Interface (CLI) [Python
+package](https://pypi.python.org/pypi/FanFicFare) that you can
+install with:
+```
+pip install FanFicFare
+```
+- _As of late November 2019, the web service version is shutdown. See the [Wiki Home](https://github.com/JimmXinu/FanFicFare/wiki#web-service-version) page for details._
+
+### Test Versions
+
+FanFicFare is released roughly every month, but new test versions are posted more frequently as changes are made.
+
+Test versions are available at:
+
+- The [test plugin] is posted at MobileRead.
+- The test version of CLI for pip install is uploaded to the testpypi repository and can be installed with:
+```
+pip install --extra-index-url https://test.pypi.org/simple/ --upgrade FanFicFare
+```
+
+### Other Releases
+
+Other versions may be available depending on your OS. I(JimmXinu) don't directly support these:
+
+- **Arch Linux**: The latest CLI release can be obtained from the [fanficfare](https://aur.archlinux.org/packages/fanficfare) AUR package. It will install the calibre plugin, if calibre is installed.
+
+
+[this post in the old FFDL thread]: https://www.mobileread.com/forums/showthread.php?p=1982785#post1982785
+[FAQs]: https://github.com/JimmXinu/FanFicFare/wiki/FAQs#can-fanficfare-download-a-story-containing-images
+[FanFicFare maillist]: https://groups.google.com/group/fanfic-downloader
+[wiki]: https://github.com/JimmXinu/FanFicFare/wiki
+[discussion thread]: https://www.mobileread.com/forums/showthread.php?t=259221
+[test plugin]: https://www.mobileread.com/forums/showthread.php?p=3084025&postcount=2
|||
|
|
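As an aside on the CLI package documented in the README above: a minimal, hedged usage sketch, in the same fence style the README itself uses. The story URL here is a placeholder, not a real story; after `pip install FanFicFare`, the installed `fanficfare` command downloads a story to an EPUB in the current directory.

```
fanficfare https://some-supported-site.example/s/0000000
```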
@@ -1,8 +1,9 @@
 [main]
 host = https://www.transifex.com
 
-[calibre-plugins.fanfictiondownloader]
+[o:calibre:p:calibre-plugins:r:fanfictiondownloader]
 file_filter = translations/<lang>.po
 source_file = translations/en.po
 source_lang = en
 type = PO
+
@@ -33,7 +33,7 @@ except NameError:
 from calibre.customize import InterfaceActionBase
 
 # pulled out from FanFicFareBase for saving in prefs.py
-__version__ = (3, 7, 6)
+__version__ = (4, 57, 7)
 
 ## Apparently the name for this class doesn't matter--it was still
 ## 'demo' for the first few versions.

@@ -52,7 +52,7 @@ class FanFicFareBase(InterfaceActionBase):
     supported_platforms = ['windows', 'osx', 'linux']
     author = 'Jim Miller'
     version = __version__
-    minimum_calibre_version = (1, 48, 0)
+    minimum_calibre_version = (2, 85, 1)
 
     #: This field defines the GUI plugin class that contains all the code
     #: that actually does something. Its format is module_path:class_name

@@ -105,8 +105,19 @@ class FanFicFareBase(InterfaceActionBase):
         ac.apply_settings()
 
     def load_actual_plugin(self, gui):
-        with self: # so the sys.path was modified while loading the
-                   # plug impl.
+        # so the sys.path was modified while loading the plug impl.
+        with self:
+
+            # Make sure the fanficfare module is available globally
+            # under its simple name, -- This is the only reason other
+            # plugin files can import fanficfare instead of
+            # calibre_plugins.fanficfare_plugin.fanficfare.
+            #
+            # Added specifically for the benefit of
+            # eli-schwartz/eschwartz's Arch Linux distro that wants to
+            # package FFF plugin outside Calibre.
+            import fanficfare
+
             return InterfaceActionBase.load_actual_plugin(self,gui)
 
     def cli_main(self,argv):

@@ -115,9 +126,9 @@ class FanFicFareBase(InterfaceActionBase):
         # I believe there's no performance hit loading these here when
         # CLI--it would load everytime anyway.
         from calibre.library import db
-        from calibre_plugins.fanficfare_plugin.fanficfare.cli import main as fff_main
+        from fanficfare.cli import main as fff_main
         from calibre_plugins.fanficfare_plugin.prefs import PrefsFacade
-        from calibre.utils.config import prefs as calibre_prefs
+        from fanficfare.six import ensure_text
         from optparse import OptionParser
 
         parser = OptionParser('%prog --run-plugin '+self.name+' -- [options] <storyurl>')

@@ -129,12 +140,11 @@ class FanFicFareBase(InterfaceActionBase):
         pargs = [x for x in argv if x.startswith('--with-library') or x.startswith('--library-path')
                  or not x.startswith('-')]
         opts, args = parser.parse_args(pargs)
 
         fff_prefs = PrefsFacade(db(path=opts.library_path,
-                                read_only=True))
+                                   read_only=True))
 
         fff_main(argv[1:],
                  parser=parser,
-                 passed_defaultsini=get_resources("fanficfare/defaults.ini"),
-                 passed_personalini=fff_prefs["personal.ini"],
+                 passed_defaultsini=ensure_text(get_resources("fanficfare/defaults.ini")),
+                 passed_personalini=ensure_text(fff_prefs["personal.ini"]),
                  )
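The `import fanficfare` line added in `load_actual_plugin` above leans on Python's module cache: once the plugin zip is on `sys.path` (inside the `with self:` block), importing the package under its plain name binds it into `sys.modules`, so later `from fanficfare... import ...` statements resolve without the `calibre_plugins.fanficfare_plugin` prefix. A minimal sketch of that mechanism, assuming the `fanficfare` package is importable and has not already been loaded:

```
import sys

# Before the first import, the short name is absent from the cache
# (assumes fanficfare has not been imported earlier in this process).
assert 'fanficfare' not in sys.modules

import fanficfare  # binds the package into sys.modules under 'fanficfare'

# Any later import, from any module, now reuses the cached package.
from fanficfare import adapters
assert sys.modules['fanficfare'] is fanficfare
```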
@@ -1,6 +1,6 @@
 <hr />
 
-<p>Plugin created by Jim Miller, borrowing heavily from Grant Drake's
+<p>Plugin created by Jim Miller, originally borrowing heavily from Grant Drake's
 '<a href="http://www.mobileread.com/forums/showthread.php?t=134856">Reading List</a>',
 '<a href="http://www.mobileread.com/forums/showthread.php?t=126727">Extract ISBN</a>' and
 '<a href="http://www.mobileread.com/forums/showthread.php?t=134000">Count Pages</a>'

@@ -8,12 +8,12 @@
 <p>
 Calibre officially distributes plugins from the mobileread.com forum site.
-The official distro channel for this plugin is there: <a href="http://www.mobileread.com/forums/showthread.php?t=259221">FanFicFare</a>
+The official distro channel and discussion thread for this plugin is there: <a href="http://www.mobileread.com/forums/showthread.php?t=259221">FanFicFare</a>
 </p>
 
 <p> I also monitor the
 <a href="http://groups.google.com/group/fanfic-downloader">general users
-group</a> for the downloader. That covers the web application and CLI, too.
+group</a> for the downloader CLI, too.
 </p>
 
 <p>
20  calibre-plugin/action_chains.py  (new file)

@@ -0,0 +1,20 @@
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2024, Jim Miller'
+__docformat__ = 'restructuredtext en'
+
+## References:
+## https://www.mobileread.com/forums/showthread.php?p=4435205&postcount=65
+## https://www.mobileread.com/forums/showthread.php?p=4102834&postcount=389
+
+from calibre_plugins.action_chains.events import ChainEvent
+
+class FanFicFareDownloadFinished(ChainEvent):
+
+    # replace with the name of your event
+    name = 'FanFicFare Download Finished'
+
+    def get_event_signal(self):
+        return self.gui.iactions['FanFicFare'].download_finished_signal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-from __future__ import (unicode_literals, division,
+from __future__ import (absolute_import, unicode_literals, division,
                         print_function)
 
 __license__ = 'GPL v3'

@@ -9,10 +9,9 @@ __docformat__ = 'restructuredtext en'
 
 import re
 
-try:
-    from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
-except ImportError as e:
-    from PyQt4.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
+from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
+
+from fanficfare.six import string_types
 
 class BasicIniHighlighter(QSyntaxHighlighter):
     '''

@@ -23,11 +22,11 @@ class BasicIniHighlighter(QSyntaxHighlighter):
     format, so I'm leaving this in the project even though I'm not
     using.
     '''
 
     def __init__( self, parent, theme ):
         QSyntaxHighlighter.__init__( self, parent )
         self.parent = parent
 
         self.highlightingRules = []
 
         # keyword

@@ -53,7 +52,7 @@ class BasicIniHighlighter(QSyntaxHighlighter):
 
 class HighlightingRule():
     def __init__( self, pattern, color, style ):
-        if isinstance(pattern,basestring):
+        if isinstance(pattern, string_types):
             self.pattern = re.compile(pattern)
         else:
             self.pattern=pattern

@@ -61,4 +60,3 @@ class HighlightingRule():
         brush = QBrush(color, style)
         charfmt.setForeground(brush)
         self.highlight = charfmt
-
@@ -9,26 +9,22 @@ __docformat__ = 'restructuredtext en'
 
 import os
 from contextlib import contextmanager
-try:
-    from PyQt5 import QtWidgets as QtGui
-    from PyQt5.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
-                          QTableWidgetItem, QFont, QLineEdit, QComboBox,
-                          QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
-                          QTextEdit, QListWidget, QAbstractItemView, QCursor)
-except ImportError as e:
-    from PyQt4 import QtGui
-    from PyQt4.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
-                          QTableWidgetItem, QFont, QLineEdit, QComboBox,
-                          QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
-                          QTextEdit, QListWidget, QAbstractItemView, QCursor)
+from PyQt5 import QtWidgets as QtGui
+from PyQt5.Qt import (QApplication, Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout,
+                      QTableWidgetItem, QFont, QLineEdit, QComboBox,
+                      QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime,
+                      QTextEdit, QListWidget, QAbstractItemView, QCursor)
 
+from calibre.constants import numeric_version as calibre_version
 from calibre.constants import iswindows, DEBUG
-from calibre.gui2 import gprefs, error_dialog, UNDEFINED_QDATETIME, info_dialog
+from calibre.gui2 import UNDEFINED_QDATETIME, gprefs, info_dialog
 from calibre.gui2.actions import menu_action_unique_name
 from calibre.gui2.keyboard import ShortcutConfig
 from calibre.utils.config import config_dir
-from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE
+
+import fanficfare.six as six
+from six import text_type as unicode
 
 # Global definition of our plugin name. Used for common functions that require this.
 plugin_name = None
 # Global definition of our plugin resources. Used to share between the xxxAction and xxxBase
@@ -46,8 +42,41 @@ def set_plugin_icon_resources(name, resources):
     plugin_name = name
     plugin_icon_resources = resources
 
+# print_tracebacks_for_missing_resources first appears in cal 6.2.0
+if calibre_version >= (6,2,0):
+    def get_icons_nolog(icon_name,plugin_name):
+        return get_icons(icon_name,
+                         plugin_name,
+                         print_tracebacks_for_missing_resources=False)
+else:
+    get_icons_nolog = get_icons
 
-def get_icon(icon_name):
+def get_icon_6plus(icon_name):
+    '''
+    Retrieve a QIcon for the named image from
+    1. Calibre's image cache
+    2. resources/images
+    3. the icon theme
+    4. the plugin zip
+    Only plugin zip has images/ in the image name for backward
+    compatibility.
+    '''
+    icon = None
+    if icon_name:
+        icon = QIcon.ic(icon_name)
+        ## both .ic and get_icons return an empty QIcon if not found.
+        if not icon or icon.isNull():
+            # don't need a tracestack from get_icons just because
+            # there's no icon in the theme
+            icon = get_icons_nolog(icon_name.replace('images/',''),
+                                   plugin_name)
+        if not icon or icon.isNull():
+            icon = get_icons(icon_name,plugin_name)
+    if not icon:
+        icon = QIcon()
+    return icon
+
+def get_icon_old(icon_name):
     '''
     Retrieve a QIcon for the named image from the zip file if it exists,
     or if not then from Calibre's image cache.

@@ -61,6 +90,11 @@ def get_icon(icon_name):
         return QIcon(pixmap)
     return QIcon()
 
+# get_icons changed in Cal6.
+if calibre_version >= (6,0,0):
+    get_icon = get_icon_6plus
+else:
+    get_icon = get_icon_old
 
 def get_pixmap(icon_name):
     '''
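The two `get_icon` variants above amount to a layered-fallback lookup, with the running calibre version deciding which chain is used. A hedged, calibre-free sketch of the same pattern (`sources` here is a hypothetical list of lookup callables, not plugin API):

```
def first_found(icon_name, sources):
    """Try each lookup callable in order; return the first hit.

    Mirrors get_icon_6plus's shape: theme lookup first, then the
    plugin's own resources, then a last-resort lookup, else nothing.
    """
    for lookup in sources:
        icon = lookup(icon_name)
        if icon:  # the real code checks icon.isNull() instead
            return icon
    return None
```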
@@ -107,34 +141,6 @@ def get_local_images_dir(subfolder=None):
     return images_dir
 
 
-def create_menu_item(ia, parent_menu, menu_text, image=None, tooltip=None,
-                     shortcut=(), triggered=None, is_checked=None):
-    '''
-    Create a menu action with the specified criteria and action
-    Note that if no shortcut is specified, will not appear in Preferences->Keyboard
-    This method should only be used for actions which either have no shortcuts,
-    or register their menus only once. Use create_menu_action_unique for all else.
-    '''
-    if shortcut is not None:
-        if len(shortcut) == 0:
-            shortcut = ()
-        else:
-            shortcut = shortcut
-    ac = ia.create_action(spec=(menu_text, None, tooltip, shortcut),
-                          attr=menu_text)
-    if image:
-        ac.setIcon(get_icon(image))
-    if triggered is not None:
-        ac.triggered.connect(triggered)
-    if is_checked is not None:
-        ac.setCheckable(True)
-        if is_checked:
-            ac.setChecked(True)
-
-    parent_menu.addAction(ac)
-    return ac
-
-
 def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None,
                               shortcut=None, triggered=None, is_checked=None, shortcut_name=None,
                               unique_name=None):

@@ -175,13 +181,6 @@ def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=No
     return ac
 
 
-def swap_author_names(author):
-    if author.find(',') == -1:
-        return author
-    name_parts = author.strip().partition(',')
-    return name_parts[2].strip() + ' ' + name_parts[0]
-
-
 def get_library_uuid(db):
     try:
         library_uuid = db.library_id
@@ -198,17 +197,6 @@ def busy_cursor():
     finally:
         QApplication.restoreOverrideCursor()
 
 
-class ImageLabel(QLabel):
-
-    def __init__(self, parent, icon_name, size=16):
-        QLabel.__init__(self, parent)
-        pixmap = get_pixmap(icon_name)
-        self.setPixmap(pixmap)
-        self.setMaximumSize(size, size)
-        self.setScaledContents(True)
-
-
 class ImageTitleLayout(QHBoxLayout):
     '''
     A reusable layout widget displaying an image followed by a title

@@ -264,7 +252,7 @@ class EditableTableWidgetItem(QTableWidgetItem):
     def __init__(self, text):
         if text is None:
             text = ''
-        QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType)
+        QTableWidgetItem.__init__(self, text)
         self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled|Qt.ItemIsEditable)
 
 class ReadOnlyTableWidgetItem(QTableWidgetItem):

@@ -272,65 +260,10 @@ class ReadOnlyTableWidgetItem(QTableWidgetItem):
     def __init__(self, text):
         if text is None:
             text = ''
-        QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType)
+        QTableWidgetItem.__init__(self, text)
         self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
 
 
-class RatingTableWidgetItem(QTableWidgetItem):
-
-    def __init__(self, rating, is_read_only=False):
-        QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType)
-        self.setData(Qt.DisplayRole, rating)
-        if is_read_only:
-            self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
-
-
-class DateTableWidgetItem(QTableWidgetItem):
-
-    def __init__(self, date_read, is_read_only=False, default_to_today=False):
-        if date_read == UNDEFINED_DATE and default_to_today:
-            date_read = now()
-        if is_read_only:
-            QTableWidgetItem.__init__(self, format_date(date_read, None), QtGui.QTableWidgetItem.UserType)
-            self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled)
-        else:
-            QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType)
-            self.setData(Qt.DisplayRole, QDateTime(date_read))
-
-
-class NoWheelComboBox(QComboBox):
-
-    def wheelEvent (self, event):
-        # Disable the mouse wheel on top of the combo box changing selection as plays havoc in a grid
-        event.ignore()
-
-
-class CheckableTableWidgetItem(QTableWidgetItem):
-
-    def __init__(self, checked=False, is_tristate=False):
-        QTableWidgetItem.__init__(self, '')
-        self.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsUserCheckable | Qt.ItemIsEnabled ))
-        if is_tristate:
-            self.setFlags(self.flags() | Qt.ItemIsTristate)
-        if checked:
-            self.setCheckState(Qt.Checked)
-        else:
-            if is_tristate and checked is None:
-                self.setCheckState(Qt.PartiallyChecked)
-            else:
-                self.setCheckState(Qt.Unchecked)
-
-    def get_boolean_value(self):
-        '''
-        Return a boolean value indicating whether checkbox is checked
-        If this is a tristate checkbox, a partially checked value is returned as None
-        '''
-        if self.checkState() == Qt.PartiallyChecked:
-            return None
-        else:
-            return self.checkState() == Qt.Checked
-
-
 class TextIconWidgetItem(QTableWidgetItem):
 
     def __init__(self, text, icon):

@@ -347,64 +280,6 @@ class ReadOnlyTextIconWidgetItem(ReadOnlyTableWidgetItem):
         self.setIcon(icon)
 
 
-class ReadOnlyLineEdit(QLineEdit):
-
-    def __init__(self, text, parent):
-        if text is None:
-            text = ''
-        QLineEdit.__init__(self, text, parent)
-        self.setEnabled(False)
-
-
-class KeyValueComboBox(QComboBox):
-
-    def __init__(self, parent, values, selected_key):
-        QComboBox.__init__(self, parent)
-        self.values = values
-        self.populate_combo(selected_key)
-
-    def populate_combo(self, selected_key):
-        self.clear()
-        selected_idx = idx = -1
-        for key, value in self.values.iteritems():
-            idx = idx + 1
-            self.addItem(value)
-            if key == selected_key:
-                selected_idx = idx
-        self.setCurrentIndex(selected_idx)
-
-    def selected_key(self):
-        for key, value in self.values.iteritems():
-            if value == unicode(self.currentText()).strip():
-                return key
-
-
-class CustomColumnComboBox(QComboBox):
-
-    def __init__(self, parent, custom_columns, selected_column, initial_items=['']):
-        QComboBox.__init__(self, parent)
-        self.populate_combo(custom_columns, selected_column, initial_items)
-
-    def populate_combo(self, custom_columns, selected_column, initial_items=['']):
-        self.clear()
-        self.column_names = initial_items
-        if len(initial_items) > 0:
-            self.addItems(initial_items)
-        selected_idx = 0
-        for idx, value in enumerate(initial_items):
-            if value == selected_column:
-                selected_idx = idx
-        for key in sorted(custom_columns.keys()):
-            self.column_names.append(key)
-            self.addItem('%s (%s)'%(key, custom_columns[key]['name']))
-            if key == selected_column:
-                selected_idx = len(self.column_names) - 1
-        self.setCurrentIndex(selected_idx)
-
-    def get_selected_column(self):
-        return self.column_names[self.currentIndex()]
-
-
 class KeyboardConfigDialog(SizePersistedDialog):
     '''
     This dialog is used to allow editing of keyboard shortcuts.

@@ -438,43 +313,6 @@ class KeyboardConfigDialog(SizePersistedDialog):
         self.accept()
 
 
-class DateDelegate(QStyledItemDelegate):
-    '''
-    Delegate for dates. Because this delegate stores the
-    format as an instance variable, a new instance must be created for each
-    column. This differs from all the other delegates.
-    '''
-    def __init__(self, parent):
-        QStyledItemDelegate.__init__(self, parent)
-        self.format = 'dd MMM yyyy'
-
-    def displayText(self, val, locale):
-        d = val.toDateTime()
-        if d <= UNDEFINED_QDATETIME:
-            return ''
-        return format_date(qt_to_dt(d, as_utc=False), self.format)
-
-    def createEditor(self, parent, option, index):
-        qde = QStyledItemDelegate.createEditor(self, parent, option, index)
-        qde.setDisplayFormat(self.format)
-        qde.setMinimumDateTime(UNDEFINED_QDATETIME)
-        qde.setSpecialValueText(_('Undefined'))
-        qde.setCalendarPopup(True)
-        return qde
-
-    def setEditorData(self, editor, index):
-        val = index.model().data(index, Qt.DisplayRole).toDateTime()
-        if val is None or val == UNDEFINED_QDATETIME:
-            val = now()
-        editor.setDateTime(val)
-
-    def setModelData(self, editor, model, index):
-        val = editor.dateTime()
-        if val <= UNDEFINED_QDATETIME:
-            model.setData(index, UNDEFINED_QDATETIME, Qt.EditRole)
-        else:
-            model.setData(index, QDateTime(val), Qt.EditRole)
-
 class PrefsViewerDialog(SizePersistedDialog):
 
     def __init__(self, gui, namespace):

@@ -505,7 +343,6 @@ class PrefsViewerDialog(SizePersistedDialog):
         self.keys_list.setAlternatingRowColors(True)
         ml.addWidget(self.keys_list)
         self.value_text = QTextEdit(self)
-        self.value_text.setTabStopWidth(24)
         self.value_text.setReadOnly(True)
         ml.addWidget(self.value_text, 1)

@@ -532,7 +369,7 @@ class PrefsViewerDialog(SizePersistedDialog):
     def _populate_settings(self):
         self.keys_list.clear()
         ns_prefix = self._get_ns_prefix()
-        keys = sorted([k[len(ns_prefix):] for k in self.db.prefs.iterkeys()
+        keys = sorted([k[len(ns_prefix):] for k in six.iterkeys(self.db.prefs)
                        if k.startswith(ns_prefix)])
        for key in keys:
             self.keys_list.addItem(key)

@@ -595,7 +432,7 @@ class PrefsViewerDialog(SizePersistedDialog):
         if not confirm(message, self.namespace+'_clear_settings', self):
             return
         ns_prefix = self._get_ns_prefix()
-        keys = [k for k in self.db.prefs.iterkeys() if k.startswith(ns_prefix)]
+        keys = [k for k in six.iterkeys(self.db.prefs) if k.startswith(ns_prefix)]
         for k in keys:
             del self.db.prefs[k]
         self._populate_settings()

@@ -615,4 +452,3 @@ class PrefsViewerDialog(SizePersistedDialog):
             self.close()
             if d.do_restart:
                 self.gui.quit(restart=True)
-
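Several hunks above replace py2-only `dict.iterkeys()`/`dict.iteritems()` calls with the `six` wrappers. A minimal standalone sketch of why the wrapper form runs on both Python 2 and 3 (the dict contents here are made-up example values, not plugin prefs):

```
import six

prefs = {'errorcol': '#fffstatus', 'savemetacol': ''}

# dict.iteritems() only exists on py2; six dispatches to the right
# method so the same source runs under both interpreters.
for key, value in six.iteritems(prefs):
    print(key, value)

keys = sorted(six.iterkeys(prefs))
```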
@@ -4,46 +4,27 @@ from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
 
 __license__ = 'GPL v3'
-__copyright__ = '2018, Jim Miller'
+__copyright__ = '2021, Jim Miller'
 __docformat__ = 'restructuredtext en'
 
 import logging
 logger = logging.getLogger(__name__)
 
-import traceback, copy, threading, re
+import re
+import threading
 from collections import OrderedDict
 
-try:
-    from PyQt5 import QtWidgets as QtGui
-    from PyQt5.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QGridLayout,
-                          QLabel, QLineEdit, QFont, QWidget, QTextEdit, QComboBox,
-                          QCheckBox, QPushButton, QTabWidget, QScrollArea,
-                          QDialogButtonBox, QGroupBox, QButtonGroup, QRadioButton, Qt)
-except ImportError as e:
-    from PyQt4 import QtGui
-    from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QGridLayout,
-                          QLabel, QLineEdit, QFont, QWidget, QTextEdit, QComboBox,
-                          QCheckBox, QPushButton, QTabWidget, QScrollArea,
-                          QDialogButtonBox, QGroupBox, QButtonGroup, QRadioButton, Qt)
-try:
-    from calibre.gui2 import QVariant
-    del QVariant
-except ImportError:
-    is_qt4 = False
-    convert_qvariant = lambda x: x
-else:
-    is_qt4 = True
-    def convert_qvariant(x):
-        vt = x.type()
-        if vt == x.String:
-            return unicode(x.toString())
-        if vt == x.List:
-            return [convert_qvariant(i) for i in x.toList()]
-        return x.toPyObject()
+from PyQt5 import QtWidgets as QtGui
+from PyQt5.Qt import (QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QLabel,
+                      QLineEdit, QComboBox, QCheckBox, QPushButton, QTabWidget,
+                      QScrollArea, QGroupBox, QButtonGroup, QRadioButton,
+                      Qt)
 
-from calibre.gui2 import dynamic, info_dialog, question_dialog
 from calibre.gui2.ui import get_gui
+from calibre.gui2 import dynamic, info_dialog
 from calibre.gui2.complete2 import EditWithComplete
 from calibre.gui2.dialogs.confirm_delete import confirm
+import fanficfare.six as six
+from six import text_type as unicode
 
 try:
     from calibre.ebooks.covers import generate_cover as cal_generate_cover
@@ -82,23 +63,20 @@ no_trans = { 'pini':'personal.ini',
 
 STD_COLS_SKIP = ['size','cover','news','ondevice','path','series_sort','sort']
 
-from calibre_plugins.fanficfare_plugin.prefs \
-    import (prefs, rejects_data, PREFS_NAMESPACE, prefs_save_options,
-            updatecalcover_order, gencalcover_order, do_wordcount_order,
-            SAVE_YES, SAVE_NO)
+from calibre_plugins.fanficfare_plugin.prefs import (
+    prefs, rejects_data, PREFS_NAMESPACE, prefs_save_options,
+    updatecalcover_order, gencalcover_order, do_wordcount_order,
+    SAVE_YES, SAVE_NO)
 
-from calibre_plugins.fanficfare_plugin.dialogs \
-    import (UPDATE, UPDATEALWAYS, collision_order, save_collisions, RejectListDialog,
-            EditTextDialog, IniTextDialog, RejectUrlEntry)
+from calibre_plugins.fanficfare_plugin.dialogs import (
+    UPDATE, UPDATEALWAYS, collision_order, save_collisions, RejectListDialog,
+    EditTextDialog, IniTextDialog, RejectUrlEntry)
 
-from calibre_plugins.fanficfare_plugin.fanficfare.adapters \
-    import getSiteSections
+from fanficfare.adapters import getSiteSections, get_section_url
 
-from calibre_plugins.fanficfare_plugin.common_utils \
-    import ( KeyboardConfigDialog, PrefsViewerDialog, busy_cursor )
+from calibre_plugins.fanficfare_plugin.common_utils import (
+    KeyboardConfigDialog, PrefsViewerDialog, busy_cursor )
 
 from calibre_plugins.fanficfare_plugin.fff_util \
     import (test_config)
 
 class RejectURLList:
     def __init__(self,prefs,rejects_data):
@@ -116,21 +94,37 @@ class RejectURLList:
                                            fromline=True,normalize=normalize)
                 #print("rue.url:%s"%rue.url)
                 if rue.valid:
-                    cache[rue.url] = rue
+                    cache[get_section_url(rue.url)] = rue
         return cache
 
+    ## Note that RejectURLList now applies
+    ## adapters.get_section_url(url) to all urls before caching and
+    ## before checking so ffnet/a/123/1/Title -> ffnet/a/123/1/,
+    ## xenforo too.  Saved list still contains full URL so we're not
+    ## destorying any data.  Could have duplicates, though.
     def _get_listcache(self):
         with busy_cursor():
             if self.listcache == None:
                 # logger.debug("prefs['last_saved_version']:%s"%unicode(self.prefs['last_saved_version']))
                 if tuple(self.prefs['last_saved_version']) > (3, 1, 7) and \
                         self.rejects_data['rejecturls_data']:
-                    logger.debug("_get_listcache: rejects_data['rejecturls_data']")
+                    # logger.debug("_get_listcache: rejects_data['rejecturls_data']")
                     self.listcache = OrderedDict()
                     for x in self.rejects_data['rejecturls_data']:
                         rue = RejectUrlEntry.from_data(x)
                         if rue.valid:
-                            self.listcache[rue.url] = rue
+                            # if rue.url != get_section_url(rue.url):
+                            #     logger.debug("\n=============\nurl:%s section:%s\n================"%(rue.url,get_section_url(rue.url)))
+                            section_url = get_section_url(rue.url)
+                            if section_url in self.listcache:
+                                logger.debug("Duplicate in Reject list: %s %s (use longer)"%(
+                                        self.listcache[section_url].url, rue.url))
+                            ## if there's a dup, keep the one with the
+                            ## longer URL, more likely to be titled
+                            ## version.
+                            if( section_url not in self.listcache
+                                or len(rue.url) > len(self.listcache[section_url].url) ):
+                                self.listcache[section_url] = rue
                 else:
                     # Assume saved rejects list is already normalized after
                     # v2.10.9.  If normalization needs to change someday, can
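The comment block in the hunk above describes keying the reject cache by `get_section_url()` and, on a collision, keeping the longer (more likely titled) URL. A hedged standalone sketch of just that dedup rule, with a stand-in `section_url` callable instead of `fanficfare.adapters.get_section_url`:

```
def dedupe_rejects(urls, section_url):
    """Key each URL by its section URL; on a duplicate, keep the
    entry with the longer full URL (more likely the titled version)."""
    cache = {}
    for url in urls:
        key = section_url(url)
        if key not in cache or len(url) > len(cache[key]):
            cache[key] = url
    return cache

# Both example URLs map to the same key; the titled form wins:
strip_title = lambda u: u.rsplit('/', 1)[0] + '/'
print(dedupe_rejects(['site/a/123/1/', 'site/a/123/1/Title'], strip_title))
```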
@@ -141,15 +135,19 @@ class RejectURLList:
                                                    normalize=normalize)
                     if normalize:
                         self._save_list(self.listcache,clearcache=False)
-                    logger.debug("_get_listcache: prefs['rejecturls']")
+                    # logger.debug("_get_listcache: prefs['rejecturls']")
 
-            # logger.debug([ x.to_data() for x in self.listcache.values()])
             # logger.debug(self.listcache)
+            # logger.debug([ x.to_data() for x in self.listcache.values()])
             return self.listcache
 
     def _save_list(self,listcache,clearcache=True):
         with busy_cursor():
             #print("_save_list")
-            self.prefs['rejecturls'] = '\n'.join([x.to_line() for x in listcache.values()])
+            ## As of July 2020 it's been > 1.5 years since
+            ## rejects_data added.  Stop keeping older version in
+            ## prefs.
+            del self.prefs['rejecturls']
             self.prefs.save_to_db()
             rejects_data['rejecturls_data'] = [x.to_data() for x in listcache.values()]
             rejects_data.save_to_db()
@@ -161,11 +159,14 @@ class RejectURLList:
 
     # true if url is in list.
     def check(self,url):
+        # logger.debug("Checking %s(%s)"%(url,get_section_url(url)))
+        url = get_section_url(url)
         with self.sync_lock:
             listcache = self._get_listcache()
             return url in listcache
 
     def get_note(self,url):
+        url = get_section_url(url)
         with self.sync_lock:
             listcache = self._get_listcache()
             if url in listcache:

@@ -174,6 +175,7 @@ class RejectURLList:
         return ''
 
     def get_full_note(self,url):
+        url = get_section_url(url)
         with self.sync_lock:
             listcache = self._get_listcache()
             if url in listcache:

@@ -182,6 +184,7 @@ class RejectURLList:
         return ''
 
     def remove(self,url):
+        url = get_section_url(url)
         with self.sync_lock:
             listcache = self._get_listcache()
             if url in listcache:

@@ -189,7 +192,7 @@ class RejectURLList:
             self._save_list(listcache)
 
     def add_text(self,rejecttext,addreasontext):
-        self.add(self._read_list_from_text(rejecttext,addreasontext).values())
+        self.add(list(self._read_list_from_text(rejecttext,addreasontext).values()))
 
     def add(self,rejectlist,clear=False):
         with self.sync_lock:

@@ -198,11 +201,11 @@ class RejectURLList:
             else:
                 listcache = self._get_listcache()
             for l in rejectlist:
-                listcache[l.url]=l
+                listcache[get_section_url(l.url)]=l
             self._save_list(listcache)
 
     def get_list(self):
-        return self._get_listcache().values()
+        return list(self._get_listcache().values())
 
     def get_reject_reasons(self):
         return self.prefs['rejectreasons'].splitlines()
@@ -276,19 +279,23 @@ class ConfigWidget(QWidget):
         prefs['collision'] = save_collisions[unicode(self.basic_tab.collision.currentText())]
         prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked()
         prefs['bgmeta'] = self.basic_tab.bgmeta.isChecked()
-        prefs['updateepubcover'] = self.basic_tab.updateepubcover.isChecked()
         prefs['keeptags'] = self.basic_tab.keeptags.isChecked()
         prefs['mark'] = self.basic_tab.mark.isChecked()
+        prefs['mark_success'] = self.basic_tab.mark_success.isChecked()
+        prefs['mark_failed'] = self.basic_tab.mark_failed.isChecked()
+        prefs['mark_chapter_error'] = self.basic_tab.mark_chapter_error.isChecked()
         prefs['showmarked'] = self.basic_tab.showmarked.isChecked()
         prefs['autoconvert'] = self.basic_tab.autoconvert.isChecked()
         prefs['show_est_time'] = self.basic_tab.show_est_time.isChecked()
         prefs['urlsfromclip'] = self.basic_tab.urlsfromclip.isChecked()
+        prefs['button_instantpopup'] = self.basic_tab.button_instantpopup.isChecked()
         prefs['updatedefault'] = self.basic_tab.updatedefault.isChecked()
         prefs['deleteotherforms'] = self.basic_tab.deleteotherforms.isChecked()
         prefs['adddialogstaysontop'] = self.basic_tab.adddialogstaysontop.isChecked()
         prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked()
         prefs['checkforseriesurlid'] = self.basic_tab.checkforseriesurlid.isChecked()
         prefs['auto_reject_seriesurlid'] = self.basic_tab.auto_reject_seriesurlid.isChecked()
+        prefs['mark_series_anthologies'] = self.basic_tab.mark_series_anthologies.isChecked()
         prefs['checkforurlchange'] = self.basic_tab.checkforurlchange.isChecked()
         prefs['injectseries'] = self.basic_tab.injectseries.isChecked()
         prefs['matchtitleauth'] = self.basic_tab.matchtitleauth.isChecked()

@@ -299,10 +306,10 @@ class ConfigWidget(QWidget):
 
         if self.readinglist_tab:
             # lists
-            prefs['send_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.send_lists_box.text()).split(','))))
-            prefs['read_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.read_lists_box.text()).split(','))))
-            # print("send_lists: %s"%prefs['send_lists'])
-            # print("read_lists: %s"%prefs['read_lists'])
+            prefs['send_lists'] = ', '.join([ x.strip() for x in unicode(self.readinglist_tab.send_lists_box.text()).split(',') if x.strip() ])
+            prefs['read_lists'] = ', '.join([ x.strip() for x in unicode(self.readinglist_tab.read_lists_box.text()).split(',') if x.strip() ])
+            # logger.debug("send_lists: %s"%prefs['send_lists'])
+            # logger.debug("read_lists: %s"%prefs['read_lists'])
             prefs['addtolists'] = self.readinglist_tab.addtolists.isChecked()
             prefs['addtoreadlists'] = self.readinglist_tab.addtoreadlists.isChecked()
             prefs['addtolistsonread'] = self.readinglist_tab.addtolistsonread.isChecked()

@@ -326,9 +333,10 @@ class ConfigWidget(QWidget):
         prefs['calibre_gen_cover'] = self.calibrecover_tab.calibre_gen_cover.isChecked()
         prefs['plugin_gen_cover'] = self.calibrecover_tab.plugin_gen_cover.isChecked()
         prefs['gcnewonly'] = self.calibrecover_tab.gcnewonly.isChecked()
+        prefs['covernewonly'] = self.calibrecover_tab.covernewonly.isChecked()
         gc_site_settings = {}
-        for (site,combo) in self.calibrecover_tab.gc_dropdowns.iteritems():
-            val = unicode(convert_qvariant(combo.itemData(combo.currentIndex())))
+        for (site,combo) in six.iteritems(self.calibrecover_tab.gc_dropdowns):
+            val = unicode(combo.itemData(combo.currentIndex()))
             if val != 'none':
                 gc_site_settings[site] = val
                 #print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site]))

@@ -355,7 +363,7 @@ class ConfigWidget(QWidget):
 
         # Standard Columns tab
         colsnewonly = {}
-        for (col,checkbox) in self.std_columns_tab.stdcol_newonlycheck.iteritems():
+        for (col,checkbox) in six.iteritems(self.std_columns_tab.stdcol_newonlycheck):
             colsnewonly[col] = checkbox.isChecked()
         prefs['std_cols_newonly'] = colsnewonly
 

@@ -363,33 +371,36 @@ class ConfigWidget(QWidget):
         prefs['suppresstitlesort'] = self.std_columns_tab.suppresstitlesort.isChecked()
         prefs['authorcase'] = self.std_columns_tab.authorcase.isChecked()
         prefs['titlecase'] = self.std_columns_tab.titlecase.isChecked()
+        prefs['seriescase'] = self.std_columns_tab.seriescase.isChecked()
         prefs['setanthologyseries'] = self.std_columns_tab.setanthologyseries.isChecked()
 
         prefs['set_author_url'] =self.std_columns_tab.set_author_url.isChecked()
+        prefs['set_series_url'] =self.std_columns_tab.set_series_url.isChecked()
         prefs['includecomments'] =self.std_columns_tab.includecomments.isChecked()
         prefs['anth_comments_newonly'] =self.std_columns_tab.anth_comments_newonly.isChecked()
 
         # Custom Columns tab
         # error column
-        prefs['errorcol'] = unicode(convert_qvariant(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex())))
+        prefs['errorcol'] = unicode(self.cust_columns_tab.errorcol.itemData(self.cust_columns_tab.errorcol.currentIndex()))
         prefs['save_all_errors'] = self.cust_columns_tab.save_all_errors.isChecked()
 
         # metadata column
-        prefs['savemetacol'] = unicode(convert_qvariant(self.cust_columns_tab.savemetacol.itemData(self.cust_columns_tab.savemetacol.currentIndex())))
+        prefs['savemetacol'] = unicode(self.cust_columns_tab.savemetacol.itemData(self.cust_columns_tab.savemetacol.currentIndex()))
 
         # lastchecked column
-        prefs['lastcheckedcol'] = unicode(convert_qvariant(self.cust_columns_tab.lastcheckedcol.itemData(self.cust_columns_tab.lastcheckedcol.currentIndex())))
+        prefs['lastcheckedcol'] = unicode(self.cust_columns_tab.lastcheckedcol.itemData(self.cust_columns_tab.lastcheckedcol.currentIndex()))
 
         # cust cols tab
         colsmap = {}
-        for (col,combo) in self.cust_columns_tab.custcol_dropdowns.iteritems():
-            val = unicode(convert_qvariant(combo.itemData(combo.currentIndex())))
+        for (col,combo) in six.iteritems(self.cust_columns_tab.custcol_dropdowns):
+            val = unicode(combo.itemData(combo.currentIndex()))
             if val != 'none':
                 colsmap[col] = val
                 #print("colsmap[%s]:%s"%(col,colsmap[col]))
         prefs['custom_cols'] = colsmap
 
         colsnewonly = {}
-        for (col,checkbox) in self.cust_columns_tab.custcol_newonlycheck.iteritems():
+        for (col,checkbox) in six.iteritems(self.cust_columns_tab.custcol_newonlycheck):
             colsnewonly[col] = checkbox.isChecked()
         prefs['custom_cols_newonly'] = colsnewonly
 

@@ -399,13 +410,19 @@ class ConfigWidget(QWidget):
         prefs['imapuser'] = unicode(self.imap_tab.imapuser.text()).strip()
         prefs['imappass'] = unicode(self.imap_tab.imappass.text()).strip()
         prefs['imapfolder'] = unicode(self.imap_tab.imapfolder.text()).strip()
-        prefs['imaptags'] = unicode(self.imap_tab.imaptags.text()).strip()
+        # prefs['imaptags'] = unicode(self.imap_tab.imaptags.text()).strip()
+        prefs['imaptags'] = ', '.join([ x.strip() for x in unicode(self.imap_tab.imaptags.text()).split(',') if x.strip() ])
         prefs['imapmarkread'] = self.imap_tab.imapmarkread.isChecked()
         prefs['imapsessionpass'] = self.imap_tab.imapsessionpass.isChecked()
         prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked()
         prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked()
         prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked()
 
+        prefs['site_split_jobs'] = self.other_tab.site_split_jobs.isChecked()
+        prefs['reconsolidate_jobs'] = self.other_tab.reconsolidate_jobs.isChecked()
+
         prefs.save_to_db()
+        self.plugin_action.set_popup_mode()
 
     def edit_shortcuts(self):
         self.save_settings()
@@ -472,11 +489,6 @@ class BasicTab(QWidget):
         self.updatemeta.setChecked(prefs['updatemeta'])
         horz.addWidget(self.updatemeta)
 
-        self.updateepubcover = QCheckBox(_('Default Update EPUB Cover when Updating EPUB?'),self)
-        self.updateepubcover.setToolTip(_("On each download, FanFicFare offers an option to update the book cover image <i>inside</i> the EPUB from the web site when the EPUB is updated.<br />This sets whether that will default to on or off."))
-        self.updateepubcover.setChecked(prefs['updateepubcover'])
-        horz.addWidget(self.updateepubcover)
-
         self.bgmeta = QCheckBox(_('Default Background Metadata?'),self)
         self.bgmeta.setToolTip(_("On each download, FanFicFare offers an option to Collect Metadata from sites in a Background process.<br />This returns control to you quicker while updating, but you won't be asked for username/passwords or if you are an adult--stories that need those will just fail.<br />Only available for Update/Overwrite of existing books in case URL given isn't canonical or matches to existing book by Title/Author."))
         self.bgmeta.setChecked(prefs['bgmeta'])

@@ -506,9 +518,25 @@ class BasicTab(QWidget):
         self.auto_reject_seriesurlid = QCheckBox(_("Reject Without Confirmation?"),self)
         self.auto_reject_seriesurlid.setToolTip(_("Automatically reject storys with existing Series Anthology books.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected."))
         self.auto_reject_seriesurlid.setChecked(prefs['auto_reject_seriesurlid'])
         self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked())
 
+        self.mark_series_anthologies = QCheckBox(_("Mark Matching Anthologies?"),self)
+        self.mark_series_anthologies.setToolTip(_("Mark and show existing Series Anthology books when individual updates are skipped.\nOnly works if 'Check for existing Series Anthology books' is on.\nDoesn't work when Collect Metadata in Background is selected."))
+        self.mark_series_anthologies.setChecked(prefs['mark_series_anthologies'])
+        self.mark_series_anthologies.setEnabled(self.checkforseriesurlid.isChecked())
+
+        def mark_anthologies():
+            self.auto_reject_seriesurlid.setEnabled(self.checkforseriesurlid.isChecked())
+            self.mark_series_anthologies.setEnabled(self.checkforseriesurlid.isChecked())
+        self.checkforseriesurlid.stateChanged.connect(mark_anthologies)
+        mark_anthologies()
+
         horz = QHBoxLayout()
         horz.addItem(QtGui.QSpacerItem(20, 1))
-        horz.addWidget(self.auto_reject_seriesurlid)
+        vertright = QVBoxLayout()
+        horz.addLayout(vertright)
+        vertright.addWidget(self.auto_reject_seriesurlid)
+        vertright.addWidget(self.mark_series_anthologies)
         self.l.addLayout(horz)
 
         self.checkforurlchange = QCheckBox(_("Check for changed Story URL?"),self)
@@ -526,12 +554,41 @@ class BasicTab(QWidget):
         groupbox.setLayout(self.l)
 
         self.mark = QCheckBox(_("Mark added/updated books when finished?"),self)
-        self.mark.setToolTip(_("Mark added/updated books when finished.  Use with option below.\nYou can also manually search for 'marked:fff_success'.\n'marked:fff_failed' is also available, or search 'marked:fff' for both."))
+        self.mark.setToolTip(_("Mark added/updated books when finished.  Use with option below.\nYou can also manually search for 'marked:fff_success'.\n'marked:fff_failed' and 'marked:fff_chapter_error' are also available, or search 'marked:fff' for all."))
         self.mark.setChecked(prefs['mark'])
         self.l.addWidget(self.mark)
 
+        horz = QHBoxLayout()
+        horz.addItem(QtGui.QSpacerItem(20, 1))
+        self.l.addLayout(horz)
+
+        self.mark_success = QCheckBox(_("Success"),self)
+        self.mark_success.setToolTip(_("Mark successfully downloaded or updated books."))
+        self.mark_success.setChecked(prefs['mark_success'])
+        self.mark_success.setEnabled(self.checkforseriesurlid.isChecked())
+        horz.addWidget(self.mark_success)
+
+        self.mark_failed = QCheckBox(_("Failed"),self)
+        self.mark_failed.setToolTip(_("Mark failed downloaded or updated books."))
+        self.mark_failed.setChecked(prefs['mark_failed'])
+        self.mark_failed.setEnabled(self.checkforseriesurlid.isChecked())
+        horz.addWidget(self.mark_failed)
+
+        self.mark_chapter_error = QCheckBox(_("Chapter Error"),self)
+        self.mark_chapter_error.setToolTip(_("Mark downloaded or updated books with chapter errors (only when <i>continue_on_chapter_error:true</i>)."))
+        self.mark_chapter_error.setChecked(prefs['mark_chapter_error'])
+        self.mark_chapter_error.setEnabled(self.checkforseriesurlid.isChecked())
+        horz.addWidget(self.mark_chapter_error)
+
+        def mark_state():
+            self.mark_success.setEnabled(self.mark.isChecked())
+            self.mark_failed.setEnabled(self.mark.isChecked())
+            self.mark_chapter_error.setEnabled(self.mark.isChecked())
+        self.mark.stateChanged.connect(mark_state)
+        mark_state()
+
         self.showmarked = QCheckBox(_("Show Marked books when finished?"),self)
-        self.showmarked.setToolTip(_("Show Marked added/updated books only when finished.\nYou can also manually search for 'marked:fff_success'.\n'marked:fff_failed' is also available, or search 'marked:fff' for both."))
+        self.showmarked.setToolTip(_("Show Marked added/updated books only when finished.\nYou can also manually search for 'marked:fff_success'.\n'marked:fff_failed' and 'marked:fff_chapter_error' are also available, or search 'marked:fff' for all."))
         self.showmarked.setChecked(prefs['showmarked'])
         self.l.addWidget(self.showmarked)
 
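The `mark_state()` closure in the hunk above is a small dependent-widget pattern: sub-options stay disabled unless the parent checkbox is checked, applied once at build time and again on every state change. A hedged PyQt5 sketch of just that pattern, using the same `PyQt5.Qt` import style as the plugin (widget labels are illustrative only):

```
from PyQt5.Qt import QApplication, QCheckBox, QVBoxLayout, QWidget

app = QApplication([])
panel = QWidget()
layout = QVBoxLayout(panel)

mark = QCheckBox('Mark added/updated books when finished?')
success = QCheckBox('Success')
layout.addWidget(mark)
layout.addWidget(success)

def mark_state():
    # the sub-option only matters while the parent option is on
    success.setEnabled(mark.isChecked())

mark.stateChanged.connect(mark_state)
mark_state()  # set the correct initial enabled state
panel.show()
```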
@@ -572,10 +629,20 @@ class BasicTab(QWidget):
         self.urlsfromclip.setChecked(prefs['urlsfromclip'])
         self.l.addWidget(self.urlsfromclip)
 
+        self.button_instantpopup = QCheckBox(_('FanFicFare button opens menu?'),self)
+        self.button_instantpopup.setToolTip(_('The FanFicFare toolbar button will bring up the plugin menu.  If unchecked, it will <i>Download from URLs</i> or optionally Update, see below.'))
+        self.button_instantpopup.setChecked(prefs['button_instantpopup'])
+        self.l.addWidget(self.button_instantpopup)
+
         self.updatedefault = QCheckBox(_('Default to Update when books selected?'),self)
-        self.updatedefault.setToolTip(_('The top FanFicFare plugin button will start Update if\nbooks are selected.  If unchecked, it will always bring up \'Add New\'.'))
+        self.updatedefault.setToolTip(_('The FanFicFare toolbar button will Update if books are selected.  If unchecked, it will always <i>Download from URLs</i>.'))
         self.updatedefault.setChecked(prefs['updatedefault'])
-        self.l.addWidget(self.updatedefault)
+        self.updatedefault.setEnabled(not self.button_instantpopup.isChecked())
+        self.button_instantpopup.stateChanged.connect(lambda x : self.updatedefault.setEnabled(not self.button_instantpopup.isChecked()))
+        horz = QHBoxLayout()
+        horz.addItem(QtGui.QSpacerItem(20, 1))
+        horz.addWidget(self.updatedefault)
+        self.l.addLayout(horz)
 
         self.adddialogstaysontop = QCheckBox(_("Keep 'Add New from URL(s)' dialog on top?"),self)
         self.adddialogstaysontop.setToolTip(_("Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\ndialog on top of all other windows.  Useful for dragging URLs onto it."))
@@ -660,12 +727,13 @@ class BasicTab(QWidget):
             self.collision.setCurrentIndex(i)
 
     def show_rejectlist(self):
-        d = RejectListDialog(self,
-                             rejecturllist.get_list(),
-                             rejectreasons=rejecturllist.get_reject_reasons(),
-                             header=_("Edit Reject URLs List"),
-                             show_delete=False,
-                             show_all_reasons=False)
+        with busy_cursor():
+            d = RejectListDialog(self,
+                                 rejecturllist.get_list(),
+                                 rejectreasons=rejecturllist.get_reject_reasons(),
+                                 header=_("Edit Reject URLs List"),
+                                 show_delete=False,
+                                 show_all_reasons=False)
         d.exec_()
         if d.result() != d.Accepted:
             return

@@ -693,6 +761,7 @@ class BasicTab(QWidget):
                           tooltip=_("One URL per line:\n<b>http://...,note</b>\n<b>http://...,title by author - note</b>"),
                           rejectreasons=rejecturllist.get_reject_reasons(),
                           reasonslabel=_('Add this reason to all URLs added:'),
+                          accept_storyurls=True,
                           save_size_name='fff:Add Reject List')
         d.exec_()
         if d.result() == d.Accepted:
@@ -802,7 +871,7 @@ class PersonalIniTab(QWidget):
 
     def show_defaults(self):
         IniTextDialog(self,
-                      get_resources('plugin-defaults.ini'),
+                      get_resources('plugin-defaults.ini').decode('utf-8'),
                       icon=self.windowIcon(),
                       title=_('Plugin Defaults'),
                       label=_("Plugin Defaults (%s) (Read-Only)")%'plugin-defaults.ini',

@@ -836,11 +905,11 @@ class PersonalIniTab(QWidget):
 
     def show_showcalcols(self):
         lines=[]#[('calibre_std_user_categories',_('User Categories'))]
-        for k,f in field_metadata.iteritems():
+        for k,f in six.iteritems(field_metadata):
             if f['name'] and k not in STD_COLS_SKIP: # only if it has a human readable name.
                 lines.append(('calibre_std_'+k,f['name']))
 
-        for k, column in self.plugin_action.gui.library_view.model().custom_columns.iteritems():
+        for k, column in six.iteritems(self.plugin_action.gui.library_view.model().custom_columns):
             if k != prefs['savemetacol']:
                 # custom always have name.
                 lines.append(('calibre_cust_'+k[1:],column['name']))
@@ -931,7 +1000,7 @@ class CalibreCoverTab(QWidget):
 
         self.gencov_elements=[] ## used to disable/enable when gen
                                 ## cover is off/on.  This is more
-                                ## about being a visual que than real
+                                ## about being a visual cue than real
                                 ## necessary function.
 
         topl = self.l = QVBoxLayout()

@@ -975,9 +1044,17 @@ class CalibreCoverTab(QWidget):
         horz.addWidget(self.updatecalcover)
         self.l.addLayout(horz)
 
+        self.covernewonly = QCheckBox(_("Set Calibre Cover Only for New Books"),self)
+        self.covernewonly.setToolTip(_("Set the Calibre cover from EPUB only for new\nbooks, not updates to existing books."))
+        self.covernewonly.setChecked(prefs['covernewonly'])
+        horz = QHBoxLayout()
+        horz.addItem(QtGui.QSpacerItem(20, 1))
+        horz.addWidget(self.covernewonly)
+        self.l.addLayout(horz)
+        self.l.addSpacing(5)
+
         tooltip = _("Generate a Calibre book cover image when Calibre metadata is updated.<br />"
-                    "Defaults to 'Yes, Always' for backward compatibility and because %(gc)s(Plugin)"
-                    " will only run if configured for Default or site.")%no_trans
+                    "Note that %(gc)s(Plugin) will only run if there is a %(gc)s setting configured below for Default or the appropriate site.")%no_trans
         horz = QHBoxLayout()
         label = QLabel(_('Generate Calibre Cover:'))
         label.setToolTip(tooltip)

@@ -985,13 +1062,7 @@ class CalibreCoverTab(QWidget):
         self.gencalcover = QComboBox(self)
         for i in gencalcover_order:
             self.gencalcover.addItem(i)
-        # back compat.  If has own value, use.
-        # if prefs['gencalcover']:
         self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[prefs['gencalcover']]))
-        # elif prefs['gencover']: # doesn't have own val, set YES if old value set.
-        #     self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[SAVE_YES]))
-        # else: # doesn't have own value, old value not set, NO.
-        #     self.gencalcover.setCurrentIndex(self.gencalcover.findText(prefs_save_options[SAVE_NO]))
 
         self.gencalcover.setToolTip(tooltip)
         label.setBuddy(self.gencalcover)

@@ -999,6 +1070,26 @@ class CalibreCoverTab(QWidget):
         self.l.addLayout(horz)
         self.gencalcover.currentIndexChanged.connect(self.endisable_elements)
 
+        horz = QHBoxLayout()
+        horz.addItem(QtGui.QSpacerItem(20, 1))
+        vert = QVBoxLayout()
+        horz.addLayout(vert)
+        self.l.addLayout(horz)
+
+        self.gcnewonly = QCheckBox(_("Generate Covers Only for New Books")%no_trans,self)
+        self.gcnewonly.setToolTip(_("Default is to generate a cover any time the calibre metadata is"
+                                    " updated.<br />Used for both Calibre and Plugin generated covers."))
+        self.gcnewonly.setChecked(prefs['gcnewonly'])
+        vert.addWidget(self.gcnewonly)
+        self.gencov_elements.append(self.gcnewonly)
+
+        self.gc_polish_cover = QCheckBox(_("Inject/update the generated cover inside EPUB"),self)
+        self.gc_polish_cover.setToolTip(_("Calibre's Polish feature will be used to inject or update the generated"
+                                          " cover into the EPUB ebook file.<br />Used for both Calibre and Plugin generated covers."))
+        self.gc_polish_cover.setChecked(prefs['gc_polish_cover'])
+        vert.addWidget(self.gc_polish_cover)
+        self.gencov_elements.append(self.gc_polish_cover)
+
         # can't be local or it's destroyed when __init__ is done and
         # connected things don't fire.
         self.gencov_rdgrp = QButtonGroup()

@@ -1007,7 +1098,9 @@ class CalibreCoverTab(QWidget):
         self.gencov_gb.setLayout(horz)
 
         self.plugin_gen_cover = QRadioButton(_('Plugin %(gc)s')%no_trans,self)
-        self.plugin_gen_cover.setToolTip(_("Use plugin to create covers.  Additional settings are below."))
+        self.plugin_gen_cover.setToolTip(_("Use the %(gc)s plugin to create covers.<br>"
+                                           "Requires that you have the the %(gc)s plugin installed.<br>"
+                                           "Additional settings are below.")%no_trans)
         self.gencov_rdgrp.addButton(self.plugin_gen_cover)
         # always, new only, when no cover from site, inject yes/no...
         self.plugin_gen_cover.setChecked(prefs['plugin_gen_cover'])

@@ -1029,20 +1122,6 @@ class CalibreCoverTab(QWidget):
         #self.l.addLayout(horz)
         self.l.addWidget(self.gencov_gb)
 
-        self.gcnewonly = QCheckBox(_("Generate Covers Only for New Books")%no_trans,self)
-        self.gcnewonly.setToolTip(_("Default is to generate a cover any time the calibre metadata is"
-                                    " updated.<br />Used for both Calibre and Plugin generated covers."))
-        self.gcnewonly.setChecked(prefs['gcnewonly'])
-        self.l.addWidget(self.gcnewonly)
-        self.gencov_elements.append(self.gcnewonly)
-
-        self.gc_polish_cover = QCheckBox(_("Inject/update the cover inside EPUB"),self)
-        self.gc_polish_cover.setToolTip(_("Calibre's Polish feature will be used to inject or update the generated"
-                                          " cover into the EPUB ebook file.<br />Used for both Calibre and Plugin generated covers."))
-        self.gc_polish_cover.setChecked(prefs['gc_polish_cover'])
-        self.l.addWidget(self.gc_polish_cover)
-        self.gencov_elements.append(self.gc_polish_cover)
-
         self.gcp_gb = QGroupBox(_("%(gc)s(Plugin) Settings")%no_trans)
         topl.addWidget(self.gcp_gb)
         self.l = QVBoxLayout()
@ -1201,6 +1280,31 @@ class OtherTab(QWidget):
|
|||
self.l = QVBoxLayout()
|
||||
self.setLayout(self.l)
|
||||
|
||||
groupbox = QGroupBox()
|
||||
self.l.addWidget(groupbox)
|
||||
|
||||
groupl = QVBoxLayout()
|
||||
groupbox.setLayout(groupl)
|
||||
|
||||
label = QLabel("<h3>"+
|
||||
_("Background Job Settings")+
|
||||
"</h3>"
|
||||
)
|
||||
label.setWordWrap(True)
|
||||
groupl.addWidget(label)
|
||||
|
||||
self.site_split_jobs = QCheckBox(_('Split downloads into separate background jobs by site'),self)
|
||||
self.site_split_jobs.setToolTip(_("Launches a separate background Job for each site in the list of stories to download/update. Otherwise, there will be only one background job."))
|
||||
self.site_split_jobs.setChecked(prefs['site_split_jobs'])
|
||||
groupl.addWidget(self.site_split_jobs)
|
||||
|
||||
self.reconsolidate_jobs = QCheckBox(_('Reconsolidate split downloads before updating library'),self)
|
||||
self.reconsolidate_jobs.setToolTip(_("Hold all downloads/updates launched together until they all finish. Otherwise, there will be a 'Proceed to update' dialog for each site."))
|
||||
self.reconsolidate_jobs.setChecked(prefs['reconsolidate_jobs'])
|
||||
groupl.addWidget(self.reconsolidate_jobs)
|
||||
|
||||
self.l.addSpacing(5)
|
||||
|
||||
label = QLabel(_("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFicFare confirmation dialogs back again."))
|
||||
label.setWordWrap(True)
|
||||
self.l.addWidget(label)
|
||||
|
|
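The two checkboxes above only record preferences; the actual per-site splitting happens in the background-job code further down in this diff. A minimal sketch of the grouping idea (the function name is illustrative, not the plugin's; py3 stdlib urlparse shown for brevity):

from collections import defaultdict
from urllib.parse import urlparse

def split_by_site(book_list, site_split_jobs=True):
    # Group books into one download list per site host when the
    # site_split_jobs preference is set; otherwise keep a single list.
    if not site_split_jobs:
        return {'': list(book_list)}
    jobs = defaultdict(list)
    for book in book_list:
        jobs[urlparse(book['url']).netloc].append(book)
    return dict(jobs)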
@@ -1257,6 +1361,7 @@ permitted_values = {
'numChapters',
'numWords',
'site',
'publisher',
'storyId',
'authorId',
'extratags',

@@ -1293,6 +1398,7 @@ titleLabels = {
'numChapters':_('Chapters'),
'numWords':_('Words'),
'site':_('Site'),
'publisher':_('Publisher'),
'storyId':_('Story ID'),
'authorId':_('Author ID'),
'extratags':_('Extra Tags'),

@@ -1314,7 +1420,8 @@ class CustomColumnsTab(QWidget):
self.plugin_action = plugin_action
QWidget.__init__(self)

custom_columns = self.plugin_action.gui.library_view.model().custom_columns
## sort by visible Column Name (vs #name)
custom_columns = sorted(self.plugin_action.gui.library_view.model().custom_columns.items(), key=lambda x: x[1]['name'])

self.l = QVBoxLayout()
self.setLayout(self.l)

@@ -1336,14 +1443,15 @@ class CustomColumnsTab(QWidget):
self.sl = QVBoxLayout()
scrollcontent.setLayout(self.sl)

for key, column in custom_columns.iteritems():
for key, column in custom_columns:

if column['datatype'] in permitted_values:
# print("\n============== %s ===========\n"%key)
# for (k,v) in column.iteritems():
# print("column['%s'] => %s"%(k,v))
horz = QHBoxLayout()
label = QLabel(column['name'])
# label = QLabel(column['name'])
label = QLabel('%s(%s)'%(column['name'],key))
label.setToolTip(_("Update this %s column(%s) with...")%(key,column['datatype']))
horz.addWidget(label)
dropdown = QComboBox(self)
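This hunk and the several that follow replace Python 2's dict.iteritems(), which is gone in Python 3, with iteration over a pre-sorted list of (key, value) pairs (or six.iteritems() where the dict itself is kept). A quick illustration of the pattern, with made-up column data:

# dict.iteritems() only exists on py2; materializing a sorted list of
# pairs works on both py2 and py3, and gives a stable display order.
custom_columns = {'#words': {'name': 'Words', 'datatype': 'int'},
                  '#arc':   {'name': 'Arc',   'datatype': 'text'}}

pairs = sorted(custom_columns.items(), key=lambda x: x[1]['name'])
for key, column in pairs:
    print(key, column['datatype'])   # '#arc' first: sorted by visible name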
@@ -1389,7 +1497,7 @@ class CustomColumnsTab(QWidget):
self.errorcol = QComboBox(self)
self.errorcol.setToolTip(tooltip)
self.errorcol.addItem('','none')
for key, column in custom_columns.iteritems():
for key, column in custom_columns:
if column['datatype'] in ('text','comments'):
self.errorcol.addItem(column['name'],key)
self.errorcol.setCurrentIndex(self.errorcol.findData(prefs['errorcol']))

@@ -1413,7 +1521,7 @@ class CustomColumnsTab(QWidget):
self.savemetacol = QComboBox(self)
self.savemetacol.setToolTip(tooltip)
self.savemetacol.addItem('','')
for key, column in custom_columns.iteritems():
for key, column in custom_columns:
if column['datatype'] in ('comments'):
self.savemetacol.addItem(column['name'],key)
self.savemetacol.setCurrentIndex(self.savemetacol.findData(prefs['savemetacol']))

@@ -1433,7 +1541,8 @@ class CustomColumnsTab(QWidget):
self.lastcheckedcol = QComboBox(self)
self.lastcheckedcol.setToolTip(tooltip)
self.lastcheckedcol.addItem('','none')
for key, column in custom_columns.iteritems():
## sort by visible Column Name (vs #name)
for key, column in custom_columns:
if column['datatype'] == 'datetime':
self.lastcheckedcol.addItem(column['name'],key)
self.lastcheckedcol.setCurrentIndex(self.lastcheckedcol.findData(prefs['lastcheckedcol']))

@@ -1476,7 +1585,7 @@ class StandardColumnsTab(QWidget):
self.stdcol_newonlycheck = {}

rows=[]
for key, column in columns.iteritems():
for key, column in six.iteritems(columns):
row = []
rows.append(row)
label = QLabel(column)

@@ -1501,10 +1610,16 @@ class StandardColumnsTab(QWidget):
self.titlecase.setChecked(prefs['titlecase'])
row.append(self.titlecase)
elif key == 'authors':
self.set_author_url = QCheckBox(_('Set Calibre Author URL'),self)
self.set_author_url.setToolTip(_("Set Calibre Author URL to Author's URL on story site."))
self.set_author_url.setChecked(prefs['set_author_url'])
row.append(self.set_author_url)

self.suppressauthorsort = QCheckBox(_('Force Author into Author Sort?'),self)
self.suppressauthorsort.setToolTip(_("If checked, the author(s) as given will be used for the Author Sort, too.\nIf not checked, calibre will apply its built-in algorithm which makes 'Bob Smith' sort as 'Smith, Bob', etc."))
self.suppressauthorsort.setChecked(prefs['suppressauthorsort'])
row.append(self.suppressauthorsort)

self.authorcase = QCheckBox(_('Fix Author Case?'),self)
self.authorcase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of author names will be applied.")
+"\n"+_("Calibre remembers all authors in the library; changing the author case on one book will affect all books by that author.")

@@ -1512,6 +1627,22 @@ class StandardColumnsTab(QWidget):
self.authorcase.setChecked(prefs['authorcase'])
row.append(self.authorcase)

elif key == 'series':
self.set_series_url = QCheckBox(_('Set Calibre Series URL'),self)
self.set_series_url.setToolTip(_("Set Calibre Series URL to Series's URL on story site."))
self.set_series_url.setChecked(prefs['set_series_url'])
row.append(self.set_series_url)

self.setanthologyseries = QCheckBox(_("Set 'Series [0]' for New Anthologies?"),self)
self.setanthologyseries.setToolTip(_("If checked, the Series column will be set to 'Series Name [0]' when an Anthology for a series is first created."))
self.setanthologyseries.setChecked(prefs['setanthologyseries'])
row.append(self.setanthologyseries)

self.seriescase = QCheckBox(_('Fix Series Case?'),self)
self.seriescase.setToolTip(_("If checked, Calibre's routine for correcting the capitalization of title will be applied.")
+"\n"+_("This affects Calibre metadata only, not FanFicFare metadata in title page."))
self.seriescase.setChecked(prefs['seriescase'])
row.append(self.seriescase)
grid = QGridLayout()
for rownum, row in enumerate(rows):
for colnum, col in enumerate(row):

@@ -1524,11 +1655,6 @@ class StandardColumnsTab(QWidget):
self.l.addWidget(label)
self.l.addSpacing(5)

self.set_author_url = QCheckBox(_('Set Calibre Author URL'),self)
self.set_author_url.setToolTip(_("Set Calibre Author URL to Author's URL on story site."))
self.set_author_url.setChecked(prefs['set_author_url'])
self.l.addWidget(self.set_author_url)

self.includecomments = QCheckBox(_("Include Books' Comments in Anthology Comments?"),self)
self.includecomments.setToolTip(_('''Include all the merged books' comments in the new book's comments.
Default is a list of included titles only.'''))

@@ -1629,6 +1755,19 @@ class ImapTab(QWidget):
self.l.addWidget(self.download_from_email_immediately,row,0,1,-1)
row+=1

label = QLabel(_('Add these Tag(s) Automatically'))
tooltip = ( _("Tags entered here will be automatically added to stories downloaded from email story URLs.") +"\n"+
            _("Any additional stories you then manually add to the Story URL dialog will also have these tags added.") )
label.setToolTip(tooltip)
self.l.addWidget(label,row,0)
self.imaptags = EditWithComplete(self) # QLineEdit(self)
self.imaptags.update_items_cache(self.plugin_action.gui.current_db.all_tags())
self.imaptags.setToolTip(tooltip)
self.imaptags.setText(prefs['imaptags'])
self.imaptags.setCursorPosition(0)
self.l.addWidget(self.imaptags,row,1)
row+=1

label = QLabel(_("<b>It's safest if you create a separate email account that you use only "
                 "for your story update notices. FanFicFare and calibre cannot guarantee that "
                 "malicious code cannot get your email password once you've entered it. "

@@ -1637,5 +1776,3 @@ class ImapTab(QWidget):
self.l.addWidget(label,row,0,1,-1,Qt.AlignTop)
self.l.setRowStretch(row,1)
row+=1
(Two file diffs suppressed because they are too large.)
@@ -4,18 +4,22 @@ from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2015, Jim Miller'
__copyright__ = '2020, Jim Miller'
__docformat__ = 'restructuredtext en'

from StringIO import StringIO
from ConfigParser import ParsingError
from functools import reduce

from io import StringIO

import logging
logger = logging.getLogger(__name__)

from calibre_plugins.fanficfare_plugin.fanficfare import adapters, exceptions
from calibre_plugins.fanficfare_plugin.fanficfare.configurable import Configuration
from fanficfare import adapters
from fanficfare.configurable import Configuration
from calibre_plugins.fanficfare_plugin.prefs import prefs
from fanficfare.six import ensure_text
from fanficfare.six.moves import configparser
from fanficfare.six.moves import collections_abc

def get_fff_personalini():
    return prefs['personal.ini']
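The import hunk above is the py2/py3 migration in miniature: io.StringIO replaces StringIO.StringIO but only accepts unicode text, so byte strings must be decoded first, and ConfigParser is reached through six.moves. A py3 sketch of the same pattern, with a simplified stand-in for six's ensure_text:

from io import StringIO
from configparser import ConfigParser

def ensure_text(s, encoding='utf-8'):
    # simplified stand-in for fanficfare.six.ensure_text
    return s.decode(encoding) if isinstance(s, bytes) else s

parser = ConfigParser()
parser.read_file(StringIO(ensure_text(b"[defaults]\nis_adult: true\n")))
print(parser.get('defaults', 'is_adult'))   # -> true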
@@ -29,8 +33,8 @@ def get_fff_config(url,fileform="epub",personalini=None):
except Exception as e:
logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
configuration = Configuration(sections,fileform)
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
configuration.readfp(StringIO(personalini))
configuration.read_file(StringIO(ensure_text(get_resources("plugin-defaults.ini"))))
configuration.read_file(StringIO(ensure_text(personalini)))

return configuration
@@ -42,8 +46,71 @@ def test_config(initext):
configini = get_fff_config("test1.com?sid=555",
                           personalini=initext)
errors = configini.test_config()
except ParsingError as pe:
except configparser.ParsingError as pe:
errors = pe.errors

return errors

class OrderedSet(collections_abc.MutableSet):

    def __init__(self, iterable=None):
        self.end = end = []
        end += [None, end, end] # sentinel node for doubly linked list
        self.map = {} # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self.map)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        if key not in self.map:
            end = self.end
            curr = end[1]
            curr[2] = end[1] = self.map[key] = [key, curr, end]

    def discard(self, key):
        if key in self.map:
            key, prev, next = self.map.pop(key)
            prev[2] = next
            next[1] = prev

    def __iter__(self):
        end = self.end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        end = self.end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def pop(self, last=True):
        if not self:
            raise KeyError('set is empty')
        key = self.end[1][0] if last else self.end[2][0]
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        return set(self) == set(other)

def get_common_elements(ll):
    ## returns a list of elements common to all lists in ll
    ## https://www.tutorialspoint.com/find-common-elements-in-list-of-lists-in-python
    return list(reduce(lambda i, j: i & j, (OrderedSet(n) for n in ll)))
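A small usage sketch of the helper above: intersecting OrderedSets gives the common elements in a deterministic, list-based order instead of the arbitrary ordering a plain set() intersection would produce (with MutableSet's default __and__, the result follows the order of the last list):

common = get_common_elements([['epub', 'mobi', 'txt', 'html'],
                              ['txt', 'epub', 'html'],
                              ['html', 'epub', 'txt']])
print(common)   # -> ['html', 'epub', 'txt']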
(Binary file not shown; the previous image was 24 KiB.)
@@ -1,78 +1,117 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division,
from __future__ import (absolute_import, unicode_literals, division,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2017, Jim Miller'
__copyright__ = '2020, Jim Miller'
__docformat__ = 'restructuredtext en'

import re

import logging
logger = logging.getLogger(__name__)

from PyQt5.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter,
                      QTextCharFormat, QBrush, QFont)

try:
from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont)
except ImportError as e:
from PyQt4.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont)
# qt6 Calibre v6+
QFontNormal = QFont.Weight.Normal
QFontBold = QFont.Weight.Bold
except:
# qt5 Calibre v2-5
QFontNormal = QFont.Normal
QFontBold = QFont.Bold

from fanficfare.six import string_types

class IniHighlighter(QSyntaxHighlighter):
'''
QSyntaxHighlighter class for use with QTextEdit for highlighting
ini config files.
'''

def __init__( self, parent, sections=[], keywords=[], entries=[], entry_keywords=[] ):
QSyntaxHighlighter.__init__( self, parent )
self.parent = parent

self.highlightingRules = []

colors = {
    'knownentries':Qt.darkGreen,
    'errors':Qt.red,
    'allkeywords':Qt.darkMagenta,
    'knownkeywords':Qt.blue,
    'knownsections':Qt.darkBlue,
    'teststories':Qt.darkCyan,
    'storyUrls':Qt.darkMagenta,
    'comments':Qt.darkYellow
    }
try:
    if( hasattr(QApplication.instance(),'is_dark_theme')
        and QApplication.instance().is_dark_theme ):
        colors = {
            'knownentries':Qt.green,
            'errors':Qt.red,
            'allkeywords':Qt.magenta,
            'knownkeywords':QColor(Qt.blue).lighter(150),
            'knownsections':Qt.darkCyan,
            'teststories':Qt.cyan,
            'storyUrls':QColor(Qt.magenta).lighter(150),
            'comments':Qt.yellow
            }
except Exception as e:
    logger.error("Failed to set dark theme highlight colors: %s"%e)

if entries:
# *known* entries
reentries = r'('+(r'|'.join(entries))+r')'
self.highlightingRules.append( HighlightingRule( r"\b"+reentries+r"\b", Qt.darkGreen ) )
self.highlightingRules.append( HighlightingRule( r"\b"+reentries+r"\b", colors['knownentries'] ) )

# true/false -- just to be nice.
self.highlightingRules.append( HighlightingRule( r"\b(true|false)\b", Qt.darkGreen ) )
self.highlightingRules.append( HighlightingRule( r"\b(true|false)\b", colors['knownentries'] ) )

# *all* keywords -- change known later.
self.errorRule = HighlightingRule( r"^[^:=\s][^:=]*[:=]", Qt.red )
self.errorRule = HighlightingRule( r"^[^:=\s][^:=]*[:=]", colors['errors'] )
self.highlightingRules.append( self.errorRule )

# *all* entry keywords -- change known later.
reentrykeywords = r'('+(r'|'.join([ e % r'[a-zA-Z0-9_]+' for e in entry_keywords ]))+r')'
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"(_filelist)?\s*[:=]", Qt.darkMagenta ) )
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"(_filelist)?\s*[:=]", colors['allkeywords'] ) )

if entries: # separate from known entries so entry named keyword won't be masked.
# *known* entry keywords
reentrykeywords = r'('+(r'|'.join([ e % reentries for e in entry_keywords ]))+r')'
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"(_filelist)?\s*[:=]", Qt.blue ) )
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) )

# *known* keywords
rekeywords = r'('+(r'|'.join(keywords))+r')'
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"(_filelist)?\s*[:=]", Qt.blue ) )
self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) )

# *all* sections -- change known later.
self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", Qt.red, QFont.Bold, blocknum=1 ) )
self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", colors['errors'], QFontBold, blocknum=1 ) )

if sections:
# *known* sections
resections = r'('+(r'|'.join(sections))+r')'
resections = resections.replace('.','\.') #escape dots.
self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", Qt.darkBlue, QFont.Bold, blocknum=2 ) )
resections = resections.replace('.',r'\.') #escape dots.
self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", colors['knownsections'], QFontBold, blocknum=2 ) )

# test story sections
self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", Qt.darkCyan, blocknum=3 )
self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", colors['teststories'], blocknum=3 )
self.highlightingRules.append( self.teststoryRule )

# storyUrl sections
self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", Qt.darkMagenta, blocknum=4 )
# StoryUrls are *not* checked beyond looking for https?://
self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", colors['storyUrls'], QFontBold, blocknum=2 )
self.highlightingRules.append( self.storyUrlRule )

# NOT comments -- but can be custom columns, so don't flag.
#self.highlightingRules.append( HighlightingRule( r"(?<!^)#[^\n]*" , Qt.red ) )
#self.highlightingRules.append( HighlightingRule( r"(?<!^)#[^\n]*" , colors['errors'] ) )

# comments -- comments must start from column 0.
self.commentRule = HighlightingRule( r"^#[^\n]*" , Qt.darkYellow )
self.commentRule = HighlightingRule( r"^#[^\n]*" , colors['comments'] )
self.highlightingRules.append( self.commentRule )

def highlightBlock( self, text ):

@@ -91,23 +130,24 @@ class IniHighlighter(QSyntaxHighlighter):
# unknown section, error all:
if blocknum == 1 and blocknum == self.previousBlockState():
    self.setFormat( 0, len(text), self.errorRule.highlight )

# teststory section rules:
if blocknum == 3:
    self.setFormat( 0, len(text), self.teststoryRule.highlight )

# storyUrl section rules:
if blocknum == 4:
    self.setFormat( 0, len(text), self.storyUrlRule.highlight )

## changed storyUrl section to also be blocknum=1 April 2023
## storyUrl section rules:
# if blocknum == 4:
#     self.setFormat( 0, len(text), self.storyUrlRule.highlight )

self.setCurrentBlockState( blocknum )

class HighlightingRule():
def __init__( self, pattern, color,
              weight=QFont.Normal,
              weight=QFontNormal,
              style=Qt.SolidPattern,
              blocknum=0):
if isinstance(pattern,basestring):
if isinstance(pattern, string_types):
    self.pattern = re.compile(pattern)
else:
    self.pattern=pattern

@@ -117,4 +157,3 @@ class HighlightingRule():
charfmt.setFontWeight(weight)
self.highlight = charfmt
self.blocknum=blocknum
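A minimal usage sketch for the highlighter above, assuming the qt5 import path and an interactive session (the sample ini text and rule lists here are illustrative, not the plugin's):

import sys
from PyQt5.Qt import QApplication, QTextEdit

app = QApplication(sys.argv)
edit = QTextEdit()
edit.setPlainText("[defaults]\nis_adult:true\n# a comment\n")
# Attach to the editor's document; the rules key off the lists passed in.
hl = IniHighlighter(edit.document(),
                    sections=['defaults', 'epub'],
                    keywords=['is_adult'],
                    entries=['epub', 'html'])
edit.show()
app.exec_()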
@@ -4,24 +4,20 @@ from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2018, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
__copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
__docformat__ = 'restructuredtext en'

import logging
logger = logging.getLogger(__name__)

import traceback
from time import sleep
from datetime import time
from StringIO import StringIO
from io import StringIO
from collections import defaultdict
import sys

from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.constants import numeric_version as calibre_version
from calibre.utils.date import local_tz

from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
from calibre_plugins.fanficfare_plugin.prefs import (SAVE_YES, SAVE_YES_UNLESS_SITE)

# pulls in translation files for _() strings
try:
load_translations()

@@ -34,78 +30,96 @@ except NameError:
#
# ------------------------------------------------------------------------------

def do_download_worker(book_list,
                       options,
                       cpus,
                       merge=False,
                       notification=lambda x,y:x):
'''
Master job, to launch child jobs to extract ISBN for a set of books
This is run as a worker job in the background to keep the UI more
responsive and get around the memory leak issues as it will launch
a child job for each book as a worker process
'''
server = Server(pool_size=cpus)
def do_download_worker_single(site,
                              book_list,
                              options,
                              merge,
                              notification=lambda x,y:x):

logger.info(options['version'])
total = 0
alreadybad = []
# Queue all the jobs
logger.info("Adding jobs for URLs:")
for book in book_list:
logger.info("%s"%book['url'])
if book['good']:
total += 1
args = ['calibre_plugins.fanficfare_plugin.jobs',
        'do_download_for_worker',
        (book,options,merge)]
job = ParallelJob('arbitrary_n',
                  "url:(%s) id:(%s)"%(book['url'],book['calibre_id']),
                  done=None,
                  args=args)
job._book = book
server.add_job(job)
else:
# was already bad before the subprocess ever started.
alreadybad.append(book)

# This server is an arbitrary_n job, so there is a notifier available.
# Set the % complete to a small number to avoid the 'unavailable' indicator
## same info debug calibre prints out at startup. For when users
## give me job output instead of debug log.
from calibre.debug import print_basic_debug_info
print_basic_debug_info(sys.stderr)

notification(0.01, _('Downloading FanFiction Stories'))
from calibre_plugins.fanficfare_plugin import FanFicFareBase
fffbase = FanFicFareBase(options['plugin_path'])
with fffbase: # so the sys.path was modified while loading the
              # plug impl.
from fanficfare.fff_profile import do_cprofile

# dequeue the job results as they arrive, saving the results
count = 0
while True:
job = server.changed_jobs_queue.get()
# A job can 'change' when it is not finished, for example if it
# produces a notification. Ignore these.
job.update()
if not job.is_finished:
    continue
# A job really finished. Get the information.
book_list.remove(job._book)
book_list.append(job.result)
book_id = job._book['calibre_id']
count = count + 1
notification(float(count)/total, _('%d of %d stories finished downloading')%(count,total))
# Add this job's output to the current log
logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
logger.info(job.details)
## extra function just so I can easily use the same
## @do_cprofile decorator
@do_cprofile
def profiled_func():
count = 0
totals = {}
# can't do direct assignment in list comprehension? I'm sure it
# makes sense to some pythonista.
# [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
[ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
# logger.debug(sites_lists.keys())

if count >= total:
## ordering first by good vs bad, then by listorder.
good_list = filter(lambda x : x['good'], book_list)
bad_list = filter(lambda x : not x['good'], book_list)
good_list = sorted(good_list,key=lambda x : x['listorder'])
bad_list = sorted(bad_list,key=lambda x : x['listorder'])
def do_indiv_notif(percent,msg):
    totals[msg] = percent/len(totals)
    notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})

logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(url)s %(comment)s" % book for book in good_list+bad_list])))
do_list = []
done_list = []
logger.info("\n\n"+_("Downloading FanFiction Stories")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
## pass failures from metadata through bg job so all results are
## together.
for book in book_list:
    if book['good']:
        do_list.append(book)
    else:
        done_list.append(book)
for book in do_list:
    # logger.info("%s"%book['url'])
    done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
    count += 1
return finish_download(done_list)
return profiled_func()

logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in good_list])))
logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in bad_list])))
break
def finish_download(donelist):
book_list = sorted(donelist,key=lambda x : x['listorder'])
logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))

server.close()
good_lists = defaultdict(list)
bad_lists = defaultdict(list)
for book in book_list:
    if book['good']:
        good_lists[book['status']].append(book)
    else:
        bad_lists[book['status']].append(book)

order = [_('Add'),
         _('Update'),
         _('Meta'),
         _('Different URL'),
         _('Rejected'),
         _('Skipped'),
         _('Bad'),
         _('Error'),
         ]
stnum = 0
for d in [ good_lists, bad_lists ]:
    for status in order:
        stnum += 1
        if d[status]:
            l = d[status]
            logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
            for book in l:
                # Add prior listorder to 10000 * status num for
                # ordering of accumulated results with multiple bg
                # jobs
                book['reportorder'] = stnum*10000 + book['listorder']
            del d[status]
    # just in case a status is added but doesn't appear in order.
    for status in d.keys():
        logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))

# return the book list as the job result
return book_list
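A worked example of the reportorder scheme in finish_download above: the status bucket number is multiplied by 10000 and the original queue position added, so results accumulated from several background jobs sort by status first, then by the order the user queued them (the bucket numbers below are illustrative; the real numbers depend on the order list):

books = [{'status': 'Add',   'listorder': 2},
         {'status': 'Error', 'listorder': 0},
         {'status': 'Add',   'listorder': 1}]
bucket = {'Add': 1, 'Update': 2, 'Error': 8}   # illustrative bucket numbers
for b in books:
    b['reportorder'] = bucket[b['status']] * 10000 + b['listorder']
print(sorted(b['reportorder'] for b in books))  # -> [10001, 10002, 80000]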
@@ -119,14 +133,19 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
fffbase = FanFicFareBase(options['plugin_path'])
with fffbase: # so the sys.path was modified while loading the
              # plug impl.
from calibre_plugins.fanficfare_plugin.dialogs import (NotGoingToDownload,
    OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions
from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import get_update_data
from calibre_plugins.fanficfare_plugin.prefs import (
    SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
    UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
from fanficfare import adapters, writers
from fanficfare.epubutils import get_update_data
from fanficfare.exceptions import NotGoingToDownload
from fanficfare.six import text_type as unicode

from calibre_plugins.fanficfare_plugin.fff_util import (get_fff_adapter, get_fff_config)
from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config

try:
logger.info("\n\n" + ("-"*80) + " " + book['url'])
## No need to download at all. Can happen now due to
## collision moving into book for CALIBREONLY changing to
## ADDNEW when story URL not in library.

@@ -140,17 +159,6 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
options['fileform'],
options['personal.ini'])

if configuration.getConfig('use_ssl_unverified_context'):
    ## monkey patch to avoid SSL bug. duplicated from
    ## fff_plugin.py because bg jobs run in own process
    ## space.
    import ssl
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

if not options['updateepubcover'] and 'epub_for_update' in book and book['collision'] in (UPDATE, UPDATEALWAYS):
    configuration.set("overrides","never_make_cover","true")

# images only for epub, html, even if the user mistakenly
# turned it on elsewhere.
if options['fileform'] not in ("epub","html"):

@@ -160,25 +168,46 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
adapter.is_adult = book['is_adult']
adapter.username = book['username']
adapter.password = book['password']
adapter.totp = book['totp']
adapter.setChaptersRange(book['begin'],book['end'])

configuration.load_cookiejar(options['cookiejarfile'])
#logger.debug("cookiejar:%s"%configuration.cookiejar)
configuration.set_pagecache(options['pagecache'])
## each site download job starts with a new copy of the
## cookiejar and basic_cache from the FG process. They
## are not shared between different sites' BG downloads
if 'basic_cache' in options:
    configuration.set_basic_cache(options['basic_cache'])
else:
    options['basic_cache'] = configuration.get_basic_cache()
    options['basic_cache'].load_cache(options['basic_cachefile'])
if 'cookiejar' in options:
    configuration.set_cookiejar(options['cookiejar'])
else:
    options['cookiejar'] = configuration.get_cookiejar()
    options['cookiejar'].load_cookiejar(options['cookiejarfile'])

story = adapter.getStoryMetadataOnly()
if not story.getMetadata("series") and 'calibre_series' in book:
    adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])

# logger.debug(merge)
# logger.debug(book.get('epub_for_update','(NONE)'))
# logger.debug(options.get('mergebook','(NOMERGEBOOK)'))

# is a merge, is a pre-existing anthology, and is not a pre-existing book in anthology.
if merge and 'mergebook' in options and 'epub_for_update' not in book:
    # internal for plugin anthologies to mark chapters
    # (new) in new stories
    story.setMetadata("newforanthology","true")
    logger.debug("metadata newforanthology:%s"%story.getMetadata("newforanthology"))

# set PI version instead of default.
if 'version' in options:
    story.setMetadata('version',options['version'])

book['title'] = story.getMetadata("title", removeallentities=True)
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['url'] = story.getMetadata("storyUrl")
book['tags'] = story.getSubjectTags(removeallentities=True)
book['publisher'] = story.getMetadata("publisher")
book['url'] = story.getMetadata("storyUrl", removeallentities=True)
book['comments'] = story.get_sanitized_description()
book['series'] = story.getMetadata("series", removeallentities=True)
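The basic_cache/cookiejar hunk above uses a share-or-initialize pattern: the first book in a site's background job creates and stores the object in options, later books in the same job reuse it, and nothing crosses between site jobs because each job gets its own options dict. Reduced to its core (names here are illustrative):

def get_shared(options, key, factory):
    # First caller creates and stores the shared object; later
    # callers in the same job reuse it.
    if key not in options:
        options[key] = factory()
    return options[key]

options = {}
cache = get_shared(options, 'basic_cache', dict)
assert get_shared(options, 'basic_cache', dict) is cache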
@@ -189,7 +218,7 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
if story.getMetadataRaw('dateCreated'):
    book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz)
else:
    book['timestamp'] = datetime.now() # need *something* there for calibre.
    book['timestamp'] = datetime.now().replace(tzinfo=local_tz) # need *something* there for calibre.

writer = writers.getWriter(options['fileform'],configuration,adapter)
outfile = book['outfile']

@@ -219,9 +248,20 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):

logger.info("write to %s"%outfile)
inject_cal_cols(book,story,configuration)
writer.writeStory(outfilename=outfile, forceOverwrite=True)
writer.writeStory(outfilename=outfile,
                  forceOverwrite=True,
                  notification=notification)

book['comment'] = _('Download %s completed, %s chapters.')%(options['fileform'],story.getMetadata("numChapters"))
if adapter.story.chapter_error_count > 0:
    book['comment'] = _('Download %(fileform)s completed, %(failed)s failed chapters, %(total)s total chapters.')%\
        {'fileform':options['fileform'],
         'failed':adapter.story.chapter_error_count,
         'total':story.getMetadata("numChapters")}
    book['chapter_error_count'] = adapter.story.chapter_error_count
else:
    book['comment'] = _('Download %(fileform)s completed, %(total)s chapters.')%\
        {'fileform':options['fileform'],
         'total':story.getMetadata("numChapters")}
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
if options['savemetacol'] != '':
    book['savemetacol'] = story.dump_html_metadata()

@@ -244,20 +284,21 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
adapter.oldchaptersmap,
adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9]

# dup handling from fff_plugin needed for anthology updates.
if book['collision'] == UPDATE:
    if chaptercount == urlchaptercount:
# dup handling from fff_plugin needed for anthology updates & BG metadata.
if book['collision'] in (UPDATE,UPDATEALWAYS):
    if chaptercount == urlchaptercount and book['collision'] == UPDATE:
        if merge:
            ## Deliberately pass for UPDATEALWAYS merge.
            book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
            book['all_metadata'] = story.getAllMetadata(removeallentities=True)
            if options['savemetacol'] != '':
                book['savemetacol'] = story.dump_html_metadata()
            book['outfile'] = book['epub_for_update'] # for anthology merge ops.
            return book
        else: # not merge,
        else:
            raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
    elif chaptercount > urlchaptercount:
        raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
    elif chaptercount > urlchaptercount and not (book['collision'] == UPDATEALWAYS and adapter.getConfig('force_update_epub_always')):
        raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
    elif chaptercount == 0:
        raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')

@@ -269,26 +310,46 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
logger.info("write to %s"%outfile)

inject_cal_cols(book,story,configuration)
writer.writeStory(outfilename=outfile, forceOverwrite=True)
writer.writeStory(outfilename=outfile,
                  forceOverwrite=True,
                  notification=notification)

book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\
    (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
if adapter.story.chapter_error_count > 0:
    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters, %(failed)s failed chapters, for %(total)s total.')%\
        {'fileform':options['fileform'],
         'failed':adapter.story.chapter_error_count,
         'added':(urlchaptercount-chaptercount),
         'total':urlchaptercount}
    book['chapter_error_count'] = adapter.story.chapter_error_count
else:
    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters for %(total)s total.')%\
        {'fileform':options['fileform'],'added':(urlchaptercount-chaptercount),'total':urlchaptercount}
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
if options['savemetacol'] != '':
    book['savemetacol'] = story.dump_html_metadata()
else:
    ## Shouldn't ever get here, but hey, it happened once
    ## before with prefs['collision']
    raise Exception("Impossible state reached -- Book: %s:\nOptions:%s:"%(book,options))

if options['do_wordcount'] == SAVE_YES or (
    options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ):
    wordcount = get_word_count(outfile)
    logger.info("get_word_count:%s"%wordcount)
    story.setMetadata('numWords',wordcount)
    writer.writeStory(outfilename=outfile, forceOverwrite=True)
    book['all_metadata'] = story.getAllMetadata(removeallentities=True)
    if options['savemetacol'] != '':
        book['savemetacol'] = story.dump_html_metadata()
    try:
        wordcount = get_word_count(outfile)
        # logger.info("get_word_count:%s"%wordcount)
        # clear cache for the rather unusual case of
        # numWords affecting other previously cached
        # entries.
        story.clear_processed_metadata_cache()
        story.setMetadata('numWords',wordcount)
        writer.writeStory(outfilename=outfile, forceOverwrite=True)
        book['all_metadata'] = story.getAllMetadata(removeallentities=True)
        if options['savemetacol'] != '':
            book['savemetacol'] = story.dump_html_metadata()
    except:
        logger.error("WordCount failed")

if options['smarten_punctuation'] and options['fileform'] == "epub" \
        and calibre_version >= (0, 9, 39):
if options['smarten_punctuation'] and options['fileform'] == "epub":
    # for smarten punc
    from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
    from calibre.utils.logging import Log

@@ -298,26 +359,28 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
data = {'smarten_punctuation':True}
opts = ALL_OPTS.copy()
opts.update(data)
O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys()))
O = namedtuple('Options', ' '.join(ALL_OPTS.keys()))
opts = O(**opts)

log = Log(level=Log.DEBUG)
polish({outfile:outfile}, opts, log, logger.info)

## here to catch tags set in chapters in literotica for
## both overwrites and updates.
book['tags'] = story.getSubjectTags(removeallentities=True)
except NotGoingToDownload as d:
    book['good']=False
    book['status']=_('Bad')
    book['showerror']=d.showerror
    book['comment']=unicode(d)
    book['icon'] = d.icon

except Exception as e:
    book['good']=False
    book['status']=_('Error')
    book['comment']=unicode(e)
    book['icon']='dialog_error.png'
    book['status'] = _('Error')
    logger.info("Exception: %s:%s"%(book,unicode(e)),exc_info=True)

    #time.sleep(10)
    logger.info("Exception: %s:%s"%(book,book['comment']),exc_info=True)
return book

## calibre's columns for an existing book are passed in and injected
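The smarten-punctuation hunk above keeps a handy dict-to-attributes trick: calibre's polish() wants an options object with attributes, so a namedtuple type is built from the option names and instantiated from the merged dict (the `.iterkeys()` to `.keys()` change is another py3 fix). In isolation, with an abbreviated stand-in for calibre's real ALL_OPTS:

from collections import namedtuple

ALL_OPTS = {'smarten_punctuation': False, 'subset_fonts': False}  # abbreviated
data = {'smarten_punctuation': True}
opts = ALL_OPTS.copy()
opts.update(data)
O = namedtuple('Options', ' '.join(ALL_OPTS.keys()))
opts = O(**opts)
print(opts.smarten_punctuation)   # -> True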
@@ -329,12 +392,12 @@ def inject_cal_cols(book,story,configuration):
if 'calibre_columns' in book:
    injectini = ['[injected]']
    extra_valid = []
    for k, v in book['calibre_columns'].iteritems():
    for k in book['calibre_columns'].keys():
        v = book['calibre_columns'][k]
        story.setMetadata(k,v['val'])
        injectini.append('%s_label:%s'%(k,v['label']))
        extra_valid.append(k)
    if extra_valid: # if empty, there's nothing to add.
        injectini.append("add_to_extra_valid_entries:,"+','.join(extra_valid))
    configuration.readfp(StringIO('\n'.join(injectini)))
    configuration.read_file(StringIO('\n'.join(injectini)))
    #print("added:\n%s\n"%('\n'.join(injectini)))
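inject_cal_cols serializes calibre column values into an in-memory [injected] INI section and feeds it to the configuration exactly as if it were another personal.ini layer. The same idea with stdlib configparser (sample column data is illustrative):

from configparser import ConfigParser
from io import StringIO

calibre_columns = {'words': {'label': 'words', 'val': '12345'}}
injectini = ['[injected]']
for k, v in calibre_columns.items():
    injectini.append('%s_label:%s' % (k, v['label']))

parser = ConfigParser()
parser.read_file(StringIO('\n'.join(injectini)))
print(parser.get('injected', 'words_label'))   # -> words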
(File diff suppressed because it is too large.)
@@ -3,22 +3,9 @@

[defaults]
## [defaults] section applies to all formats and sites but may be
## overridden at several levels. Example:

## [defaults]
## titlepage_entries: category,genre, status
## [www.whofic.com]
## # overrides defaults.
## titlepage_entries: category,genre, status,dateUpdated,rating
## [epub]
## # overrides defaults & site section
## titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
## [www.whofic.com:epub]
## # overrides defaults, site section & format section
## titlepage_entries: category,genre, status,datePublished
## [overrides]
## # overrides all other sections
## titlepage_entries: category
## overridden at several levels. See
## https://github.com/JimmXinu/FanFicFare/wiki/INI-File for more
## details.

## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.

@@ -29,38 +16,32 @@
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
## chapter_title_add_pattern in defaults.ini.
#strip_chapter_numbers:true
#add_chapter_numbers:true

## Add this to genre if there's more than one category.
#add_genre_when_multi_category: Crossover

[epub]
## include images from img tags in the body and summary of stories.
## Include images from img tags in the body and summary of stories.
## Images will be converted to jpg for size if possible. Images work
## in epub format only. To get mobi or other format with images,
## download as epub and use Calibre to convert.
## true by default, uncomment and set false to not include images.
#include_images:true

## If not set, the summary will have all html stripped for safety.
## If set false, the summary will have all html stripped for safety.
## Both this and include_images must be true to get images in the
## summary.
## true by default, uncomment and set false to not keep summary html.
#keep_summary_html:true

## If set, the first image found will be made the cover image. If
## keep_summary_html is true, any images in summary will be before any
## If set true, and there isn't a specific cover image, the first
## image found in the story will be made the cover image. If
## keep_summary_html is true, images in the summary will be before any
## in chapters.
## true by default, uncomment and set false to turn off
#make_firstimage_cover:true

## Resize images down to width, height, preserving aspect ratio.
## Nook size, with margin.
#image_max_size: 580, 725

## Change image to grayscale, if graphics library allows, to save
## space.
#grayscale_images: false

## The most common use, I expect, will be saving username/passwords
## for different sites. Here are a few examples. See defaults.ini

@@ -72,28 +53,6 @@
## default is false
#collect_series: true

[ficwad.com]
#username:YourUsername
#password:YourPassword

[www.adastrafanfic.com]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content.
#is_adult:true

[www.twcslibrary.net]
#username:YourName
#password:yourpassword
#is_adult:true
## default is false
#collect_series: true

[www.fictionalley.org]
#is_adult:true

[www.harrypotterfanfiction.com]
#is_adult:true

[www.fimfiction.net]
#is_adult:true
#fail_on_password: false

@@ -102,8 +61,9 @@
#is_adult:true
## tth is a little unusual--it doesn't require user/pass, but the site
## keeps track of which chapters you've read and won't send another
## update until it thinks you're up to date. This way, on download,
## it thinks you're up to date.
## update until it thinks you're up to date. If you set
## username/password, FFF will login to download. Then the site
## thinks you're up to date.
#username:YourName
#password:yourpassword
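The inline precedence example removed above now lives on the wiki page linked in the new comment. For reference, the lookup order it described (most specific section wins) can be sketched in Python; this is a simplified illustration, not the plugin's Configuration class, which handles more section variants:

def lookup(parser, site, fileform, key):
    # Check sections from most to least specific, mirroring the
    # [overrides] > [site:format] > [format] > [site] > [defaults] order.
    for section in ['overrides',
                    '%s:%s' % (site, fileform),
                    fileform,
                    site,
                    'defaults']:
        if parser.has_option(section, key):
            return parser.get(section, key)
    return None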
@@ -4,7 +4,7 @@ from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2018, Jim Miller'
__copyright__ = '2021, Jim Miller'
__docformat__ = 'restructuredtext en'

import logging

@@ -12,9 +12,14 @@ logger = logging.getLogger(__name__)

import copy

from calibre.utils.config import JSONConfig
from calibre.gui2.ui import get_gui

# pulls in translation files for _() strings
try:
    load_translations()
except NameError:
    pass # load_translations() added in calibre 1.9

from calibre_plugins.fanficfare_plugin import __version__ as plugin_version
from calibre_plugins.fanficfare_plugin.common_utils import get_library_uuid

@@ -97,9 +102,6 @@ updatecalcover_order=[YES,YES_IF_IMG,NO]
gencalcover_order=[YES,YES_UNLESS_IMG,NO]
do_wordcount_order=[YES,YES_UNLESS_SITE,NO]

# if don't have any settings for FanFicFarePlugin, copy from
# predecessor FanFictionDownLoaderPlugin.
FFDL_PREFS_NAMESPACE = 'FanFictionDownLoaderPlugin'
PREFS_NAMESPACE = 'FanFicFarePlugin'
PREFS_KEY_SETTINGS = 'settings'

@@ -109,7 +111,7 @@ default_prefs = {}
default_prefs['last_saved_version'] = (0,0,0)
default_prefs['personal.ini'] = get_resources('plugin-example.ini')
default_prefs['cal_cols_pass_in'] = False
default_prefs['rejecturls'] = ''
default_prefs['rejecturls'] = '' # removed, but need empty default for fallback
default_prefs['rejectreasons'] = '''Sucked
Boring
Dup from another site'''

@@ -118,16 +120,22 @@ default_prefs['reject_delete_default'] = True

default_prefs['updatemeta'] = True
default_prefs['bgmeta'] = False
default_prefs['updateepubcover'] = False
#default_prefs['updateepubcover'] = True # removed in favor of always True Oct 2022
default_prefs['keeptags'] = False
default_prefs['suppressauthorsort'] = False
default_prefs['suppresstitlesort'] = False
default_prefs['authorcase'] = False
default_prefs['titlecase'] = False
default_prefs['seriescase'] = False
default_prefs['setanthologyseries'] = False
default_prefs['mark'] = False
default_prefs['mark_success'] = True
default_prefs['mark_failed'] = True
default_prefs['mark_chapter_error'] = True
default_prefs['showmarked'] = False
default_prefs['autoconvert'] = False
default_prefs['urlsfromclip'] = True
default_prefs['button_instantpopup'] = False
default_prefs['updatedefault'] = True
default_prefs['fileform'] = 'epub'
default_prefs['collision'] = SAVE_UPDATE

@@ -136,6 +144,7 @@ default_prefs['adddialogstaysontop'] = False
default_prefs['lookforurlinhtml'] = False
default_prefs['checkforseriesurlid'] = True
default_prefs['auto_reject_seriesurlid'] = False
default_prefs['mark_series_anthologies'] = False
default_prefs['checkforurlchange'] = True
default_prefs['injectseries'] = False
default_prefs['matchtitleauth'] = True

@@ -150,12 +159,13 @@ default_prefs['addtoreadlists'] = False
default_prefs['addtolistsonread'] = False
default_prefs['autounnew'] = False

default_prefs['updatecalcover'] = None
default_prefs['gencalcover'] = SAVE_YES
default_prefs['updatecalcover'] = SAVE_YES_IF_IMG
default_prefs['covernewonly'] = False
default_prefs['gencalcover'] = SAVE_YES_UNLESS_IMG
default_prefs['updatecover'] = False
default_prefs['calibre_gen_cover'] = False
default_prefs['plugin_gen_cover'] = True
default_prefs['gcnewonly'] = False
default_prefs['calibre_gen_cover'] = True
default_prefs['plugin_gen_cover'] = False
default_prefs['gcnewonly'] = True
default_prefs['gc_site_settings'] = {}
default_prefs['allow_gc_from_ini'] = True
default_prefs['gc_polish_cover'] = False

@@ -173,6 +183,7 @@ default_prefs['allow_custcol_from_ini'] = True

default_prefs['std_cols_newonly'] = {}
default_prefs['set_author_url'] = True
default_prefs['set_series_url'] = True
default_prefs['includecomments'] = False
default_prefs['anth_comments_newonly'] = True

@@ -181,11 +192,17 @@ default_prefs['imapuser'] = ''
default_prefs['imappass'] = ''
default_prefs['imapsessionpass'] = False
default_prefs['imapfolder'] = 'INBOX'
default_prefs['imaptags'] = ''
default_prefs['imapmarkread'] = True
default_prefs['auto_reject_from_email'] = False
default_prefs['update_existing_only_from_email'] = False
default_prefs['download_from_email_immediately'] = False

#default_prefs['single_proc_jobs'] = True # setting and code removed
default_prefs['site_split_jobs'] = True
default_prefs['reconsolidate_jobs'] = True

def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
    db.prefs.set_namespaced(PREFS_NAMESPACE,
                            setting,

@@ -200,12 +217,6 @@ def get_library_config(db,setting=PREFS_KEY_SETTINGS,def_prefs=default_prefs):
library_config = db.prefs.get_namespaced(PREFS_NAMESPACE,
                                         setting)

# if don't have any settings for FanFicFarePlugin, copy from
# predecessor FanFictionDownLoaderPlugin.
if library_config is None:
    logger.info("Attempting to read settings from predecessor--FFDL")
    library_config = db.prefs.get_namespaced(FFDL_PREFS_NAMESPACE,
                                             setting)
if library_config is None:
    # defaults.
    logger.info("Using default settings")
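get_library_config above falls back from the plugin's own namespace to defaults (the old FFDL-namespace fallback is what this hunk removes). The underlying idea, stored settings laid over compiled-in defaults so new preference keys always have a value, can be sketched as:

def effective_prefs(stored, defaults):
    # Start from default_prefs; stored settings win where present.
    prefs = dict(defaults)
    prefs.update(stored or {})
    return prefs

print(effective_prefs({'fileform': 'mobi'},
                      {'fileform': 'epub', 'updatemeta': True}))
# -> {'fileform': 'mobi', 'updatemeta': True}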
New translation files added (diffs suppressed because they are too large): calibre-plugin/translations/ar.po (2613 lines), ja.po (2613), ko.po (2613), mr.po (2612), pl.po (2616), pt.po (2615), ru.po (2616), ta.po (2612). Several existing .po files were also updated; those diffs are likewise suppressed.
@@ -18,6 +18,7 @@ logger = logging.getLogger(__name__)
import re

from calibre.ebooks.oeb.iterator import EbookIterator
from fanficfare.six import text_type as unicode

RE_HTML_BODY = re.compile(u'<body[^>]*>(.*)</body>', re.UNICODE | re.DOTALL | re.IGNORECASE)
RE_STRIP_MARKUP = re.compile(u'<[^>]+>', re.UNICODE)

@@ -28,7 +29,7 @@ def get_word_count(book_path):
Estimate a word count
'''
from calibre.utils.localization import get_lang

iterator = _open_epub_file(book_path)

lang = iterator.opf.language

@@ -52,7 +53,7 @@ def _get_epub_standard_word_count(iterator, lang='en'):
'''

book_text = _read_epub_contents(iterator, strip_html=True)

try:
    from calibre.spell.break_iterator import count_words
    wordcount = count_words(book_text, lang)

@@ -67,7 +68,7 @@ def _get_epub_standard_word_count(iterator, lang='en'):
wordcount = get_wordcount_obj(book_text)
wordcount = wordcount.words
logger.debug('\tWord count - old method:%s'%wordcount)

return wordcount

def _read_epub_contents(iterator, strip_html=False):

@@ -92,4 +93,3 @@ def _extract_body_text(data):
if body:
    return RE_STRIP_MARKUP.sub('', body[0]).replace('.','. ')
return ''
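The word counter above prefers calibre's locale-aware count_words() and falls back to an older counter. The markup-stripping step it relies on, in isolation (a naive fallback, not the plugin's full logic):

import re

RE_STRIP_MARKUP = re.compile(u'<[^>]+>', re.UNICODE)

def naive_word_count(html):
    # Drop tags, pad sentence-ending periods so run-together words
    # split, then count whitespace-separated tokens.
    text = RE_STRIP_MARKUP.sub('', html).replace('.', '. ')
    return len(text.split())

print(naive_word_count('<p>One two three.</p><p>Four.</p>'))  # -> 4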
@@ -18,7 +18,6 @@
from __future__ import absolute_import

import re
import codecs

stack = []
@@ -16,19 +16,21 @@
#
from __future__ import absolute_import

try:
    # just a way to switch between web service and CLI/PI
    import google.appengine.api
try: # just a way to switch between CLI and PI
    from calibre.constants import DEBUG
    if os.environ.get('CALIBRE_WORKER', None) is not None or DEBUG:
        loghandler.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
    else:
        loghandler.setLevel(logging.CRITICAL)
        logger.setLevel(logging.CRITICAL)
except:
    try: # just a way to switch between CLI and PI
        import calibre.constants
    except:
        import sys
        if sys.version_info >= (2, 7):
            import logging
            logger = logging.getLogger(__name__)
            loghandler=logging.StreamHandler()
            loghandler.setFormatter(logging.Formatter("FFF: %(levelname)s: %(asctime)s: %(filename)s(%(lineno)d): %(message)s"))
            logger.addHandler(loghandler)
            loghandler.setLevel(logging.DEBUG)
            logger.setLevel(logging.DEBUG)
    import sys
    if sys.version_info >= (2, 7):
        import logging
        logger = logging.getLogger(__name__)
        loghandler=logging.StreamHandler()
        loghandler.setFormatter(logging.Formatter("FFF: %(levelname)s: %(asctime)s: %(filename)s(%(lineno)d): %(message)s"))
        logger.addHandler(loghandler)
        loghandler.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
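The try/except ladder above is an import-probe: each runtime (App Engine web service, calibre plugin, plain CLI) is detected by attempting an import that only succeeds there, and the log level is chosen accordingly. A condensed, hedged sketch of the same idea, using the module names shown above (the exact levels per environment are illustrative):

import logging
import os

logger = logging.getLogger(__name__)
try:
    import google.appengine.api              # only importable on App Engine
    # web-service logging configuration would go here
except ImportError:
    try:
        from calibre.constants import DEBUG  # only importable inside calibre
        noisy = os.environ.get('CALIBRE_WORKER') is not None or DEBUG
        logger.setLevel(logging.DEBUG if noisy else logging.CRITICAL)
    except ImportError:
        logger.setLevel(logging.DEBUG)       # plain CLI: verbose by default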
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,12 +16,11 @@
#

from __future__ import absolute_import
import os, re, sys, glob, types
from os.path import dirname, basename, normpath
import os, re, sys, types
from contextlib import contextmanager
import logging

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.parse import urlparse

logger = logging.getLogger(__name__)
@@ -31,16 +30,20 @@ from .. import configurable as configurable

## must import each adapter here.

from . import base_adapter
from . import base_efiction_adapter
from . import adapter_test1
from . import adapter_test2
from . import adapter_test3
from . import adapter_test4
from . import adapter_fanfictionnet
from . import adapter_fanficcastletvnet
from . import adapter_fictionalleyorg
from . import adapter_fictionalleyarchiveorg
from . import adapter_fictionpresscom
from . import adapter_ficwadcom
from . import adapter_fimfictionnet
from . import adapter_mediaminerorg
from . import adapter_potionsandsnitches
from . import adapter_tenhawkpresents
from . import adapter_adastrafanficcom
from . import adapter_tthfanficorg
from . import adapter_twilightednet
@@ -48,13 +51,9 @@ from . import adapter_whoficcom
from . import adapter_siyecouk
from . import adapter_archiveofourownorg
from . import adapter_ficbooknet
from . import adapter_nfacommunitycom
from . import adapter_midnightwhispers
from . import adapter_ksarchivecom
from . import adapter_archiveskyehawkecom
from . import adapter_squidgeorgpeja
from . import adapter_libraryofmoriacom
from . import adapter_wraithbaitcom
from . import adapter_ashwindersycophanthexcom
from . import adapter_chaossycophanthexcom
from . import adapter_erosnsapphosycophanthexcom
@@ -63,51 +62,27 @@ from . import adapter_occlumencysycophanthexcom
from . import adapter_phoenixsongnet
from . import adapter_walkingtheplankorg
from . import adapter_dokugacom
from . import adapter_iketernalnet
from . import adapter_storiesofardacom
from . import adapter_destinysgatewaycom
from . import adapter_ncisfictioncom
from . import adapter_fanfiktionde
from . import adapter_ponyfictionarchivenet
from . import adapter_themasquenet
from . import adapter_pretendercentrecom
from . import adapter_darksolaceorg
from . import adapter_finestoriescom
from . import adapter_hpfanficarchivecom
from . import adapter_twilightarchivescom
from . import adapter_nhamagicalworldsus
from . import adapter_hlfictionnet
from . import adapter_storyroomcom
from . import adapter_dracoandginnycom
from . import adapter_scarvesandcoffeenet
from . import adapter_thepetulantpoetesscom
from . import adapter_wolverineandroguecom
from . import adapter_merlinficdtwinscouk
from . import adapter_thehookupzonenet
from . import adapter_bloodtiesfancom
from . import adapter_qafficcom
from . import adapter_efpfanficnet
from . import adapter_potterficscom
from . import adapter_efictionestelielde
from . import adapter_imagineeficcom
from . import adapter_potterheadsanonymouscom
from . import adapter_fictionpadcom
from . import adapter_storiesonlinenet
from . import adapter_trekiverseorg
from . import adapter_literotica
from . import adapter_voracity2eficcom
from . import adapter_spikeluvercom
from . import adapter_bloodshedversecom
from . import adapter_fanfichu
from . import adapter_fictionmaniatv
from . import adapter_themaplebookshelf
from . import adapter_fannation
from . import adapter_sheppardweircom
from . import adapter_samandjacknet
from . import adapter_csiforensicscom
from . import adapter_lotrfanfictioncom
from . import adapter_fanfictionjunkiesde
from . import adapter_tgstorytimecom
from . import adapter_itcouldhappennet
from . import adapter_forumsspacebattlescom
from . import adapter_forumssufficientvelocitycom
from . import adapter_forumquestionablequestingcom
@@ -115,10 +90,7 @@ from . import adapter_ninelivesarchivecom
from . import adapter_masseffect2in
from . import adapter_quotevcom
from . import adapter_mcstoriescom
from . import adapter_buffygilescom
from . import adapter_andromedawebcom
from . import adapter_naiceanilmenet
from . import adapter_deepinmysoulnet
from . import adapter_adultfanfictionorg
from . import adapter_fictionhuntcom
from . import adapter_royalroadcom
@@ -127,47 +99,48 @@ from . import adapter_bdsmlibrarycom
from . import adapter_asexstoriescom
from . import adapter_gluttonyfictioncom
from . import adapter_valentchambercom
from . import adapter_looselugscom
from . import adapter_wwwgiantessworldnet
from . import adapter_lotrgficcom
from . import adapter_tomparisdormcom
from . import adapter_sugarquillnet
from . import adapter_starslibrarynet
from . import adapter_fanficauthorsnet
from . import adapter_fireflyfansnet
from . import adapter_sebklainenet
from . import adapter_shriftweborgbfa
from . import adapter_trekfanfictionnet
from . import adapter_wuxiaworldcom
from . import adapter_wwwlushstoriescom
from . import adapter_wwwutopiastoriescom
from . import adapter_sinfuldreamscomunicornfic
from . import adapter_sinfuldreamscomwhisperedmuse
from . import adapter_sinfuldreamscomwickedtemptation
from . import adapter_asianfanficscom
from . import adapter_webnovelcom
from . import adapter_deandamagecom
from . import adapter_mttjustoncenet
from . import adapter_narutoficorg
from . import adapter_starskyhutcharchivenet
from . import adapter_swordborderlineangelcom
from . import adapter_tasteofpoisoninkubationnet
from . import adapter_thedelphicexpansecom
from . import adapter_thundercatsfansorg
from . import adapter_www13hoursorg
from . import adapter_wwwaneroticstorycom
from . import adapter_gravitytalescom
from . import adapter_lcfanficcom
from . import adapter_noveltrovecom
from . import adapter_inkbunnynet
from . import adapter_alternatehistorycom
from . import adapter_wattpadcom
from . import adapter_novelonlinefullcom
from . import adapter_wwwnovelallcom
from . import adapter_wuxiaworldco
from . import adapter_harrypotterfanfictioncom
from . import adapter_hentaifoundrycom
from . import adapter_mugglenetfanfictioncom
from . import adapter_fanficsme
from . import adapter_fanfictalkcom
from . import adapter_scifistoriescom
from . import adapter_chireadscom
from . import adapter_scribblehubcom
from . import adapter_fictionlive
from . import adapter_thesietchcom
from . import adapter_squidgeworldorg
from . import adapter_novelfull
from . import adapter_psychficcom
from . import adapter_deviantartcom
from . import adapter_readonlymindcom
from . import adapter_wwwsunnydaleafterdarkcom
from . import adapter_syosetucom
from . import adapter_kakuyomujp
from . import adapter_fanfictionsfr
from . import adapter_touchfluffytail
from . import adapter_spiritfanfictioncom
from . import adapter_superlove
from . import adapter_cfaa
from . import adapter_althistorycom

## This bit of complexity allows adapters to be added by just adding
## importing. It eliminates the long if/else clauses we used to need
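The comment above describes a register-by-import scheme: each `from . import adapter_*` line runs the module once, and the package can then discover the adapter classes by scanning what was imported instead of maintaining a long if/else dispatch chain. A hedged sketch of the idiom follows; the helper name and the sys.modules scan are illustrative, not FanFicFare's actual internals:

import sys

def collect_adapter_classes(package_name):
    # every imported adapter module is assumed to expose getClass();
    # scanning sys.modules finds them without a hand-written dispatch table.
    classes = []
    for name, module in list(sys.modules.items()):
        if name.startswith(package_name + '.adapter_') and module is not None:
            get_class = getattr(module, 'getClass', None)
            if get_class is not None:
                classes.append(get_class())
    return classes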
@@ -206,6 +179,20 @@ def get_url_chapter_range(url_in):
        ch_end = ch_begin
    return url,ch_begin,ch_end

# Call as 'with lightweight_adapter(url) as adapter:'
@contextmanager
def lightweight_adapter(url):
    adapter = None
    try:
        if not getNormalStoryURL.__dummyconfig:
            getNormalStoryURL.__dummyconfig = configurable.Configuration(["test1.com"],"EPUB",lightweight=True)
        adapter = getAdapter(getNormalStoryURL.__dummyconfig,url)
        yield adapter
    except:
        yield None
    finally:
        del adapter

def getNormalStoryURL(url):
    r = getNormalStoryURLSite(url)
    if r:
@@ -213,24 +200,45 @@ def getNormalStoryURL(url):
    else:
        return None

def getNormalStoryURLSite(url):
    # print("getNormalStoryURLSite:%s"%url)
    if not getNormalStoryURL.__dummyconfig:
        getNormalStoryURL.__dummyconfig = configurable.Configuration(["test1.com"],"EPUB",lightweight=True)
    # pulling up an adapter is pretty low over-head.  If
    # it fails, it's a bad url.
    try:
        adapter = getAdapter(getNormalStoryURL.__dummyconfig,url)
        url = adapter.url
        site = adapter.getSiteDomain()
        del adapter
        return (url,site)
    except:
        return None

# kludgey function static/singleton
# Note it's *not* on lightweight_adapter because it can't reference
# itself in its definition.
getNormalStoryURL.__dummyconfig = None

def getNormalStoryURLSite(url):
    with lightweight_adapter(url) as adapter:
        if adapter:
            return (adapter.url,adapter.getSiteDomain())
        else:
            return None

## Originally defined for INI [storyUrl] sections where story URL
## contains a title that can change, now also used for reject list.
## waaaay faster with classmethod.
def get_section_url(url):
    cls = _get_class_for(url)[0]
    if cls:
        return cls.get_section_url(url)
    else:
        ## might be a url from a removed adapter.
        ## return unchanged in that case.
        return url

def get_url_search(url):
    '''
    For adapters that have story URLs that can change.  This is
    used for searching the Calibre library by identifiers:url for
    sites (generally) that contain author or title that can
    change, but also have a unique identifier that doesn't.

    returns a string containing a regexp, not a compiled re object.
    '''
    cls = _get_class_for(url)[0]
    if not cls:
        ## still apply common processing.
        cls = base_adapter.BaseSiteAdapter
    return cls.get_url_search(url)

def getAdapter(config,url,anyurl=False):

    #logger.debug("trying url:"+url)
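For context, a minimal usage sketch of lightweight_adapter as used in the rewritten getNormalStoryURLSite above; the story URL is invented, and test1.com is the dummy site configured inside the context manager:

with lightweight_adapter('http://test1.com/?sid=1234') as adapter:
    if adapter:
        print(adapter.url, adapter.getSiteDomain())
    else:
        print('not a recognized story URL')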
@@ -258,8 +266,7 @@ def getConfigSections():
def get_bulk_load_sites():
    # for now, all eFiction Base adapters are assumed to allow bulk_load.
    sections = set()
    for cls in filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter),
                       __class_list):
    for cls in [x for x in __class_list if issubclass(x,base_efiction_adapter.BaseEfictionAdapter) ]:
        sections.update( [ x.replace('www.','') for x in cls.getConfigSections() ] )
    return sections
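The filter()-to-comprehension change above is behavior-preserving; both forms select the same classes. A toy check of the equivalence, with classes invented for illustration:

class A(object): pass
class B(A): pass
class C(object): pass

class_list = [A, B, C]
assert list(filter(lambda x: issubclass(x, A), class_list)) \
       == [x for x in class_list if issubclass(x, A)] == [A, B]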
@@ -15,219 +15,24 @@
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six import string_types as basestring
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

class AdAstraFanficComSiteAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','aaff')
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

    @staticmethod
    def getSiteDomain():
        return 'www.adastrafanfic.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            addurl = "&warning=5"
        else:
            addurl=""

        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
            raise exceptions.AdultCheckRequired(self.url)

        # problems with some stories, but only in calibre.  I suspect
        # issues with different SGML parsers in python.  This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

        ## <meta name='description' content='<p>Description</p> ...' >
        ## Summary, strangely, is in the content attr of a <meta name='description'> tag
        ## which is escaped HTML.  Unfortunately, we can't use it because they don't
        ## escape (') chars in the desc, breaking the tag.
        #meta_desc = soup.find('meta',{'name':'description'})
        #metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
        #self.story.setMetadata('description',stripHTML(metasoup))

        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ''
                while value and 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                # sometimes poorly formatted desc (<p> w/o </p>) leads
                # to all labels being included.
                svalue=svalue[:svalue.find('<span class="label">')]
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        # problems with some stories, but only in calibre.  I suspect
        # issues with different SGML parsers in python.  This is a
        # nasty hack, but it works.
        data = data[data.index("<body"):]

        soup = self.make_soup(data)

        span = soup.find('div', {'id' : 'story'})

        if None == span:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,span)
from .base_otw_adapter import BaseOTWAdapter

def getClass():
    return AdAstraFanficComSiteAdapter
    return AdastrafanficComAdapter

class AdastrafanficComAdapter(BaseOTWAdapter):

    def __init__(self, config, url):
        BaseOTWAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','aaff')

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'www.adastrafanfic.com'
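A design note on the conversion above: adastrafanfic.com appears to have migrated to OTW's Archive software (the engine behind archiveofourown.org), so the old two-hundred-line eFiction scraper collapses into a thin BaseOTWAdapter subclass that only supplies a site abbreviation and domain; the archiveofourown.org adapter at the end of this diff is reduced the same way.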
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
# Copyright 2013 Fanficdownloader team, 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,15 +22,12 @@ from __future__ import unicode_literals
import logging
logger = logging.getLogger(__name__)
import re
import sys
from bs4 import UnicodeDammit

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate
@@ -60,8 +57,8 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
        # normalized story URL.(checking self.zone against list
        # removed--it was redundant w/getAcceptDomains and
        # getSiteURLPattern both)
        self._setURL('http://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
        #self._setURL('http://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
        self._setURL('https://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
        #self._setURL('https://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        #self.story.setMetadata('siteabbrev',self.getSiteAbbrev())
@@ -71,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"

        self.dateformat = "%B %d, %Y"

        ## Added because adult-fanfiction.org does send you to
        ## www.adult-fanfiction.org when you go to it and it also moves
@@ -116,79 +111,31 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):

    @classmethod
    def getSiteExampleURLs(self):
        return ("http://anime.adult-fanfiction.org/story.php?no=123456789 "
                + "http://anime2.adult-fanfiction.org/story.php?no=123456789 "
                + "http://bleach.adult-fanfiction.org/story.php?no=123456789 "
                + "http://books.adult-fanfiction.org/story.php?no=123456789 "
                + "http://buffy.adult-fanfiction.org/story.php?no=123456789 "
                + "http://cartoon.adult-fanfiction.org/story.php?no=123456789 "
                + "http://celeb.adult-fanfiction.org/story.php?no=123456789 "
                + "http://comics.adult-fanfiction.org/story.php?no=123456789 "
                + "http://ff.adult-fanfiction.org/story.php?no=123456789 "
                + "http://games.adult-fanfiction.org/story.php?no=123456789 "
                + "http://hp.adult-fanfiction.org/story.php?no=123456789 "
                + "http://inu.adult-fanfiction.org/story.php?no=123456789 "
                + "http://lotr.adult-fanfiction.org/story.php?no=123456789 "
                + "http://manga.adult-fanfiction.org/story.php?no=123456789 "
                + "http://movies.adult-fanfiction.org/story.php?no=123456789 "
                + "http://naruto.adult-fanfiction.org/story.php?no=123456789 "
                + "http://ne.adult-fanfiction.org/story.php?no=123456789 "
                + "http://original.adult-fanfiction.org/story.php?no=123456789 "
                + "http://tv.adult-fanfiction.org/story.php?no=123456789 "
                + "http://xmen.adult-fanfiction.org/story.php?no=123456789 "
                + "http://ygo.adult-fanfiction.org/story.php?no=123456789 "
                + "http://yuyu.adult-fanfiction.org/story.php?no=123456789")
        return ("https://anime.adult-fanfiction.org/story.php?no=123456789 "
                + "https://anime2.adult-fanfiction.org/story.php?no=123456789 "
                + "https://bleach.adult-fanfiction.org/story.php?no=123456789 "
                + "https://books.adult-fanfiction.org/story.php?no=123456789 "
                + "https://buffy.adult-fanfiction.org/story.php?no=123456789 "
                + "https://cartoon.adult-fanfiction.org/story.php?no=123456789 "
                + "https://celeb.adult-fanfiction.org/story.php?no=123456789 "
                + "https://comics.adult-fanfiction.org/story.php?no=123456789 "
                + "https://ff.adult-fanfiction.org/story.php?no=123456789 "
                + "https://games.adult-fanfiction.org/story.php?no=123456789 "
                + "https://hp.adult-fanfiction.org/story.php?no=123456789 "
                + "https://inu.adult-fanfiction.org/story.php?no=123456789 "
                + "https://lotr.adult-fanfiction.org/story.php?no=123456789 "
                + "https://manga.adult-fanfiction.org/story.php?no=123456789 "
                + "https://movies.adult-fanfiction.org/story.php?no=123456789 "
                + "https://naruto.adult-fanfiction.org/story.php?no=123456789 "
                + "https://ne.adult-fanfiction.org/story.php?no=123456789 "
                + "https://original.adult-fanfiction.org/story.php?no=123456789 "
                + "https://tv.adult-fanfiction.org/story.php?no=123456789 "
                + "https://xmen.adult-fanfiction.org/story.php?no=123456789 "
                + "https://ygo.adult-fanfiction.org/story.php?no=123456789 "
                + "https://yuyu.adult-fanfiction.org/story.php?no=123456789")

    def getSiteURLPattern(self):
        return r'http?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'

    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
    ## Login seems to be reasonably standard across eFiction sites.
    #def needToLoginCheck(self, data):
    ##This adapter will always require a login
    #    return True

    # <form name="login" method="post" action="">
    #   <div class="top">E-mail: <span id="sprytextfield1">
    #     <input name="email" type="text" id="email" size="20" maxlength="255" />
    #     <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
    #   <div class="top">Password: <span id="sprytextfield2">
    #     <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
    #     <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
    #   <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
    #     <input type="submit" value="Login" />
    #   </div>
    # </form>

    ##This is not working right now, so I'm commenting it out, but leaving it for future testing
    #def performLogin(self, url, soup):
    #    params = {}

    #    if self.password:
    #        params['email'] = self.username
    #        params['pass1'] = self.password
    #    else:
    #        params['email'] = self.getConfig("username")
    #        params['pass1'] = self.getConfig("password")
    #    params['submit'] = 'Login'

    #    # copy all hidden input tags to pick up appropriate tokens.
    #    for tag in soup.findAll('input',{'type':'hidden'}):
    #        params[tag['name']] = tag['value']

    #    logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))

    #    d = self._postUrl(url, params, usecache=False)
    #    d = self._fetchUrl(url, params, usecache=False)
    #    soup = self.make_soup(d)

    #    if not (soup.find('form', {'name' : 'login'}) == None):
    #        logger.info("Failed to login to URL %s as %s" % (url, params['email']))
    #        raise exceptions.FailedToLogin(url,params['email'])
    #        return False
    #    else:
    #        return True
        return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def doExtractChapterUrlsAndMetadata(self, get_cover=True):
@@ -196,211 +143,109 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
        ## You need to have your is_adult set to true to get this story
        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)
        else:
            d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
            if "Age verified successfully" not in d:
                raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))

        url = self.url
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist("Code: 404. {0}".format(url))
            elif e.code == 410:
                raise exceptions.StoryDoesNotExist("Code: 410. {0}".format(url))
            elif e.code == 401:
                self.needToLogin = True
                data = ''
            else:
                raise e
        data = self.get_request(url)
        # logger.debug(data)

        if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
            raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        ##This is not working right now, so I'm commenting it out, but leaving it for future testing
        #self.performLogin(url, soup)

        # Now go hunting for all the meta data and the chapter list.
        soup = self.make_soup(data)

        ## Title
        ## Some of the titles have a backslash on the story page, but not on the Author's page
        ## So I am removing it from the title, so it can be found on the Author's page further in the code.
        ## Also, some titles may have extra spaces '  ', and the search on the Author's page removes them,
        ## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
        a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())
        h1 = soup.find('h1')
        # logger.debug("Title:%s"%h1)
        self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace('  ',' ').replace('  ',' ').replace('  ',' ').strip())

        # Find the chapters from first list only
        chapters = soup.select_one('select.chapter-select').select('option')
        for chapter in chapters:
            self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])

        # Find the chapters:
        chapters = soup.find('div',{'class':'dropdown-content'})
        for i, chapter in enumerate(chapters.findAll('a')):
            self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
        a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
        if a == None:
            # I know that the original author of fanficfare wants to always have metadata,
            # but I posit that if the story is there, even if we can't get the metadata from the
            # author page, the story should still be able to be downloaded, which is what I've done here.
            self.story.setMetadata('authorId','000000000')
            self.story.setMetadata('authorUrl','http://www.adult-fanfiction.org')
            self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
            self.story.setMetadata('author','Unknown')
            logger.warning('There was no author found for the story... Metadata will not be retrieved.')
            self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
            self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
        else:
            self.story.setMetadata('authorId',a['href'].split('=')[1])
            self.story.setMetadata('authorUrl',a['href'])
            self.story.setMetadata('author',stripHTML(a))

        ##The story page does not give much Metadata, so we go to the Author's page

        ##Get the first Author page to see if there are multiple pages.
        ##AFF doesn't care if the page number is larger than the actual pages,
        ##it will continue to show the last page even if the variable is larger than the actual page
        author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
        #author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'

        ##I'm resetting the author page to the zone for this story
        self.story.setMetadata('authorUrl',author_Url)

        logger.debug('Getting the author page: {0}'.format(author_Url))
        try:
            adata = self._fetchUrl(author_Url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
            elif e.code == 410:
                raise exceptions.StoryDoesNotExist("Author Page: Code: 410. {0}".format(author_Url))
            else:
                raise e

        if "The member you are looking for does not exist." in adata:
            raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
            #raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")

        ## The story page does not give much Metadata, so we go to
        ## the Author's page.  Except it's actually a sub-req for
        ## list of author's stories for that subdomain
        author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
            self.getBaseDomain(),
            self.zone,
            self.story.getMetadata('authorId'))

        logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
        adata = self.get_request(author_Url)

        none_found = "No stories found in this category."
        if none_found in adata:
            raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))

        asoup = self.make_soup(adata)

        ##Getting the number of pages
        pages=asoup.find('div',{'class' : 'pagination'}).findAll('li')[-1].find('a')
        if not pages == None:
            pages = pages['href'].split('=')[-1]
        else:
            pages = 0

        ##If there is only 1 page of stories, check it to get the Metadata,
        if pages == 0:
            a = asoup.findAll('li')
            for lc2 in a:
                if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
                    break
        ## otherwise go through the pages
        else:
            page=1
            i=0
            while i == 0:
                ##We already have the first page, so if this is the first time through, skip getting the page
                if page != 1:
                    author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
                    logger.debug('Getting the author page: {0}'.format(author_Url))
                    try:
                        adata = self._fetchUrl(author_Url)
                    except HTTPError as e:
                        if e.code == 404:
                            raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
                        elif e.code == 410:
                            raise exceptions.StoryDoesNotExist("Author Page: Code: 410. {0}".format(author_Url))
                        else:
                            raise e
                    ##This will probably never be needed, since AFF doesn't seem to care what number you put as
                    ## the page number, it will default to the last page, even if you use 1000, for an author
                    ## that only has 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
                    if "The member you are looking for does not exist." in adata:
                        raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
                # we look for the li element that has the story here
                asoup = self.make_soup(adata)

                a = asoup.findAll('li')
                for lc2 in a:
                    if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
                        i=1
                        break
                page = page + 1
                if page > int(pages):
                    break

        ##Split the Metadata up into a list
        ##We have to change the soup type to a string, then remove the newlines, and double spaces,
        ##then changes the <br/> to '-:-', which separates the different elements.
        ##Then we strip the HTML elements from the string.
        ##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
        ##They are always in the same order.
        ## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
        liMetadata = unicode(lc2).replace('\n','').replace('\r','').replace('\t',' ').replace('  ',' ').replace('  ',' ').replace('  ',' ')
        liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
        liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
        for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
            if i == 0:
                # The value for the title has been manipulated, so may not be the same as gotten at the start.
                # I'm going to use the href from the lc2 retrieved from the author's page to determine if it is correct.
                if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
                    raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
            elif i == 1:
                ##Get the description
                self.setDescription(url,stripHTML(value.strip()))
            else:
                # the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
                if 'Located :' in value:
                    self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
                elif 'Category :' in value:
                    # Get the Category
                    self.story.setMetadata('category',value.replace(r'&gt;',r'>').replace(r'Located :',r'').strip())
                elif 'Content Tags :' in value:
                    # Get the Erotic Tags
                    value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
                    for code in re.split(r'\s',value):
                        self.story.addToList('eroticatags',code)
                elif 'Posted :' in value:
                    # Get the Posted Date
                    value = value.replace(r'Posted :',r'').strip()
                    if value.startswith('008'):
                        # It is unknown how the 200 became 008, but I'm going to change it back here
                        value = value.replace('008','200')
                    elif value.startswith('0000'):
                        # Since the date is showing as 0000,
                        # I'm going to put the memberdate here
                        value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
                    self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
                elif 'Edited :' in value:
                    # Get the 'Updated' Edited date
                    # AFF has the time for the Updated date, and we only want the date,
                    # so we take the first 10 characters only
                    value = value.replace(r'Edited :',r'').strip()[0:10]
                    if value.startswith('008'):
                        # It is unknown how the 200 became 008, but I'm going to change it back here
                        value = value.replace('008','200')
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
                    elif value.startswith('0000') or '-00-' in value:
                        # Since the date is showing as 0000,
                        # or there is -00- in the date,
                        # I'm going to put the Published date here
                        self.story.setMetadata('dateUpdated', self.story.getMetadata('datePublished'))
                    else:
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
                else:
                    # This catches the blank elements, and the Review and Dragon Prints.
                    # I am not interested in these, so do nothing
                    zzzzzzz=0

        # logger.debug(asoup)

        story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
        # logger.debug(story_card)

        ## Category
        ## I've only seen one category per story so far, but just in case:
        for cat in story_card.select('div.story-card-category'):
            # remove Category:, old code suggests Located: is also
            # possible, so removing by <strong>
            cat.find("strong").decompose()
            self.story.addToList('category',stripHTML(cat))

        self.setDescription(url,story_card.select_one('div.story-card-description'))

        for tag in story_card.select('span.story-tag'):
            self.story.addToList('eroticatags',stripHTML(tag))

        ## created/updates share formatting
        for meta in story_card.select('div.story-card-meta-item span:last-child'):
            meta = stripHTML(meta)
            if 'Created: ' in meta:
                meta = meta.replace('Created: ','')
                self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))

            if 'Updated: ' in meta:
                meta = meta.replace('Updated: ','')
                self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        #Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))
        chaptertag = soup.find('div',{'class' : 'pagination'}).parent.findNext('td')
        soup = self.make_soup(self.get_request(url))
        chaptertag = soup.select_one('div.chapter-body')
        if None == chaptertag:
            raise exceptions.FailedToDownload("Error downloading Chapter: {0}!  Missing required element!".format(url))
        ## chapter text includes a copy of story title, author,
        ## chapter title, & eroticatags specific to the chapter.  Did
        ## before, too.

        return self.utf8FromSoup(url,chaptertag)
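One detail worth noting in the new story-card lookup above: the `div.story-card:has(...)` selector depends on CSS `:has()` support in BeautifulSoup's selector engine, which is provided by the soupsieve package. A minimal sketch with invented sample HTML:

from bs4 import BeautifulSoup

html = '<div class="story-card"><a href="/story.php?no=1">t</a></div>'
soup = BeautifulSoup(html, 'html.parser')
# select_one() delegates to soupsieve, which implements :has()
card = soup.select_one('div.story-card:has(a[href="/story.php?no=1"])')
print(card is not None)  # True with a soupsieve version that supports :has()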
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2018 FanFicFare team
# Copyright 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,15 +16,18 @@
#

from __future__ import absolute_import
from .adapter_forumquestionablequestingcom import QuestionablequestingComAdapter
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

import logging
logger = logging.getLogger(__name__)

def getClass():
    return WWWAlternatehistoryComAdapter

class WWWAlternatehistoryComAdapter(QuestionablequestingComAdapter):
class WWWAlternatehistoryComAdapter(BaseXenForo2ForumAdapter):

    def __init__(self, config, url):
        QuestionablequestingComAdapter.__init__(self, config, url)
        BaseXenForo2ForumAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ah')

@@ -35,7 +38,9 @@ class WWWAlternatehistoryComAdapter(QuestionablequestingComAdapter):
        return 'www.alternatehistory.com'

    @classmethod
    def getURLPrefix(cls):
    def getPathPrefix(cls):
        # in case it needs more than just site/
        return 'https://' + cls.getSiteDomain() + '/forum'
        return '/forum/'

    def get_post_created_date(self,souptag):
        return self.make_date(souptag.find('div', {'class':'message-inner'}))
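The getURLPrefix-to-getPathPrefix change above presumably reflects a refactor in BaseXenForo2ForumAdapter: rather than each subclass assembling the full 'https://domain/forum' prefix itself, the subclass now returns only the path portion ('/forum/') and the base class composes the complete URL from scheme, domain, and path.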
fanficfare/adapters/adapter_althistorycom.py (new file, 40 lines)
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-

# Copyright 2026 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import re

from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

def getClass():
    return AltHistoryComAdapter

## NOTE: This is a different site than www.alternatehistory.com.

class AltHistoryComAdapter(BaseXenForo2ForumAdapter):

    def __init__(self, config, url):
        BaseXenForo2ForumAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ahc')

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'althistory.com'
@@ -1,296 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ####### Not all labels are captured.  They are not formatted correctly on the
# ####### webpage.

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return AndromedaWebComAdapter # XXX

# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class AndromedaWebComAdapter(BaseSiteAdapter): # XXX

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /fiction part.  Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','awc') # XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y" # XXX

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'www.andromeda-web.com' # XXX

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number.  print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=2"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        # Since the warning text can change by warning level, let's
        # look for the warning pass url.  ksarchive uses
        # &warning= -- actually, so do other sites.  Must be an
        # eFiction book.

        # fiction/viewstory.php?sid=1882&warning=4
        # fiction/viewstory.php?sid=1654&ageconsent=ok&warning=2
        #print data
        m = re.search(r"'fiction/viewstory.php\?sid=10(&warning=2)'",data)
        m = re.search(r"'fiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
        if m != None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        pagetitle = soup.find('div',{'id':'content'})

        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))

        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/fiction/'+chapter['href']+addurl)

        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('fiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'class' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,div)
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
# Copyright 2014 Fanficdownloader team, 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,509 +18,52 @@
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
import json

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate
from .base_otw_adapter import BaseOTWAdapter

def getClass():
    return ArchiveOfOurOwnOrgAdapter


logger = logging.getLogger(__name__)

class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
class ArchiveOfOurOwnOrgAdapter(BaseOTWAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        self.full_work_soup = None
        self.use_full_work_soup = True

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])

        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('storyId',m.group('id'))

            # normalized story URL.
            self._setURL('https://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())
        BaseOTWAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ao3')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%b-%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'archiveofourown.org'

    # The certificate is only valid for the following names:
    #   ao3.org,
    #   archiveofourown.com,
    #   archiveofourown.net,
    #   archiveofourown.org,
    #   www.ao3.org,

    @classmethod
    def getAcceptDomains(cls):
        return ['archiveofourown.org','archiveofourown.com','download.archiveofourown.org','download.archiveofourown.com']
        return ['archiveofourown.org',
                'archiveofourown.com',
                'archiveofourown.net',
                'archiveofourown.gay',
                'download.archiveofourown.org',
                'download.archiveofourown.com',
                'download.archiveofourown.net',
                'ao3.org',
                ]

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/works/123456 https://"+cls.getSiteDomain()+"/collections/Some_Archive/works/123456 https://"+cls.getSiteDomain()+"/works/123456/chapters/78901"

    def getSiteURLPattern(self):
        # https://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
        # Discard leading zeros from story ID numbers--AO3 doesn't use them in its own chapter URLs.
        return r"https?://(download\.)?archiveofourown\.(org|com)(/collections/[^/]+)?/works/0*(?P<id>\d+)"

    ## Login
    def needToLoginCheck(self, data):
        if 'This work is only available to registered users of the Archive.' in data \
           or "The password or user name you entered doesn't match our records" in data:
            return True
        else:
            return False

    def performLogin(self, url, data):

        params = {}
        if self.password:
            params['user[login]'] = self.username
            params['user[password]'] = self.password
        else:
            params['user[login]'] = self.getConfig("username")
            params['user[password]'] = self.getConfig("password")
        params['user[remember_me]'] = '1'
        params['commit'] = 'Log in'
        params['utf8'] = u'\x2713' # utf8 *is* required now. hex code works better than actual character for some reason. u'✓'

        # authenticity_token now comes from a completely separate json call.
        token_json = json.loads(self._fetchUrl('https://' + self.getSiteDomain() + "/token_dispenser.json"))
        params['authenticity_token'] = token_json['token']

        loginUrl = 'https://' + self.getSiteDomain() + '/users/login'
        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                            params['user[login]']))

        d = self._postUrl(loginUrl, params)

        if 'href="/users/logout"' not in d :
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['user[login]']))
            raise exceptions.FailedToLogin(url,params['user[login]'])
            return False
        else:
            return True

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            addurl = "?view_adult=true"
        else:
            addurl=""

        metaurl = self.url+addurl
        url = self.url+'/navigate'+addurl
        logger.info("url: "+url)
        logger.info("metaurl: "+metaurl)

        try:
            data = self._fetchUrl(url)
            meta = self._fetchUrl(metaurl)

            if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta:
                raise exceptions.AdultCheckRequired(self.url)

        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if "Sorry, we couldn't find the work you were looking for." in data:
            raise exceptions.StoryDoesNotExist(self.url)

        # need to log in for this one, or always_login.
        if self.needToLoginCheck(data) or \
           ( self.getConfig("always_login") and 'href="/users/logout"' not in data ):
            self.performLogin(url,data)
            data = self._fetchUrl(url,usecache=False)
            meta = self._fetchUrl(metaurl,usecache=False)

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        for tag in soup.findAll('div',id='admin-banner'):
            tag.extract()
        metasoup = self.make_soup(meta)
        for tag in metasoup.findAll('div',id='admin-banner'):
            tag.extract()

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('a', href=re.compile(r"/works/\d+$"))
        self.story.setMetadata('title',stripHTML(a))

        if self.getConfig("always_login"):
            # deliberately using always_login instead of checking for
            # actual login so we don't have a case where these show up
            # for a user only when they get user-restricted stories.
            try:
                # is bookmarked if has update /bookmarks/ form --
                # create bookmark form uses different url
                self.story.setMetadata('bookmarked',
                                       None != metasoup.find('form',action=re.compile(r'^/bookmarks/')))
                self.story.extendList('bookmarktags',
                                      metasoup.find('input',id='bookmark_tag_string')['value'].split(', '))
                self.story.setMetadata('bookmarkprivate',
                                       metasoup.find('input',id='bookmark_private').has_attr('checked'))
                self.story.setMetadata('bookmarkrec',
                                       metasoup.find('input',id='bookmark_rec').has_attr('checked'))
            except KeyError:
                pass
            self.story.setMetadata('bookmarksummary',
                                   stripHTML(metasoup.find('textarea',id='bookmark_notes')))

        # Find authorid and URL from... author url.
        alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/\w+"))
        if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
            self.story.setMetadata('author','Anonymous')
            self.story.setMetadata('authorUrl','https://archiveofourown.org/')
            self.story.setMetadata('authorId','0')
        else:
            for a in alist:
                self.story.addToList('authorId',a['href'].split('/')[-1])
                self.story.addToList('authorUrl','https://'+self.host+a['href'])
                self.story.addToList('author',a.text)

        byline = metasoup.find('h3',{'class':'byline'})
        if byline:
            self.story.setMetadata('byline',stripHTML(byline))

            # byline:
            # <h3 class="byline heading">
            #   Hope Roy [archived by <a href="/users/ssa_archivist/pseuds/ssa_archivist" rel="author">ssa_archivist</a>]
            # </h3>
            # stripped:"Hope Roy [archived by ssa_archivist]"
            m = re.match(r'(?P<author>.*) \[archived by ?(?P<archivist>.*)\]',stripHTML(byline))
            if( m and
                len(alist) == 1 and
                self.getConfig('use_archived_author') ):
                self.story.setMetadata('author',m.group('author'))

        newestChapter = None
        self.newestChapterNum = None # save for comparing during update.
        # Scan all chapters to find the oldest and newest, on AO3 it's
        # possible for authors to insert new chapters out-of-order or
        # change the dates of earlier ones by editing them--That WILL
        # break epub update.
        # Find the chapters:
        chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$"))
        self.story.setMetadata('numChapters',len(chapters))
        logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
        if len(chapters)==1:
            self.add_chapter(self.story.getMetadata('title'),'https://'+self.host+chapters[0]['href'])
        else:
            for index, chapter in enumerate(chapters):
                # strip just in case there's tags, like <i> in chapter titles.
                # (2013-09-21)
                date = stripHTML(chapter.findNext('span'))[1:-1]
                chapterDate = makeDate(date,self.dateformat)
                self.add_chapter(chapter,'https://'+self.host+chapter['href'],
                                 {'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d")))})
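                # (The per-chapter 'date' uses datechapter_format if set,
                # falling back to datePublished_format, then "%Y-%m-%d".)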
                if newestChapter == None or chapterDate > newestChapter:
                    newestChapter = chapterDate
                    self.newestChapterNum = index

        a = metasoup.find('blockquote',{'class':'userstuff'})
        if a != None:
            a.name='div' # Change blockquote to div.
            self.setDescription(url,a)
            #self.story.setMetadata('description',a.text)

        a = metasoup.find('dd',{'class':"rating tags"})
        if a != None:
            self.story.setMetadata('rating',stripHTML(a.text))

        d = metasoup.find('dd',{'class':"language"})
        if d != None:
            self.story.setMetadata('language',stripHTML(d.text))

        a = metasoup.find('dd',{'class':"fandom tags"})
        if a != None:
            fandoms = a.findAll('a',{'class':"tag"})
            for fandom in fandoms:
                self.story.addToList('fandoms',fandom.string)

        a = metasoup.find('dd',{'class':"warning tags"})
        if a != None:
            warnings = a.findAll('a',{'class':"tag"})
            for warning in warnings:
                self.story.addToList('warnings',warning.string)

        a = metasoup.find('dd',{'class':"freeform tags"})
        if a != None:
            genres = a.findAll('a',{'class':"tag"})
            for genre in genres:
                self.story.addToList('freeformtags',genre.string)

        a = metasoup.find('dd',{'class':"category tags"})
        if a != None:
            genres = a.findAll('a',{'class':"tag"})
            for genre in genres:
                if genre != "Gen":
                    self.story.addToList('ao3categories',genre.string)

        a = metasoup.find('dd',{'class':"character tags"})
        if a != None:
            chars = a.findAll('a',{'class':"tag"})
            for char in chars:
                self.story.addToList('characters',char.string)

        a = metasoup.find('dd',{'class':"relationship tags"})
        if a != None:
            ships = a.findAll('a',{'class':"tag"})
            for ship in ships:
                self.story.addToList('ships',ship.string)

        a = metasoup.find('dd',{'class':"collections"})
        if a != None:
            collections = a.findAll('a')
            for collection in collections:
                self.story.addToList('collections',collection.string)

        stats = metasoup.find('dl',{'class':'stats'})
        dt = stats.findAll('dt')
        dd = stats.findAll('dd')
        for x in range(0,len(dt)):
            label = dt[x].text
            value = dd[x].text

            if 'Words:' in label:
                self.story.setMetadata('numWords', value)

            if 'Comments:' in label:
                self.story.setMetadata('comments', value)

            if 'Kudos:' in label:
                self.story.setMetadata('kudos', value)

            if 'Hits:' in label:
                self.story.setMetadata('hits', value)

            if 'Bookmarks:' in label:
                self.story.setMetadata('bookmarks', value)

            if 'Chapters:' in label:
                if value.split('/')[0] == value.split('/')[1]:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')


            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Completed' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))


        # Find Series name from series URL.
        ddseries = metasoup.find('dd',{'class':"series"})

        if ddseries:
            for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))):
                series_name = stripHTML(a)
                series_url = 'https://'+self.host+a['href']
                series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"
                self.story.setMetadata('series%02d'%i,"%s [%s]"%(series_name,series_index))
                self.story.setMetadata('series%02dUrl'%i,series_url)
                if i == 0:
                    self.setSeries(series_name, series_index)
                    self.story.setMetadata('seriesUrl',series_url)
    def hookForUpdates(self,chaptercount):
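        ## Update hook: drop previously-downloaded chapters from the
        ## newest-dated one onward so out-of-order or re-dated AO3
        ## chapters (see the date scan above) get re-fetched.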
        if self.oldchapters and len(self.oldchapters) > self.newestChapterNum:
            logger.info("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1))
            self.oldchapters = self.oldchapters[:self.newestChapterNum]
        return len(self.oldchapters)

    ## Normalize chapter URLs because a) site has changed from http to
    ## https and b) in case of title change.  That way updates to
    ## existing stories don't re-download all chapters.
    def normalize_chapterurl(self,url):
        url = re.sub(r"https?://("+self.getSiteDomain()+"/works/\d+/chapters/\d+)(\?view_adult=true)?$",
                     r"https://\1",url)
        return url

    def mod_url_request(self, url):
        return url

    # grab the text for an individual chapter.
    def getChapterTextNum(self, url, index):
        ## FYI: Chapter urls used to include ?view_adult=true in each
        ## one.  With cookiejar being passed now, that's not
        ## necessary.  However, there is a corner case with plugin--If
        ## a user-required story is attempted after gathering metadata
        ## for one that needs adult, but not user AND the user doesn't
        ## enter a valid user, the is_adult cookie from before can be
        ## lost.
        logger.debug('Getting chapter text for: %s index: %s' % (url,index))

        save_chapter_soup = self.make_soup('<div class="story"></div>')
        ## use the div because the full soup will also have <html><body>.
        ## need save_chapter_soup for .new_tag()
        save_chapter=save_chapter_soup.find('div')

        whole_dl_soup = chapter_dl_soup = None

        if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
            logger.debug("USE view_full_work")
            ## Assumed view_adult=true was cookied during metadata
            if not self.full_work_soup:
                self.full_work_soup = self.make_soup(self._fetchUrl(self.url+"?view_full_work=true"))

            whole_dl_soup = self.full_work_soup
            chapter_dl_soup = whole_dl_soup.find('div',{'id':'chapter-%s'%(index+1)})
            if not chapter_dl_soup:
                self.use_full_work_soup = False
                logger.warn("chapter-%s not found in view_full_work--ending use_view_full_work"%(index+1))
        if not chapter_dl_soup:
            whole_dl_soup = chapter_dl_soup = self.make_soup(self._fetchUrl(url))
        if None == chapter_dl_soup:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        exclude_notes=self.getConfigList('exclude_notes')

        def append_tag(elem,tag,string):
            '''bs4 requires tags be added separately.'''
            new_tag = save_chapter_soup.new_tag(tag)
            new_tag.string=string
            elem.append(new_tag)
            return new_tag

        ## These are the over-all work's 'Notes at the beginning'.
        ## They only appear on the first chapter in individual chapter
        ## pages and before chapter-1 div.  Appending removes
        ## headnotes from whole_dl_soup, so be sure to only do it on
        ## the first chapter.
        if 'authorheadnotes' not in exclude_notes and index == 0:
            headnotes = whole_dl_soup.find('div', {'class' : "preface group"}).find('div', {'class' : "notes module"})
            if headnotes != None:
                ## Also include ul class='associations'.
                ulassoc = headnotes.find('ul', {'class' : "associations"})
                headnotes = headnotes.find('blockquote', {'class' : "userstuff"})
                if headnotes != None or ulassoc != None:
                    append_tag(save_chapter,'b',"Author's Note:")
                    if ulassoc != None:
                        # fix relative links--all examples so far have been.
                        for alink in ulassoc.find_all('a'):
                            if 'http' not in alink['href']:
                                alink['href']='https://' + self.getSiteDomain() + alink['href']
                        save_chapter.append(ulassoc)
                    if headnotes != None:
                        save_chapter.append(headnotes)

        ## Can appear on every chapter
        if 'chaptersummary' not in exclude_notes:
            chapsumm = chapter_dl_soup.find('div', {'id' : "summary"})
            if chapsumm != None:
                chapsumm = chapsumm.find('blockquote')
                append_tag(save_chapter,'b',"Summary for the Chapter:")
                save_chapter.append(chapsumm)

        ## Can appear on every chapter
        if 'chapterheadnotes' not in exclude_notes:
            chapnotes = chapter_dl_soup.find('div', {'id' : "notes"})
            if chapnotes != None:
                chapnotes = chapnotes.find('blockquote')
                if chapnotes != None:
                    append_tag(save_chapter,'b',"Notes for the Chapter:")
                    save_chapter.append(chapnotes)

        text = chapter_dl_soup.find('div', {'class' : "userstuff module"})
        chtext = text.find('h3', {'class' : "landmark heading"})
        if chtext:
            chtext.extract()
        save_chapter.append(text)

        ## Can appear on every chapter
        if 'chapterfootnotes' not in exclude_notes:
            chapfoot = chapter_dl_soup.find('div', {'class' : "end notes module", 'role' : "complementary"})
            if chapfoot != None:
                chapfoot = chapfoot.find('blockquote')
                append_tag(save_chapter,'b',"Notes for the Chapter:")
                save_chapter.append(chapfoot)

        skip_on_update_tags = []
        ## These are the over-all work's 'Notes at the end'.
        ## They only appear on the last chapter in individual chapter
        ## pages and after chapter-# div.  Appending removes
        ## headnotes from whole_dl_soup, so be sure to only do it on
        ## the last chapter.
        if 'authorfootnotes' not in exclude_notes and index+1 == self.num_chapters():
            footnotes = whole_dl_soup.find('div', {'id' : "work_endnotes"})
            if footnotes != None:
                footnotes = footnotes.find('blockquote')
                if footnotes:
                    b = append_tag(save_chapter,'b',"Author's Note:")
                    skip_on_update_tags.append(b)
                    skip_on_update_tags.append(footnotes)
                    save_chapter.append(footnotes)

        ## It looks like 'Inspired by' links now all appear in the ul
        ## class=associations tag in authorheadnotes.  This code is
        ## left in case I'm wrong and there are still stories with div
        ## id=children inspired links at the end.
        if 'inspiredlinks' not in exclude_notes and index+1 == self.num_chapters():
            inspiredlinks = whole_dl_soup.find('div', {'id' : "children"})
            if inspiredlinks != None:
            if inspiredlinks:
                inspiredlinks.find('h3').name='b' # don't want a big h3 at the end.
                # fix relative links--all examples so far have been.
                for alink in inspiredlinks.find_all('a'):
                    if 'http' not in alink['href']:
                        alink['href']='https://' + self.getSiteDomain() + alink['href']
                skip_on_update_tags.append(inspiredlinks)
                save_chapter.append(inspiredlinks)

        ## AO3 story end notes end up in the 'last' chapter, but if
        ## updated, then there's a new 'last' chapter.  This option
        ## applies the 'skip_on_ffdl_update' class to those tags which
        ## means they will be removed during epub reading for update.
        ## Results: only the last chapter will have end notes.
        ## Side-effect: An 'Update Always' that doesn't add a new
        ## last chapter will remove the end notes.
        if self.getConfig("remove_authorfootnotes_on_update"):
            for skip_tag in skip_on_update_tags:
                if skip_tag.has_attr('class'):
                    skip_tag['class'].append('skip_on_ffdl_update')
                else:
                    skip_tag['class']=['skip_on_ffdl_update']
                # logger.debug(skip_tag)

        return self.utf8FromSoup(url,save_chapter)

    def mod_url_request(self, url):
        ## add / to *not* replace media.archiveofourown.org
        if self.getConfig("use_archive_transformativeworks_org",False):
            return url.replace("/archiveofourown.org","/archive.transformativeworks.org")
        elif self.getConfig("use_archiveofourown_gay",False):
            return url.replace("/archiveofourown.org","/archiveofourown.gay")
        else:
            return url
@@ -1,185 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate


def getClass():
    return ArchiveSkyeHawkeComAdapter

# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ash')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'archive.skyehawke.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['archive.skyehawke.com','www.skyehawke.com']

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://archive.skyehawke.com/story.php?no=1234 http://www.skyehawke.com/archive/story.php?no=1234 http://skyehawke.com/archive/story.php?no=1234"

    def getSiteURLPattern(self):
        return r"https?://(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        url = self.url
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('div', {'class':"story border"}).find('span',{'class':'left'})
        title=stripHTML(a).split('"')[1]
        self.story.setMetadata('title',title)

        # Find authorid and URL from... author url.
        author = a.find('a')
        self.story.setMetadata('authorId',author['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href'])
        self.story.setMetadata('author',author.string)

        authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))

        chapter=soup.find('select',{'name':'chapter'}).findAll('option')
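        # (Starts at 1 below because option 0 appears to be the drop-down's
        # placeholder rather than a real chapter.)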
        for i in range(1,len(chapter)):
            ch=chapter[i]
            self.add_chapter(ch,ch['value'])


        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        box=soup.find('div', {'class': "container borderridge"})
        sum=box.find('span').text
        self.setDescription(url,sum)

        boxes=soup.findAll('div', {'class': "container bordersolid"})
        for box in boxes:
            if box.find('b') != None and box.find('b').text == "History and Story Information":

                for b in box.findAll('b'):
                    if "words" in b.nextSibling:
                        self.story.setMetadata('numWords', b.text)
                    if "archived" in b.previousSibling:
                        self.story.setMetadata('datePublished', makeDate(stripHTML(b.text), self.dateformat))
                    if "updated" in b.previousSibling:
                        self.story.setMetadata('dateUpdated', makeDate(stripHTML(b.text), self.dateformat))
                    if "fandom" in b.nextSibling:
                        self.story.addToList('category', b.text)

                for br in box.findAll('br'):
                    br.replaceWith('split')
                genre=box.text.split("Genre:")[1].split("split")[0]
                if not "Unspecified" in genre:
                    self.story.addToList('genre',genre)


            if box.find('span') != None and box.find('span').text == "WARNING":

                rating=box.findAll('span')[1]
                rating.find('br').replaceWith('split')
                rating=rating.text.replace("This story is rated",'').split('split')[0]
                self.story.setMetadata('rating',rating)
                logger.debug(self.story.getMetadata('rating'))

                warnings=box.find('ol')
                if warnings != None:
                    warnings=warnings.text.replace(']', '').replace('[', '').split(' ')
                    for warning in warnings:
                        self.story.addToList('warnings',warning)


        for asoup in authorSoup.findAll('div', {'class':"story bordersolid"}):
            if asoup.find('a')['href'] == 'story.php?no='+self.story.getMetadata('storyId'):
                if '[ Completed ]' in asoup.text:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')
                chars=asoup.findNext('div').text.split('Characters')[1].split(']')[0]
                for char in chars.split(','):
                    if not "None" in char:
                        self.story.addToList('characters',char)
                break



    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div',{'class':"chapter bordersolid"}).findNext('div').findNext('div')

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
@@ -18,18 +18,13 @@
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
import os

from bs4.element import Comment
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
import sys

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib import parse as urlparse
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -81,16 +76,10 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

        try:
            data1 = self._fetchUrl(self.url)
            soup1 = self.make_soup(data1)
            #strip comments from soup
            [comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e
        data1 = self.get_request(self.url)
        soup1 = self.make_soup(data1)
        #strip comments from soup
        [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]

        if 'Page Not Found.' in data1:
            raise exceptions.StoryDoesNotExist(self.url)

@@ -103,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
        self.story.setMetadata('title', title.string)

        # Author
        author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a')
        author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
        authorurl = author['href']
        self.story.setMetadata('author', author.string)
        self.story.setMetadata('authorUrl', authorurl)

@@ -123,7 +112,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
        ### add it before the rest of the pages, if any
        self.add_chapter('1', self.url)

        chapterTable = soup1.find('div',{'class':'pages'}).findAll('a')
        chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')

        if chapterTable is not None:
            # Multi-chapter story

@@ -135,7 +124,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
            self.add_chapter(chapterTitle, chapterUrl)


        rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
        rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
        self.story.setMetadata('rating',rated)

        self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))

@@ -148,7 +137,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
        logger.debug('Getting chapter text from <%s>' % url)
        #logger.info('Getting chapter text from <%s>' % url)

        data1 = self._fetchUrl(url)
        data1 = self.get_request(url)
        soup1 = self.make_soup(data1)

        # get story text

@@ -25,7 +25,6 @@ from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -49,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','asph')

@@ -65,10 +64,10 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):

@@ -93,11 +92,11 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
        params['intent'] = ''
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
        loginUrl = 'https://' + self.getSiteDomain() + '/user.php'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)
        d = self.post_request(loginUrl, params)

        if "Logout" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,

@@ -114,45 +113,37 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
        url = self.url
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e
        data = self.get_request(url)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)
            data = self.get_request(url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)
        asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
        asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))

        try:
            # in case link points somewhere other than the first chapter
            a = soup.findAll('option')[1]['value']
            a = soup.find_all('option')[1]['value']
            self.story.setMetadata('storyId',a.split('=',)[1])
            url = 'http://'+self.host+'/'+a
            soup = self.make_soup(self._fetchUrl(url))
            url = 'https://'+self.host+'/'+a
            soup = self.make_soup(self.get_request(url))
        except:
            pass

        for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
        for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
            a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
            if a != None:
                self.story.setMetadata('title',stripHTML(a))

@@ -160,13 +151,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):

        # Find the chapters:
        chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
        chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
        if len(chapters) == 0:
            self.add_chapter(self.story.getMetadata('title'),url)
        else:
            for chapter in chapters:
                # just in case there's tags, like <i> in chapter titles.
                self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
                self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])


        # eFiction sites don't help us out a lot with their meta data

@@ -179,7 +170,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
            except:
                return ""

        cats = info.findAll('a',href=re.compile('categories.php'))
        cats = info.find_all('a',href=re.compile('categories.php'))
        for cat in cats:
            self.story.addToList('category',cat.string)

@@ -197,7 +188,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
        ## <td><span class="sb"><b>Published:</b> 04/08/2007</td>

        ## one story had <b>Updated...</b> in the description.  Restrict to sub-table
        labels = info.find('table').findAll('b')
        labels = info.find('table').find_all('b')
        for labelspan in labels:
            value = labelspan.nextSibling
            label = stripHTML(labelspan)

@@ -240,7 +231,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        data = self.get_request(url)

        soup = self.make_soup(data) # some chapters seem to be hanging up on those tags, so it is safer to close them

@@ -9,8 +9,6 @@ from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -63,7 +61,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain())+r"/story/view/0*(?P<id>\d+)"

    def performLogin(self, url, soup):
    def performLogin(self, url, data):
        params = {}
        if self.password:
            params['username'] = self.username

@@ -76,29 +74,30 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
            raise exceptions.FailedToLogin(url,params['username'])

        params['from_url'] = url
        params['csrf_aff_token'] = soup.find('input',{'name':'csrf_aff_token'})['value']
        if not params['csrf_aff_token']:
            raise exceptions.FailedToDownload('Error when logging in. This usually means a change in the website code.')
        # capture token from JS script, not appearing in form now.
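        # (The page inlines it as JS along the lines of:
        #    csrfToken = "0123abcdef...";
        # so the two lines below slice out the quoted value.)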
        csrf_token_search = 'csrfToken = "'
        params['csrf_aff_token'] = data[data.index(csrf_token_search)+len(csrf_token_search):]
        params['csrf_aff_token'] = params['csrf_aff_token'][:params['csrf_aff_token'].index('"')]

        loginUrl = 'https://' + self.getSiteDomain() + '/login/index'
        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl, params['username']))

        data = self._postUrl(loginUrl, params)
        data = self.post_request(loginUrl, params)
        soup = self.make_soup(data)
        if self.loginNeededCheck(soup):
        if self.loginNeededCheck(data):
            logger.info('Failed to login to URL %s as %s' % (loginUrl, params['username']))
            raise exceptions.FailedToLogin(url,params['username'])

    def loginNeededCheck(self,soup):
        return soup.find('div',{'id':'login'}) != None
    def loginNeededCheck(self,data):
        return "isLoggedIn = false" in data

    def doStorySubscribe(self, url, soup):
        subHref = soup.find('a',{'id':'subscribe'})
        if subHref:
            #does not work when using https - 403
            subUrl = 'http://' + self.getSiteDomain() + subHref['href']
            self._fetchUrl(subUrl)
            data = self._fetchUrl(url,usecache=False)
            self.get_request(subUrl)
            data = self.get_request(url,usecache=False)
            soup = self.make_soup(data)
            check = soup.find('div',{'class':'click-to-read-full'})
            if check:

@@ -108,37 +107,29 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
        else:
            return False

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def doExtractChapterUrlsAndMetadata(self,get_cover=True):
        url = self.url
        logger.info("url: "+url)
        soup = None
        try:
            data = self._fetchUrl(url)
            data = self.get_request(url)
            soup = self.make_soup(data)
        except exceptions.HTTPErrorFFF as e:
            if e.status_code != 404:
                raise
            data = self.decode_data(e.data)

        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        if self.loginNeededCheck(soup):
        # logger.debug(data)
        if not soup or self.loginNeededCheck(data):
            # always login if not already to avoid lots of headaches
            self.performLogin(url,soup)
            self.performLogin(url,data)
            # refresh website after logging in
            data = self._fetchUrl(url,usecache=False)
            data = self.get_request(url,usecache=False)
            soup = self.make_soup(data)

        # subscription check
        # logger.debug(soup)
        subCheck = soup.find('div',{'class':'click-to-read-full'})
        if subCheck and self.getConfig("auto_sub"):
            subSoup = self.doStorySubscribe(url,soup)

@@ -155,8 +146,8 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):

        # Find authorid and URL from... author url.
        mainmeta = soup.find('footer', {'class': 'main-meta'})
        alist = mainmeta.find('span', text='Author(s)')
        alist = alist.parent.findAll('a', href=re.compile(r"/profile/view/\d+"))
        alist = mainmeta.find('span', string='Author(s)')
        alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
        for a in alist:
            self.story.addToList('authorId',a['href'].split('/')[-1])
            self.story.addToList('authorUrl','https://'+self.host+a['href'])

@@ -166,16 +157,23 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
        self.newestChapterNum = None
        # Find the chapters:
        chapters=soup.find('select',{'name':'chapter-nav'})
        chapters=chapters.findAll('option')
        self.story.setMetadata('numChapters',len(chapters))
        hrefattr=None
        if chapters:
            chapters=chapters.find_all('option')
            hrefattr='value'
        else: # didn't find <select name='chapter-nav', look for alternative
            chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
            hrefattr='href'
        for index, chapter in enumerate(chapters):
            if chapter.text != 'Foreword': # skip the foreword
                self.add_chapter(chapter.text,'https://' + self.getSiteDomain() + chapter['value']) # note: AFF cuts off chapter names in list. this gets kind of fixed later on
            if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
                self.add_chapter(chapter.text,'https://' + self.getSiteDomain() + chapter[hrefattr])
                # note: AFF cuts off chapter names in list. this gets kind of fixed later on

        # find timestamp
        a = soup.find('span', text='Updated')
        a = soup.find('span', string='Updated')
        if a == None:
            a = soup.find('span', text='Published') # use published date if work was never updated
            a = soup.find('span', string='Published') # use published date if work was never updated
        a = a.parent.find('time')
        chapterDate = makeDate(a['datetime'],self.dateformat)
        if newestChapter == None or chapterDate > newestChapter:

@@ -183,89 +181,110 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
            self.newestChapterNum = index

        # story status
        a = mainmeta.find('span', text='Completed')
        a = mainmeta.find('span', string='Completed')
        if a:
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        # story description
        jsonlink = soup.find('link',href=re.compile(r'/api/forewords/[0-9]+/foreword_[0-9a-z]+.json'))
        fore_json = json.loads(self._fetchUrl(jsonlink['href']))
        content = self.make_soup(fore_json['post']).find('body') # BS4 adds <html><body> if not present.
        a = content.find('div', {'id':'story-description'})
        try:
            jsonlink = soup.find('script',string=re.compile(r'/api/forewords/[0-9]+/foreword_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks
            fore_json = json.loads(self.get_request(jsonlink))
            content = self.make_soup(fore_json['post']).find('body') # BS4 adds <html><body> if not present.
            a = content.find('div', {'id':'story-description'})
        except:
            # not all stories have foreword link.
            a = soup.find('div', {'id':'story-description'})
        if a:
            self.setDescription(url,a)

        # story tags
        a = mainmeta.find('span',text='Tags')
        a = mainmeta.find('span',string='Tags')
        if a:
            tags = a.parent.findAll('a')
            tags = a.parent.find_all('a')
            for tag in tags:
                self.story.addToList('tags', tag.text)

        # story characters
        a = mainmeta.find('span',text='Characters')
        a = mainmeta.find('span',string='Characters')
        if a:
            self.story.addToList('characters', a.nextSibling)

        # published on
        a = soup.find('span', text='Published')
        a = soup.find('span', string='Published')
        a = a.parent.find('time')
        self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))

        # updated on
        a = soup.find('span', text='Updated')
        a = soup.find('span', string='Updated')
        if a:
            a = a.parent.find('time')
            self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))

        # word count
        a = soup.find('span', text='Total Word Count')
        a = soup.find('span', string='Total Word Count')
        if a:
            a = a.find_next('span')
            self.story.setMetadata('numWords', int(a.text.split()[0]))

        # upvote, subs, and views
        a = soup.find('div',{'class':'title-meta'})
        spans = a.findAll('span', recursive=False)
        self.story.addToList('upvotes', re.search('\(([^)]+)', spans[0].find('span').text).group(1))
        self.story.addToList('subscribers', re.search('\(([^)]+)', spans[1].find('span').text).group(1))
        if enumerate(spans) == 2: # views can be private
            self.story.addToList('views', spans[2].find('span').text.split()[0])
        spans = a.find_all('span', recursive=False)
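        # (The counts are rendered inside parentheses, e.g. something like
        # "Upvotes (123)"; the \(([^)]+) searches below pull out the number.)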
        self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
        self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
        if len(spans) > 2: # views can be private
            self.story.setMetadata('views', spans[2].text.split()[0])

        # cover art in the form of a div before chapter content
        if get_cover:
            cover_url = ""
            a = soup.find('div',{'id':'bodyText'})
            a = a.find('div',{'class':'text-center'})
            if a:
                cover_url = a.find('img')['src']
            a = a.find('div',{'class':'text-center'})
            if a:
                cover_url = a.find('img')['src']
            self.setCoverImage(url,cover_url)

    # grab the text for an individual chapter
    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        data = self.get_request(url)
        soup = self.make_soup(data)
        # logger.debug(data)

        try:
            # https://www.asianfanfics.com/api/chapters/4791923/chapter_46d32e413d1a702a26f7637eabbfb6f3.json
            jsonlink = soup.find('link',href=re.compile(r'/api/chapters/[0-9]+/chapter_[0-9a-z]+.json'))
            chap_json = json.loads(self._fetchUrl(jsonlink['href']))
            content = self.make_soup(chap_json['post']).find('body') # BS4 adds <html><body> if not present.
            content.name='div' # change body to a div.
            if self.getConfig('inject_chapter_title'):
                # the dumbest workaround ever for the abbreviated chapter titles from before
                logger.debug("Injecting full-length chapter title")
                newTitle = soup.find('h1', {'id' : 'chapter-title'}).text
                newTitle = self.make_soup('<h3>%s</h3>' % (newTitle)).find('body') # BS4 adds <html><body> if not present.
                newTitle.name='div' # change body to a div.
                newTitle.append(content)
                return self.utf8FromSoup(url,newTitle)
            else:
                return self.utf8FromSoup(url,content)
        except Exception as e:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s %s!" % (url,e))
        ageform = soup.select_one('form[action="/account/toggle_age"]')
        # logger.debug(ageform)
        if ageform and (self.is_adult or self.getConfig("is_adult")):
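            # (Age-gated chapter: replay the site's own age-confirmation
            # form and parse the response instead of the gate page.)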
|
||||
params = {}
|
||||
params['is_of_age']=ageform.select_one('input#is_of_age')['value']
|
||||
params['current_url']=ageform.select_one('input#current_url')['value']
|
||||
params['csrf_aff_token']=ageform.select_one('input[name="csrf_aff_token"]')['value']
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/account/mark_over_18'
|
||||
logger.info("Will now toggle age to URL (%s)" % (loginUrl))
|
||||
# logger.debug(params)
|
||||
data = self.post_request(loginUrl, params)
|
||||
soup = self.make_soup(data)
|
||||
# logger.debug(data)
|
||||
|
||||
content = soup.find('div', {'id': 'user-submitted-body'})
|
||||
|
||||
if self.getConfig('inject_chapter_image'):
|
||||
logger.debug("Injecting chapter image")
|
||||
imgdiv = soup.select_one('div#bodyText div.bot-spacer')
|
||||
if imgdiv:
|
||||
content.insert(0, "\n")
|
||||
content.insert(0, imgdiv)
|
||||
content.insert(0, "\n")
|
||||
|
||||
if self.getConfig('inject_chapter_title'):
|
||||
logger.debug("Injecting full-length chapter title")
|
||||
title = soup.find('h1', {'id' : 'chapter-title'}).text
|
||||
newTitle = soup.new_tag('h3')
|
||||
newTitle.string = title
|
||||
content.insert(0, "\n")
|
||||
content.insert(0, newTitle)
|
||||
content.insert(0, "\n")
|
||||
|
||||
return self.utf8FromSoup(url,content)
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ import time
|
|||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import sys
|
||||
from bs4 import Comment
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
|
@ -59,7 +58,6 @@ from .. import exceptions as exceptions
|
|||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib import parse as urlparse
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
|
|
@ -99,28 +97,14 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
    def extractChapterUrlsAndMetadata(self):
        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

        try:
            data = self._fetchUrl(self.url)
            soup = self.make_soup(data)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        data = self.get_request(self.url)
        if 'The story does not exist' in data:
            raise exceptions.StoryDoesNotExist(self.url)
        soup = self.make_soup(data)

        # Extract metadata
        title=soup.title.text.replace('BDSM Library - Story: ','').replace('\\','')

@@ -128,34 +112,21 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):

        # Author
        author = soup.find('a', href=re.compile(r"/stories/author.php\?authorid=\d+"))
        i = 0
        while author == None:
            time.sleep(1)
            logger.warning('A problem retrieving the author information. Trying Again')
            try:
                data = self._fetchUrl(self.url)
                soup = self.make_soup(data)
            except HTTPError as e:
                if e.code == 404:
                    raise exceptions.StoryDoesNotExist(self.url)
                else:
                    raise e
            author = soup.find('a', href=re.compile(r"/stories/author.php\?authorid=\d+"))
            i += 1
            if i == 20:
                logger.info('Too Many cycles... exiting')
                sys.exit()

        authorurl = urlparse.urljoin(self.url, author['href'])
        self.story.setMetadata('author', author.text)
        self.story.setMetadata('authorUrl', authorurl)
        authorid = author['href'].split('=')[1]
        self.story.setMetadata('authorId', authorid)
        if author:
            authorurl = urlparse.urljoin(self.url, author['href'])
            self.story.setMetadata('author', author.text)
            self.story.setMetadata('authorUrl', authorurl)
            authorid = author['href'].split('=')[1]
            self.story.setMetadata('authorId', authorid)
        else:
            logger.info("Failed to find Author, setting to Anonymous")
            self.story.setMetadata('author','Anonymous')
            self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
            self.story.setMetadata('authorId','0')

        # Find the chapters:
        # The update date is with the chapter links... so we will update it here as well
        for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+"&chapterid=\d+$")):
        for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
            value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
            self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
            self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])

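Worked example of the per-chapter date handling above (the cell text and the site's date format are assumed for illustration):

    from datetime import datetime
    cell = '(added on 03/15/2010)'
    value = cell.replace('(added on','').replace(')','').strip()   # '03/15/2010'
    date = datetime.strptime(value, '%m/%d/%Y')   # roughly what makeDate does, minus its fallbacks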
@@ -163,11 +134,11 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):

        # Get the MetaData
        # Erotica Tags
        tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode'))
        tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
        for tag in tags:
            self.story.addToList('eroticatags',tag.text)

        for td in soup.findAll('td'):
        for td in soup.find_all('td'):
            if len(td.text)>0:
                if 'Added on:' in td.text and '<table' not in unicode(td):
                    value = td.text.replace('Added on:','').strip()

@@ -187,7 +158,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
        #Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))
        soup = self.make_soup(self.get_request(url))
        chaptertag = soup.find('div',{'class' : 'storyblock'})

        # Some of the stories have the chapters in <pre> sections, so have to check for that

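The <pre> handling itself falls outside the excerpted hunk; one plausible shape of such a check (a sketch only, not the adapter's actual code):

    # If the story text sits inside a fixed-width <pre>, keep the text but
    # demote the tag so downstream cleanup treats it like normal prose.
    pre = chaptertag.find('pre')
    if pre:
        pre.name = 'div'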
@@ -198,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
            raise exceptions.FailedToDownload("Error downloading Chapter: {0}!  Missing required element!".format(url))

        #strip comments from soup
        [comment.extract() for comment in chaptertag.findAll(text=lambda text:isinstance(text, Comment))]
        [comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]

        # BDSM Library basically wraps its own html around the document,
        # so we will be removing the script, title and meta content from the
        # storyblock
        for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'):
        for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
            tag.extract()

        for tag in chaptertag.findAll('o:smarttagtype'):
        for tag in chaptertag.find_all('o:smarttagtype'):
            tag.name = 'span'

        ## I'm going to take the attributes off all of the tags
        ## because they usually refer to the style that we removed above.
        for tag in chaptertag.findAll(True):
        for tag in chaptertag.find_all(True):
            tag.attrs = None

        return self.utf8FromSoup(url,chaptertag)

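A side note on the attribute strip above: assigning tag.attrs = None works in bs4, but an empty dict keeps later attribute lookups safe; an equivalent, arguably safer form:

    for tag in chaptertag.find_all(True):
        tag.attrs = {}   # {} rather than None, so tag.get('href') etc. never blows up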
@@ -1,17 +1,13 @@
from __future__ import absolute_import
from datetime import timedelta
import re

import logging
logger = logging.getLogger(__name__)

from bs4 import BeautifulSoup
from ..htmlcleanup import stripHTML

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib import parse as urlparse
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate
from .. import exceptions


@@ -47,19 +43,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
        self._setURL(self.READ_URL_TEMPLATE % story_no)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)

    def _customized_fetch_url(self, url, exception=None, parameters=None):
        if exception:
            try:
                data = self._fetchUrl(url, parameters)
            except HTTPError:
                raise exception(self.url)
        # Just let self._fetchUrl throw the exception, don't catch and
        # customize it.
        else:
            data = self._fetchUrl(url, parameters)

        return self.make_soup(data)

    @staticmethod
    def getSiteDomain():
        return BloodshedverseComAdapter.SITE_DOMAIN


@@ -78,7 +61,8 @@ class BloodshedverseComAdapter(BaseSiteAdapter):

    def extractChapterUrlsAndMetadata(self):
        logger.debug("URL: "+self.url)
        soup = self._customized_fetch_url(self.url)

        soup = self.make_soup(self.get_request(self.url))

        # Since no 404 error code we have to raise the exception ourselves.
        # A title that is just 'by' indicates that there is no author name


@@ -105,7 +89,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
        # Get the URL to the author's page and find the correct story entry to
        # scrape the metadata
        author_url = urlparse.urljoin(self.url, soup.find('a', {'class': 'headline'})['href'])
        soup = self._customized_fetch_url(author_url)
        soup = self.make_soup(self.get_request(author_url))

        # Ignore first list_box div, it only contains the author information
        for list_box in soup('div', {'class': 'list_box'})[1:]:


@@ -133,7 +117,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):

        summary_div = list_box.find('div', {'class': 'list_summary'})
        if not self.getConfig('keep_summary_html'):
            summary = ''.join(summary_div(text=True))
            summary = ''.join(summary_div(string=True))
        else:
            summary = self.utf8FromSoup(author_url, summary_div)

@@ -173,9 +157,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):

            self.story.addToList('warnings', warning)

        elif key == 'Chapters':
            self.story.setMetadata('numChapters', int(value))

        elif key == 'Words':
            # Apparently only numChapters needs to be an integer for
            # some strange reason. Remove possible ',' characters as to


@@ -190,12 +171,13 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
        # ugly %p(am/pm) hack moved into makeDate so other sites can use it.
        self.story.setMetadata('dateUpdated', date)

        if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
        if self.story.getMetadataRaw('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
            raise exceptions.AdultCheckRequired(self.url)

    def getChapterText(self, url):
        soup = self._customized_fetch_url(url)
        storytext_div = soup.find('div', {'class': 'storytext'})
        soup = self.make_soup(self.get_request(url))
        storytext_div = soup.find('div', {'class': 'tl'})
        storytext_div = storytext_div.find('div', {'class': ''})

        if self.getConfig('strip_text_links'):
            for anchor in storytext_div('a', {'class': 'FAtxtL'}):

@@ -1,326 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from bs4.element import Tag
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

# By virtue of being recent and requiring both is_adult and user/pass,
# adapter_fanficcastletvnet.py is the best choice for learning to
# write adapters--especially for sites that use the eFiction system.
# Most sites that have ".../viewstory.php?sid=123" in the story URL
# are eFiction.

# For non-eFiction sites, it can be considerably more complex, but
# this is still a good starting point.

# In general an 'adapter' needs to do these five things:

# - 'Register' correctly with the downloader
# - Site Login (if needed)
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
# - Grab the chapter list
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
# - Grab the chapter texts

# Search for XXX comments--that's where things are most likely to need changing.

# This function is called by the downloader in all adapter_*.py files
# in this dir to register the adapter class.  So it needs to be
# updated to reflect the class below it.  That, plus getSiteDomain()
# take care of 'Registering'.
def getClass():
    return BloodTiesFansComAdapter # XXX

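The checklist above maps directly onto a handful of methods; a minimal schematic of an adapter honoring it (site name and abbreviation invented for illustration):

    class ExampleSiteAdapter(BaseSiteAdapter):
        def __init__(self, config, url):
            BaseSiteAdapter.__init__(self, config, url)
            self.story.setMetadata('siteabbrev', 'ex')   # unique per site

        @staticmethod
        def getSiteDomain():
            return 'example.com'   # with getClass(), handles 'Registering'

        def extractChapterUrlsAndMetadata(self):
            pass   # login, adult check, chapter list and meta-data live here

        def getChapterText(self, url):
            pass   # fetch and clean a single chapter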
# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','btf') # XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y" # XXX

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'bloodties-fans.com' # XXX

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number.  print data at that point
            # and see what the 'click here to continue' url says.

            # Furthermore, there's a couple sites now with more than
            # one warning level for different ratings.  And they're
            # fussy about it.  midnightwhispers has three: 4, 2 & 1.
            # we'll try 1 first.
            addurl = "&ageconsent=ok&warning=4" # XXX
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # The actual text that is used to announce you need to be an
        # adult varies from site to site.  Again, print data before
        # the title search to troubleshoot.

        # Since the warning text can change by warning level, let's
        # look for the warning pass url.  nfacommunity uses
        # &warning= -- actually, so do other sites.  Must be an
        # eFiction book.

        # viewstory.php?sid=561&warning=4
        # viewstory.php?sid=561&warning=1
        # viewstory.php?sid=561&warning=2
        #print data
        #m = re.search(r"'viewstory.php\?sid=1882(&amp;warning=4)'",data)
        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
        if m != None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

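Worked example of that warning-link search (the HTML fragment is invented; eFiction pages HTML-escape the & in these links, which is why the &amp; cleanup follows):

    import re
    data = "onclick=\"...'viewstory.php?sid=561&amp;warning=4'...\""
    m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'", data)
    addurl = m.group(1).replace("&amp;", "&")   # -> '&warning=4', appended on the retry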
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/fiction/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.add_chapter(chapter,'http://'+self.host+'/fiction/'+chapter['href']+addurl)
|
||||
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
listbox = soup.find('div',{'class':'listbox'})
|
||||
# <strong>Rating:</strong> M<br /> etc
|
||||
labels = listbox.findAll('strong')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next strong tag.
|
||||
svalue = ""
|
||||
while not isinstance(value,Tag) or value.name != 'strong':
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rating' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Words' in label:
|
||||
value=re.sub(r"\|",r"",value)
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
catstext = [cat.string for cat in cats]
|
||||
for cat in catstext:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
charstext = [char.string for char in chars]
|
||||
for char in charstext:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
value=re.sub(r"\|",r"",value)
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
value=re.sub(r"\|",r"",value)
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
# moved outside because they changed *most*, but not *all* labels to <strong>
|
||||
ships = listbox.findAll('a',href=re.compile(r'browse.php.type=class&(amp;)?type_id=2')) # crappy html: & vs & in url.
|
||||
shipstext = [ship.string for ship in ships]
|
||||
for ship in shipstext:
|
||||
self.story.addToList('ships',ship.string)
|
||||
|
||||
genres = listbox.findAll('a',href=re.compile(r'browse.php\?type=class&(amp;)?type_id=1')) # crappy html: & vs & in url.
|
||||
genrestext = [genre.string for genre in genres]
|
||||
for genre in genrestext:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
|
||||
        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/fiction/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

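The series scan above is just a 1-based position lookup over the series page's story links; schematically (link values invented):

    links = ['viewstory.php?sid=7', 'viewstory.php?sid=12', 'viewstory.php?sid=30']
    story = 'viewstory.php?sid=12'
    position = links.index(story) + 1   # -> 2, the story's place in the series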
    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,div)


@@ -1,295 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return BuffyGilesComAdapter

# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class BuffyGilesComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /efiction part. Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','bufg')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d/%m/%y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'buffygiles.velocitygrass.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/efiction/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number.  print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=5"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        # Since the warning text can change by warning level, let's
        # look for the warning pass url.  ksarchive uses
        # &warning= -- actually, so do other sites.  Must be an
        # eFiction book.

        # efiction/viewstory.php?sid=1882&warning=4
        # efiction/viewstory.php?sid=1654&ageconsent=ok&warning=5
        #print data
        m = re.search(r"'efiction/viewstory.php\?sid=542(&amp;warning=5)'",data)
        m = re.search(r"'efiction/viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
        if m != None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        pagetitle = soup.find('div',{'id':'pagetitle'})

        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))

        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/efiction/'+chapter['href']+addurl)


        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"efiction/viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^efiction/viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('efiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,div)

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
# Copyright 2024 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.


@@ -16,24 +16,23 @@
#

from __future__ import absolute_import
from ..htmlcleanup import stripHTML
import logging
logger = logging.getLogger(__name__)

# Software: eFiction
from .base_efiction_adapter import BaseEfictionAdapter

class StarskyHutchArchiveNetSiteAdapter(BaseEfictionAdapter):

    @staticmethod
    def getSiteDomain():
        return 'www.starskyhutcharchive.net'

    @classmethod
    def getSiteAbbrev(self):
        return 'shan'

    @classmethod
    def getDateFormat(self):
        return "%m/%d/%Y"
from .base_otw_adapter import BaseOTWAdapter

def getClass():
    return StarskyHutchArchiveNetSiteAdapter
    return CFAAAdapter

class CFAAAdapter(BaseOTWAdapter):

    def __init__(self, config, url):
        BaseOTWAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','cfaa')

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'www.cfaarchive.org'

@@ -25,7 +25,6 @@ from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate


@@ -87,13 +86,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e
        data = self.get_request(url)

        # The actual text that is used to announce you need to be an
        # adult varies from site to site.  Again, print data before


@@ -104,11 +97,9 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        pt = soup.find('div', {'id' : 'pagetitle'})


@@ -125,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
        self.story.setMetadata('rating', rating)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)


@@ -143,7 +134,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):

        # <span class="label">Rated:</span> NC-17<br /> etc

        labels = soup.findAll('span',{'class':'label'})
        labels = soup.find_all('span',{'class':'label'})

        value = labels[0].previousSibling
        svalue = ""


@@ -163,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
                self.story.setMetadata('numWords', value.split(' -')[0])

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)


@@ -202,9 +193,8 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            seriessoup = self.make_soup(self.get_request(series_url))
            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):


@@ -222,7 +212,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))
        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})


fanficfare/adapters/adapter_chireadscom.py (new file, 107 lines)
@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-

# Copyright 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


from __future__ import absolute_import
import logging
import re
# py2 vs py3 transition

from .base_adapter import BaseSiteAdapter, makeDate
from fanficfare.htmlcleanup import stripHTML
from .. import exceptions as exceptions

logger = logging.getLogger(__name__)


def getClass():
    return ChireadsComSiteAdapter


class ChireadsComSiteAdapter(BaseSiteAdapter):
    NEW_DATE_FORMAT = '%Y/%m/%d %H:%M:%S'
    OLD_DATE_FORMAT = '%m/%d/%Y %I:%M:%S %p'

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'chireads')

        # get storyId from url--url validation guarantees query correct
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        story_id = match.group('id')
        self.story.setMetadata('storyId', story_id)
        self._setURL('https://%s/category/translatedtales/%s/' % (self.getSiteDomain(), story_id))

    @staticmethod
    def getSiteDomain():
        return 'chireads.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return 'https://%s/category/translatedtales/story-name' % cls.getSiteDomain()

    def getSiteURLPattern(self):
        return r'https?://chireads\.com/category/translatedtales/(?P<id>[^/]+)(/)?'

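Quick check of the URL pattern above (story slug invented):

    import re
    pat = r'https?://chireads\.com/category/translatedtales/(?P<id>[^/]+)(/)?'
    m = re.match(pat, 'https://chireads.com/category/translatedtales/story-name/')
    m.group('id')   # -> 'story-name'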
    def extractChapterUrlsAndMetadata(self):
        logger.debug('URL: %s', self.url)

        data = self.get_request(self.url)

        soup = self.make_soup(data)
        info = soup.select_one('.inform-inform-data')
        self.story.setMetadata('title', stripHTML(info.h3).split(' | ')[0])

        self.setCoverImage(self.url, soup.select_one('.inform-product > img')['src'])

        # Unicode strings because '：' isn't ':', but \xef\xbc\x9a
        # author = stripHTML(info.h6).split(u' ')[0].replace(u'Auteur : ', '', 1)

        author = stripHTML(info.h6).split('Babelcheck')[0].replace('Auteur : ', '').replace('\xc2\xa0', '')
        # author = stripHTML(info.h6).split('\xa0')[0].replace(u'Auteur : ', '', 1)
        self.story.setMetadata('author', author)
        self.story.setMetadata('authorId', author)
        ## site doesn't have authorUrl links.

        datestr = stripHTML(soup.select_one('.newestchapitre > div > a')['href'])[-11:-1]
        date = makeDate(datestr, '%Y/%m/%d')
        if date:
            self.story.setMetadata('dateUpdated', date)

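Worked example of that [-11:-1] slice (href shape assumed: the latest-chapter link ends with its date path and a trailing slash):

    href = 'https://chireads.com/category/translatedtales/story-name/2020/05/12/'
    datestr = href[-11:-1]   # -> '2020/05/12', then makeDate(datestr, '%Y/%m/%d')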
        intro = stripHTML(info.select_one('.inform-inform-txt').span)
        self.setDescription(self.url, intro)

        for content in soup.find_all('div', {'id': 'content'}):
            for a in content.find_all('a'):
                self.add_chapter(a.get_text(), a['href'])

    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)

        data = self.get_request(url)
        soup = self.make_soup(data)

        content = soup.select_one('#content')

        if None == content:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,content)

@@ -20,7 +20,6 @@ from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
import sys

from bs4.element import Comment
from ..htmlcleanup import stripHTML


@@ -28,7 +27,6 @@ from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate


@@ -51,7 +49,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','chosen2')


@@ -67,17 +65,10 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True
        return r"https?"+re.escape("://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):


@@ -87,19 +78,13 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
            addURL = "&ageconsent=ok&warning=3"
        else:
            addURL = ""

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = '{0}&index=1{1}'.format(self.url,addURL)
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e
        data = self.get_request(url)

        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
            raise exceptions.AdultCheckRequired(self.url)


@@ -107,15 +92,13 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied("{0} says: Access denied. This story has not been validated by the adminstrators of this site.".format(self.getSiteDomain()))

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        ## Some stories have a banner that has its own <a> tag before the actual text title...
        ## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
        a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
        a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.


@@ -123,14 +106,14 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
        # so I'm checking the pagetitle div for this as well
        a = soup.find('div',{'id':'pagetitle'}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            #self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
            self.add_chapter(chapter,'http://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
            self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))


        # eFiction sites don't help us out a lot with their meta data


@@ -144,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            val = labelspan.nextSibling
            value = unicode('')


@@ -166,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
                self.story.setMetadata('numWords', stripHTML(value))

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Pairing' in label:
                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                for ship in ships:
                    self.story.addToList('ships',ship.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)


@@ -209,17 +192,16 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']
            series_url = 'https://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    # this site has several links to each story.
                    if a.text == 'Latest Chapter':
                        if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
                            self.setSeries(series_name, i)
                            self.story.setMetadata('seriesUrl',series_url)
                            break


@@ -234,7 +216,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))
        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

@ -1,232 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
|
||||
def getClass():
|
||||
return CSIForensicsComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class CSIForensicsComAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','csiforensics')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %b %Y"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'csi-forensics.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=5&skin=elegantcsi"
|
||||
else:
|
||||
addurl="&skin=elegantcsi"
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # The actual text that is used to announce you need to be an
        # adult varies from site to site. Again, print data before
        # the title search to troubleshoot.
        if "This story is rated NC-17, and therefore is not suitable for minors. If you are below the age required to view such material in your locality, please return from whence you came." in data: # XXX
            raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title

        pt = soup.find('div', {'id' : 'pagetitle'})
        a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',a.string)

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Rating
        rate = stripHTML(soup.find('div',{'id':'pagetitle'}))
        rate = rate[rate.rindex('[')+1:rate.rindex(']')]
        self.story.setMetadata('rating', rate)
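        # e.g. (illustrative): a pagetitle like "Some Title by Some Author [NC-17]"
        # yields rate == "NC-17" from the rindex('[')/rindex(']') slice above.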

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)


        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""
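        # e.g. (illustrative): defaultGetattr(tag,'class') returns the tag's
        # class attribute when present, and "" for nodes without attributes.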

        smalldiv = soup.find('div', {'class' : 'small'})


        chars = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=characters'))
        for char in chars:
            self.story.addToList('characters',char.string)

        metatext = stripHTML(smalldiv)

        if 'Completed: Yes' in metatext:
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        word=soup.find(text=re.compile("Word count:")).split(':')
        self.story.setMetadata('numWords', word[1])

        cats = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=categories'))
        for cat in cats:
            self.story.addToList('category',cat.string)

        warnings = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=class(&)type_id=2(&)classid=\d+'))
        for warning in warnings:
            self.story.addToList('warnings',warning.string)

        date=soup.find('div',{'class' : 'bottom'})
        pd=date.find(text=re.compile("Published:")).string.split(': ')
        self.story.setMetadata('datePublished', makeDate(stripHTML(pd[1].split(' U')[0]), self.dateformat))
        self.story.setMetadata('dateUpdated', makeDate(stripHTML(pd[2]), self.dateformat))
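        # e.g. (illustrative, assuming the bottom div reads
        # "Published: 01 Jan 2010 Updated: 02 Feb 2011"): split(': ') yields
        # ['Published', '01 Jan 2010 Updated', '02 Feb 2011'], so
        # pd[1].split(' U')[0] == '01 Jan 2010' and pd[2] == '02 Feb 2011',
        # both parsed with self.dateformat ("%d %b %Y").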

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        pub=0
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Genres' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

        smalldiv.extract()

        # Summary
        summary = soup.find('div', {'class' : 'content'})
        self.setDescription(url,summary)

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

@@ -1,295 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return DeepInMySoulNetAdapter ## XXX

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])


        # normalized story URL.
        # XXX Most sites don't have the /fiction part. Replace all to remove it usually.
        self._setURL('https://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','dimsn') ## XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'www.deepinmysoul.net' # XXX

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return "https?://"+re.escape(self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'https://' + self.getSiteDomain() + '/fiction/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=4"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        # Since the warning text can change by warning level, let's
        # look for the warning pass url. ksarchive uses
        # &warning= -- actually, so do other sites. Must be an
        # eFiction book.

        # fiction/viewstory.php?sid=1882&warning=4
        # fiction/viewstory.php?sid=1654&ageconsent=ok&warning=5
        #print data
        m = re.search(r"'fiction/viewstory.php\?sid=29(&warning=4)'",data)
        m = re.search(r"'fiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
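        # e.g. (illustrative): against "'fiction/viewstory.php?sid=1654&ageconsent=ok&warning=5'"
        # m.group(1) == '&ageconsent=ok&warning=5'; any &amp; entities in the
        # captured suffix are corrected to & below before re-fetching.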
        if m != None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid & error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        # Now go hunting for all the meta data and the chapter list.

        pagetitle = soup.find('div',{'id':'pagecontent'})

        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))

        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/fiction/'+chapter['href']+addurl)


        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('fiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

256 fanficfare/adapters/adapter_deviantartcom.py Normal file
@@ -0,0 +1,256 @@
# -*- coding: utf-8 -*-

# Copyright 2021 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


from __future__ import absolute_import
import logging
import re
# py2 vs py3 transition
from ..six.moves.urllib.parse import urlparse

from .base_adapter import BaseSiteAdapter, makeDate
from fanficfare.htmlcleanup import stripHTML
from .. import exceptions as exceptions
from fanficfare.dateutils import parse_relative_date_string

logger = logging.getLogger(__name__)


def getClass():
    return DeviantArtComSiteAdapter


class DeviantArtComSiteAdapter(BaseSiteAdapter):
    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'dac')

        self.username = 'NoneGiven'
        self.password = ''
        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        story_id = match.group('id')
        author = match.group('author')
        self.story.setMetadata('author', author)
        self.story.setMetadata('authorId', author)
        self.story.setMetadata('authorUrl', 'https://www.deviantart.com/' + author)
        self._setURL(url)

    @staticmethod
    def getSiteDomain():
        return 'www.deviantart.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['www.deviantart.com']

    @classmethod
    def getProtocol(self):
        return 'https'

    @classmethod
    def getSiteExampleURLs(cls):
        return 'https://%s/<author>/art/<work-name>' % cls.getSiteDomain()

    def getSiteURLPattern(self):
        return r'https?://www\.deviantart\.com/(?P<author>[^/]+)/art/(?P<id>[^/]+)/?'
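    # e.g. (illustrative, hypothetical URL): matching
    # https://www.deviantart.com/someauthor/art/Some-Work-123456
    # yields author == 'someauthor' and id == 'Some-Work-123456'.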

    def performLogin(self, url):
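        ## Two-step login, as implemented below: fetch the login page for its
        ## hidden form tokens, post the username to /_sisu/do/step2 to obtain
        ## lu_token2, then post the password to /_sisu/do/signin.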
        if self.username and self.username != 'NoneGiven':
            username = self.username
        else:
            username = self.getConfig('username')

        # logger.debug("\n\nusername:(%s)\n\n"%username)
        if not username:
            logger.info("Login Required for URL %s" % url)
            raise exceptions.FailedToLogin(url,username)

        data = self.get_request_raw('https://www.deviantart.com/users/login', referer=url, usecache=False)
        data = self.decode_data(data)
        soup = self.make_soup(data)
        params = {
            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
            'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
            'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
            'challenge': soup.find('input', {'name': 'challenge'})['value'],
            'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
            'remember': 'on',
            'username': username
        }

        loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/step2'
        logger.debug('Will now login to DeviantArt as (%s)' % username)

        result = self.post_request(loginUrl, params, usecache=False)
        soup = self.make_soup(result)
        if not soup.find('input', {'name': 'lu_token2'}):
            logger.info("Login Failed for URL %s (no lu_token2 found)" % url)
            raise exceptions.FailedToLogin(url,username)

        params = {
            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
            'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
            'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
            'challenge': soup.find('input', {'name': 'challenge'})['value'],
            'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
            'lu_token2': soup.find('input', {'name': 'lu_token2'})['value'],
            'remember': 'on',
            'username': ''
        }

        if self.password:
            params['password'] = self.password
        else:
            params['password'] = self.getConfig('password')

        # logger.debug("\n\nparams['password']:(%s)\n\n"%params['password'])
        loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/signin'
        logger.debug('Will now send password to DeviantArt')

        result = self.post_request(loginUrl, params, usecache=False)

        if 'Log In | DeviantArt' in result:
            logger.error('Failed to login to DeviantArt as %s' % username)
            raise exceptions.FailedToLogin('https://www.deviantart.com', username)
        else:
            return True

    def requiresLogin(self, data):
        return '</a> has limited the viewing of this artwork to members of the DeviantArt community only' in data

    def isLoggedIn(self, data):
        return '<form id="logout-form" action="https://www.deviantart.com/users/logout" method="POST">' in data

    def isWatchersOnly(self, data):
        return '>Watchers-Only Deviation<' in data

    def requiresMatureContentEnabled(self, data):
        return (
            '>This content is intended for mature audiences<' in data
            or '>This deviation is intended for mature audiences<' in data
            or '>This filter hides content that may be inappropriate for some viewers<' in data
            or '>May contain sensitive content<' in data
            or '>Log in to view<' in data
            or '>This deviation has been labeled as containing themes not suitable for all deviants.<' in data
        )

    def extractChapterUrlsAndMetadata(self):
        logger.debug('URL: %s', self.url)

        data = self.get_request(self.url)
        soup = self.make_soup(data)

        ## story can require login outright, or it can show up as
        ## watchers-only or mature-enabled without the same 'requires
        ## login' strings.
        if self.requiresLogin(data) or ( not self.isLoggedIn(data) and
                                         (self.isWatchersOnly(data) or
                                          self.requiresMatureContentEnabled(data)) ):
            if self.performLogin(self.url):
                data = self.get_request(self.url, usecache=False)
                soup = self.make_soup(data)

        ## Check watchers only and mature enabled again, separately,
        ## after login because they can still apply after login.
        if self.isWatchersOnly(data):
            raise exceptions.FailedToDownload(
                'Deviation is only available for watchers. ' +
                'You must watch this author before you can download it.'
            )
        if self.requiresMatureContentEnabled(data):
            raise exceptions.FailedToDownload(
                'Deviation is set as mature, you must go into your account ' +
                'and enable showing of mature content.'
            )

        appurl = soup.select_one('meta[property="og:url"]')['content']
        if appurl:
            story_id = urlparse(appurl).path.lstrip('/')
        else:
            logger.debug("Looking for JS story id")
            ## after login, this is only found in a JS block. Dunno why.
            ## F875A309-B0DB-860E-5079-790D0FBE5668
            match = re.search(r'\\"deviationUuid\\":\\"(?P<id>[A-Z0-9-]+)\\",',data)
            if match:
                story_id = match.group('id')
            else:
                raise exceptions.FailedToDownload('Failed to find Story ID.')
        self.story.setMetadata('storyId', story_id)

        title = soup.select_one('h1').get_text()
        self.story.setMetadata('title', stripHTML(title))

        ## dA has no concept of status
        # self.story.setMetadata('status', 'Completed')

        pubdate = soup.select_one('time').get_text()

        # Maybe do this better, but this works
        try:
            self.story.setMetadata('datePublished', makeDate(pubdate, '%b %d, %Y'))
        except:
            self.story.setMetadata('datePublished', parse_relative_date_string(pubdate))
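        # e.g. (illustrative): an absolute date such as 'Oct 1, 2024' parses
        # via makeDate's '%b %d, %Y' format; a relative one (say '3 days ago')
        # raises there and falls back to parse_relative_date_string.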

        # do description here if appropriate

        story_tags = soup.select('a[href^="https://www.deviantart.com/tag"] span')
        if story_tags is not None:
            for tag in story_tags:
                self.story.addToList('genre', tag.get_text())

        self.add_chapter(title, self.url)

    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s', url)
        data = self.get_request(url)
        # logger.debug(data)
        soup = self.make_soup(data)

        # remove comments section to avoid false matches
        comments = soup.select_one('[data-hook=comments_thread]')
        if comments:
            comments.decompose()
        # previous search not always found in some stories.
        # <div id="comments"></div> inside the real containing
        # div seems more common
        commentsdiv = soup.select_one('div#comments')
        if commentsdiv:
            commentsdiv.parent.decompose()

        # three different 'content' tags to look for.
        # This is the current one as of Oct 2024
        content = soup.select_one('[data-editor-viewer="1"]')

        if content is None:
            # older story? I can't find any of this style in Oct2024
            content = soup.select_one('[data-id="rich-content-viewer"]')

        if content is None:
            # olderer story, but used by some older (2018) posts
            content = soup.select_one('.legacy-journal')

        if content is None:
            raise exceptions.FailedToDownload(
                'Could not find story text. Please open a bug with the URL %s' % self.url
            )

        return self.utf8FromSoup(url, content)

@@ -23,8 +23,6 @@ from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
-from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -76,7 +74,7 @@ class DokugaComAdapter(BaseSiteAdapter):
        return "http://"+cls.getSiteDomain()+"/fanfiction/story/1234/1 http://"+cls.getSiteDomain()+"/spark/story/1234/1"

    def getSiteURLPattern(self):
-        return r"http://"+self.getSiteDomain()+"/(fanfiction|spark)?/story/\d+/?\d+?$"
+        return r"http://"+self.getSiteDomain()+r"/(fanfiction|spark)?/story/\d+/?\d+?$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
@@ -97,17 +95,17 @@ class DokugaComAdapter(BaseSiteAdapter):
        params['Submit'] = 'Submit'

        # copy all hidden input tags to pick up appropriate tokens.
-        for tag in soup.findAll('input',{'type':'hidden'}):
+        for tag in soup.find_all('input',{'type':'hidden'}):
            params[tag['name']] = tag['value']
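(Note: in bs4, findAll is the legacy BeautifulSoup-3-style alias of find_all; both still work, so renames like this one are purely cosmetic.)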

        loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['username']))

-        d = self._postUrl(loginUrl, params)
+        d = self.post_request(loginUrl, params)

        if "Your session has expired. Please log in again." in d:
-            d = self._postUrl(loginUrl, params)
+            d = self.post_request(loginUrl, params)

        if "Logout" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
@@ -125,28 +123,20 @@ class DokugaComAdapter(BaseSiteAdapter):
        url = self.url
        logger.debug("URL: "+url)

-        try:
-            data = self._fetchUrl(url)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
+        data = self.get_request(url)

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url,soup)
-            data = self._fetchUrl(url)
+            data = self.get_request(url)
            soup = self.make_soup(data)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title and author
        a = soup.find('div', {'align' : 'center'}).find('h3')
@@ -163,7 +153,7 @@ class DokugaComAdapter(BaseSiteAdapter):
        self.story.setMetadata('title',stripHTML(a))

        # Find the chapters:
-        chapters = soup.find('select').findAll('option')
+        chapters = soup.find('select').find_all('option')
        if len(chapters)==1:
            self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
        else:
@@ -172,13 +162,13 @@ class DokugaComAdapter(BaseSiteAdapter):
                self.add_chapter(chapter,'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/'+chapter['value'])


-        asoup = self.make_soup(self._fetchUrl(alink))
+        asoup = self.make_soup(self.get_request(alink))

        if 'fanfiction' in self.section:
            asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')

        #grab the rest of the metadata from the author's page
-        for div in asoup.findAll('div'):
+        for div in asoup.find_all('div'):
            nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
            if nav != None:
                break
@@ -218,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):

        else:
            asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
-            for div in asoup.findAll('div'):
+            for div in asoup.find_all('div'):
                nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
                if nav != None:
                    break
@@ -262,7 +252,7 @@ class DokugaComAdapter(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

-        soup = self.make_soup(self._fetchUrl(url))
+        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'chtext'})

@@ -25,7 +25,6 @@ from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -94,7 +93,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

-        d = self._fetchUrl(loginUrl, params)
+        d = self.post_request(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
@@ -121,18 +120,12 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

-        try:
-            data = self._fetchUrl(url)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
+        data = self.get_request(url)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
-            data = self._fetchUrl(url)
+            data = self.get_request(url)

        m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
        if m != None:
@@ -146,24 +139,16 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

-                try:
-                    data = self._fetchUrl(url)
-                except HTTPError as e:
-                    if e.code == 404:
-                        raise exceptions.StoryDoesNotExist(self.url)
-                    else:
-                        raise e
+                data = self.get_request(url)
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
@@ -176,7 +161,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
        self.story.setMetadata('author',a.string)

        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -196,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):

        self.setDescription(url,content.find('blockquote'))

-        for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
+        for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
            self.story.addToList('genre',genre.string)

-        for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
+        for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
            self.story.addToList('warnings',warning.string)

-        labels = content.findAll('b')
+        labels = content.find_all('b')

        for labelspan in labels:
            value = labelspan.nextSibling
@@ -223,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
                self.story.setMetadata('rating', value)

            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

@@ -260,10 +245,9 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

-            # use BeautifulSoup HTML parser to make everything easier to find.
-            seriessoup = self.make_soup(self._fetchUrl(series_url))
+            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
@@ -283,7 +267,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

-        soup = self.make_soup(self._fetchUrl(url))
+        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'class' : 'listbox'})

@@ -1,218 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return EfictionEstelielDeAdapter

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class EfictionEstelielDeAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])


        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','eesd')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'efiction.esteliel.de'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"


    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # Now go hunting for all the meta data and the chapter list.

        ## Title and author
        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        pagetitle = soup.find('div',{'id':'pagetitle'})
        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])


        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        list = soup.find('div', {'class':'listbox'})
        labelspan=list.find('span',{'class':'label'})
        value = labelspan.nextSibling
        label = labelspan.string
        genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
        for genre in genres:
            self.story.addToList('genre',genre.string)

        labels = list.findAll('b')
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'Rating' not in unicode(value):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rating' in label:
                self.story.setMetadata('rating', value)

            if 'Words' in label:
                self.story.setMetadata('numWords', value)

            if 'Category' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            if list.find('a', href=re.compile(r"series.php")) != None:
                for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")):
                    # Find Series name from series URL.
                    series_url = 'http://'+self.host+'/'+series['href']
                    # use BeautifulSoup HTML parser to make everything easier to find.
                    seriessoup = self.make_soup(self._fetchUrl(series_url))
                    storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
                    i=1
                    for a in storyas:
                        if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                            name=seriessoup.find('div', {'id' : 'pagetitle'})
                            name.find('a').extract()
                            self.setSeries(name.text.split(' by[')[0], i)
                            self.story.setMetadata('seriesUrl',series_url)
                            i=0
                            break
                        i+=1
                    if i == 0:
                        break

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

@@ -24,8 +24,6 @@ from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
-from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -93,7 +91,7 @@ class EFPFanFicNet(BaseSiteAdapter):
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

-        d = self._fetchUrl(loginUrl, params)
+        d = self.post_request(loginUrl, params)

        if '<a class="menu" href="newaccount.php">' in d : # register for new account link
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
@@ -109,27 +107,19 @@ class EFPFanFicNet(BaseSiteAdapter):
        url = self.url
        logger.debug("URL: "+url)

-        try:
-            data = self._fetchUrl(url)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
+        data = self.get_request(url)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
-            data = self._fetchUrl(url)
+            data = self.get_request(url)

        # if "Access denied. This story has not been validated by the adminstrators of this site." in data:
        # raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('a', href=re.compile(r'^viewstory\.php\?sid='+self.story.getMetadata('storyId')+"$"))
@@ -148,7 +138,7 @@ class EFPFanFicNet(BaseSiteAdapter):
            # no selector found, so it's a one-chapter story.
            self.add_chapter(self.story.getMetadata('title'),url)
        else:
-            allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')})
+            allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
            for o in allOptions:
                url = u'https://%s/%s' % ( self.getSiteDomain(),
                                           o['value'])
@@ -180,14 +170,14 @@ class EFPFanFicNet(BaseSiteAdapter):
        if authsoup != None:
            # last author link with offset should be the 'next' link.
            authurl = u'https://%s/%s' % ( self.getSiteDomain(),
-                                           authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
+                                           authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )

            # Need author page for most of the metadata.
            logger.debug("fetching author page: (%s)"%authurl)
-            authsoup = self.make_soup(self._fetchUrl(authurl))
+            authsoup = self.make_soup(self.get_request(authurl))
            #print("authsoup:%s"%authsoup)

-        storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
+        storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
        for storya in storyas:
            #print("======storya:%s"%storya)
            storyblock = storya.findParent('div',{'class':'storybloc'})
@@ -204,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
            # Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
            # Categoria: <a href="categories.php?catid=1&parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&a=">3</a> recensioni</div>

-            cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+            cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
            for cat in cats:
                self.story.addToList('category',cat.string)

@@ -270,10 +260,9 @@ class EFPFanFicNet(BaseSiteAdapter):
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

-            # use BeautifulSoup HTML parser to make everything easier to find.
-            seriessoup = self.make_soup(self._fetchUrl(series_url))
+            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
@@ -291,7 +280,7 @@ class EFPFanFicNet(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

-        soup = self.make_soup(self._fetchUrl(url))
+        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'class' : 'storia'})

@@ -299,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        # remove any header and 'o:p' tags.
-        for tag in div.findAll("head") + div.findAll("o:p"):
+        for tag in div.find_all("head") + div.find_all("o:p"):
            tag.extract()

        # change any html and body tags to div.
-        for tag in div.findAll("html") + div.findAll("body"):
+        for tag in div.find_all("html") + div.find_all("body"):
            tag.name='div'

        # remove extra bogus doctype.
@@ -25,7 +25,6 @@ from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -87,13 +86,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

-        try:
-            data = self._fetchUrl(url)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
+        data = self.get_request(url)

        m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
        if m != None:
@@ -107,24 +100,16 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

-                try:
-                    data = self._fetchUrl(url)
-                except HTTPError as e:
-                    if e.code == 404:
-                        raise exceptions.StoryDoesNotExist(self.url)
-                    else:
-                        raise e
+                data = self.get_request(url)
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        pt = soup.find('div', {'id' : 'pagetitle'})
@@ -141,7 +126,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
        self.story.setMetadata('rating', rating)

        # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -159,7 +144,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):

        # <span class="label">Rated:</span> NC-17<br /> etc

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})

        value = labels[0].previousSibling
        svalue = ""
@@ -179,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
                self.story.setMetadata('numWords', value.split(' -')[0])

            if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

@@ -218,9 +203,8 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

-            # use BeautifulSoup HTML parser to make everything easier to find.
-            seriessoup = self.make_soup(self._fetchUrl(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            seriessoup = self.make_soup(self.get_request(series_url))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
@ -240,7 +224,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||

fanficfare/adapters/adapter_fanficauthorsnet.py
@@ -23,15 +23,12 @@ from __future__ import unicode_literals
 import logging
 logger = logging.getLogger(__name__)
 import re
-import sys
-from bs4 import UnicodeDammit, Comment
 
 from ..htmlcleanup import stripHTML
 from .. import exceptions as exceptions
 
 # py2 vs py3 transition
 from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError
 
 from .base_adapter import BaseSiteAdapter, makeDate
 

@@ -56,6 +53,9 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
         #Setting the 'Zone' for each "Site"
         self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')
 
+        # site change .nsns to -nsns
+        self.zone = self.zone.replace('.nsns','-nsns')
+
         # normalized story URL.
         self._setURL('https://{0}.{1}/{2}/'.format(
             self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))

@@ -82,7 +82,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
     @classmethod
     def getAcceptDomains(cls):
 
+        # need both .nsns(old) and -nsns(new) because it's a domain
+        # change, not just URL change.
         return ['aaran-st-vines.nsns.fanficauthors.net',
+                'aaran-st-vines-nsns.fanficauthors.net',
                 'abraxan.fanficauthors.net',
                 'bobmin.fanficauthors.net',
                 'canoncansodoff.fanficauthors.net',

@@ -98,9 +101,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
                 'jeconais.fanficauthors.net',
                 'kinsfire.fanficauthors.net',
                 'kokopelli.nsns.fanficauthors.net',
+                'kokopelli-nsns.fanficauthors.net',
                 'ladya.nsns.fanficauthors.net',
+                'ladya-nsns.fanficauthors.net',
                 'lorddwar.fanficauthors.net',
                 'mrintel.nsns.fanficauthors.net',
+                'mrintel-nsns.fanficauthors.net',
                 'musings-of-apathy.fanficauthors.net',
                 'ruskbyte.fanficauthors.net',
                 'seelvor.fanficauthors.net',

@@ -111,7 +117,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
     ################################################################################################
     @classmethod
     def getSiteExampleURLs(self):
-        return ("https://aaran-st-vines.nsns.fanficauthors.net/A_Story_Name/ "
+        return ("https://aaran-st-vines-nsns.fanficauthors.net/A_Story_Name/ "
                 + "https://abraxan.fanficauthors.net/A_Story_Name/ "
                 + "https://bobmin.fanficauthors.net/A_Story_Name/ "
                 + "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "

@@ -126,10 +132,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
                 + "https://jbern.fanficauthors.net/A_Story_Name/ "
                 + "https://jeconais.fanficauthors.net/A_Story_Name/ "
                 + "https://kinsfire.fanficauthors.net/A_Story_Name/ "
-                + "https://kokopelli.nsns.fanficauthors.net/A_Story_Name/ "
-                + "https://ladya.nsns.fanficauthors.net/A_Story_Name/ "
+                + "https://kokopelli-nsns.fanficauthors.net/A_Story_Name/ "
+                + "https://ladya-nsns.fanficauthors.net/A_Story_Name/ "
                 + "https://lorddwar.fanficauthors.net/A_Story_Name/ "
-                + "https://mrintel.nsns.fanficauthors.net/A_Story_Name/ "
+                + "https://mrintel-nsns.fanficauthors.net/A_Story_Name/ "
                 + "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
                 + "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
                 + "https://seelvor.fanficauthors.net/A_Story_Name/ "

@@ -139,14 +145,15 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
 
     ################################################################################################
     def getSiteURLPattern(self):
+        ## .nsns kept here to match both . and -
         return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
 
-    def use_pagecache(self):
-        '''
-        adapters that will work with the page cache need to implement
-        this and change it to True.
-        '''
-        return True
+    @classmethod
+    def get_section_url(cls,url):
+        ## only changing .nsns to -nsns and only when part of the
+        ## domain.
+        url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
+        return url
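
An illustrative check (not part of the commit) of what get_section_url's
normalization does -- old-style `.nsns` subdomains map onto the new `-nsns`
ones, while URLs already in the new form pass through unchanged:

    url = 'https://mrintel.nsns.fanficauthors.net/A_Story_Name/'
    url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
    # -> 'https://mrintel-nsns.fanficauthors.net/A_Story_Name/'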
 
     ################################################################################################
     def doExtractChapterUrlsAndMetadata(self, get_cover=True):

@@ -154,37 +161,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
         url = self.url
         logger.debug("URL: "+url)
 
-        params={}
-        if self.password:
-            params['username'] = self.username
-            params['password'] = self.password
-        else:
-            params['username'] = self.getConfig("username")
-            params['password'] = self.getConfig("password")
-
-        if not params['username']:
-            raise exceptions.FailedToLogin('You need to have your username and password set.',params['username'])
-
-        try:
-            data = self._fetchUrl(url+'index/', params, usecache=False)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist("Code: 404. {0}".format(url))
-            elif e.code == 410:
-                raise exceptions.StoryDoesNotExist("Code: 410. {0}".format(url))
-            elif e.code == 401:
-                self.needToLogin = True
-                data = ''
-            else:
-                raise e
-
-        if "The requested file has not been found" in data:
-            raise exceptions.StoryDoesNotExist(
-                "{0}.{1} says: The requested file has not been found".format(
-                    self.zone, self.getBaseDomain()))
-
-        # use BeautifulSoup HTML parser to make everything easier to find.
-        soup = self.make_soup(data)
+        soup = self.make_soup(self.get_request(url+'index/'))
 
         # Find authorid and URL.
         # There is no place where the author's name is listed,

@@ -196,77 +173,83 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a)
         self.story.setMetadata('authorUrl','https://{0}/'.format(self.parsedUrl.netloc))
 
-        loginUrl = self.story.getMetadata('authorUrl')+'account/'
-        loginsoup = self.make_soup(self._fetchUrl(loginUrl))
-        if True:
-        # if self.performLogin(loginUrl, loginsoup):
-            # Now go hunting for all the meta data and the chapter list.
-            ## Title
-            a = soup.find('h2')
-            self.story.setMetadata('title',stripHTML(a))
-
-            # Find the chapters:
-            # The published and update dates are with the chapter links...
-            # so we have to get them from there.
-            chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata(
-                'storyId')+'/([a-zA-Z0-9_]+)/'))
-
-            # Here we are getting the published date. It is the date the first chapter was "updated"
-            updatedate = stripHTML(unicode(chapters[0].parent)).split('Uploaded on:')[1].strip()
-            updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
-                'rd ',' ').replace('th ',' ')
-            self.story.setMetadata('datePublished', makeDate(updatedate, self.dateformat))
-
-            for i, chapter in enumerate(chapters):
-                if '/reviews/' not in chapter['href']:
-                    # here we get the update date. We will update this for every chapter,
-                    # so we get the last one.
-                    updatedate = stripHTML(unicode(chapters[i].parent)).split(
-                        'Uploaded on:')[1].strip()
-                    updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
-                        'rd ',' ').replace('th ',' ')
-                    self.story.setMetadata('dateUpdated', makeDate(updatedate, self.dateformat))
-
-                    if '::' in stripHTML(unicode(chapter)):
-                        chapter_title = stripHTML(unicode(chapter).split('::')[1])
-                    else:
-                        chapter_title = stripHTML(unicode(chapter))
-                    chapter_Url = self.story.getMetadata('authorUrl')+chapter['href'][1:]
-                    self.add_chapter(chapter_title, chapter_Url)
-
-            # Status: Completed - Rating: Adult Only - Chapters: 19 - Word count: 323,805 - Genre: Post-OotP
-            # Status: In progress - Rating: Adult Only - Chapters: 42 - Word count: 395,991 - Genre: Action/Adventure, Angst, Drama, Romance, Tragedy
-            # Status: Completed - Rating: Everyone - Chapters: 1 - Word count: 876 - Genre: Sorrow
-            # Status: In progress - Rating: Mature - Chapters: 39 - Word count: 314,544 - Genre: Drama - Romance
-            div = soup.find('div',{'class':'well'})
-            # logger.debug(div.find_all('p')[1])
-            metaline = re.sub(r' +',' ',stripHTML(div.find_all('p')[1]).replace('\n',' '))
-            # logger.debug(metaline)
-            match = re.match(r"Status: (?P<status>.+?) - Rating: (?P<rating>.+?) - Chapters: [0-9,]+ - Word count: (?P<numWords>[0-9,]+?) - Genre: (?P<genre>.+?)$",metaline)
-            if match:
-                # logger.debug(match.group('status'))
-                # logger.debug(match.group('rating'))
-                # logger.debug(match.group('numWords'))
-                # logger.debug(match.group('genre'))
-                if "Completed" in match.group('status'):
-                    self.story.setMetadata('status',"Completed")
-                else:
-                    self.story.setMetadata('status',"In-Progress")
-                self.story.setMetadata('rating',match.group('rating'))
-                self.story.setMetadata('numWords',match.group('numWords'))
-                self.story.extendList('genre',re.split(r'[;,-]',match.group('genre')))
-            else:
-                raise exceptions.FailedToDownload("Error parsing metadata: '{0}'".format(url))
-
-            summary = div.find('blockquote').get_text()
-            self.setDescription(url,summary)
+        ## Title
+        a = soup.find('h2')
+        self.story.setMetadata('title',stripHTML(a))
+
+        # Find the chapters:
+        # The published and update dates are with the chapter links...
+        # so we have to get them from there.
+        chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
+            'storyId')+'/([a-zA-Z0-9_]+)/'))
+
+        # Here we are getting the published date. It is the date the first chapter was "updated"
+        updatedate = stripHTML(unicode(chapters[0].parent)).split('Uploaded on:')[1].strip()
+        updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
+            'rd ',' ').replace('th ',' ')
+        self.story.setMetadata('datePublished', makeDate(updatedate, self.dateformat))
+
+        # Status: Completed - Rating: Adult Only - Chapters: 19 - Word count: 323,805 - Genre: Post-OotP
+        # Status: In progress - Rating: Adult Only - Chapters: 42 - Word count: 395,991 - Genre: Action/Adventure, Angst, Drama, Romance, Tragedy
+        # Status: Completed - Rating: Everyone - Chapters: 1 - Word count: 876 - Genre: Sorrow
+        # Status: In progress - Rating: Mature - Chapters: 39 - Word count: 314,544 - Genre: Drama - Romance
+        div = soup.find('div',{'class':'well'})
+        # logger.debug(div.find_all('p')[1])
+        metaline = re.sub(r' +',' ',stripHTML(div.find_all('p')[1]).replace('\n',' '))
+        # logger.debug(metaline)
+        match = re.match(r"Status: (?P<status>.+?) - Rating: (?P<rating>.+?) - Chapters: [0-9,]+ - Word count: (?P<numWords>[0-9,]+?) - Genre: ?(?P<genre>.*?)$",metaline)
+        if match:
+            # logger.debug(match.group('status'))
+            # logger.debug(match.group('rating'))
+            # logger.debug(match.group('numWords'))
+            # logger.debug(match.group('genre'))
+            if "Completed" in match.group('status'):
+                self.story.setMetadata('status',"Completed")
+            else:
+                self.story.setMetadata('status',"In-Progress")
+            self.story.setMetadata('rating',match.group('rating'))
+            self.story.setMetadata('numWords',match.group('numWords'))
+            self.story.extendList('genre',re.split(r'[;,-]',match.group('genre')))
+        else:
+            raise exceptions.FailedToDownload("Error parsing metadata: '{0}'".format(url))
+
+        summary = div.find('blockquote').get_text()
+        self.setDescription(url,summary)
+
+        ## Raising AdultCheckRequired after collecting chapters gives
+        ## a double chapter list.  So does genre, but it de-dups
+        ## automatically.
+        if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only']
+            and not (self.is_adult or self.getConfig("is_adult")) ):
+            raise exceptions.AdultCheckRequired(self.url)
+
+        for i, chapter in enumerate(chapters):
+            if '/reviews/' not in chapter['href']:
+                # here we get the update date. We will update this for every chapter,
+                # so we get the last one.
+                updatedate = stripHTML(unicode(chapters[i].parent)).split(
+                    'Uploaded on:')[1].strip()
+                updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
+                    'rd ',' ').replace('th ',' ')
+                self.story.setMetadata('dateUpdated', makeDate(updatedate, self.dateformat))
+
+                if '::' in stripHTML(unicode(chapter)):
+                    chapter_title = stripHTML(unicode(chapter).split('::')[1])
+                else:
+                    chapter_title = stripHTML(unicode(chapter))
+                chapter_Url = self.story.getMetadata('authorUrl')+chapter['href'][1:]
+                self.add_chapter(chapter_title, chapter_Url)
 
     # grab the text for an individual chapter.
     def getChapterText(self, url):
         logger.debug('Getting chapter text from: %s' % url)
+        if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only'] and
+            (self.is_adult or self.getConfig("is_adult")) ):
+            addurl = "?bypass=1"
+        else:
+            addurl=""
 
-        soup = self.make_soup(self._fetchUrl(url))
+        soup = self.make_soup(self.get_request(url+addurl))
 
         story = soup.find('div',{'class':'story'})

@@ -275,8 +258,8 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
             "Error downloading Chapter: '{0}'! Missing required element!".format(url))
 
         #Now, there are a lot of extranious tags within the story division.. so we will remove them.
-        for tag in story.findAll('ul',{'class':'pager'}) + story.findAll(
-            'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}):
+        for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
+            'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
             tag.extract()
 
         return self.utf8FromSoup(url,story)
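
A quick illustrative check (not from the commit) that the loosened metaline
regex above now also tolerates a missing Genre field, using one of the sample
lines quoted in the code's own comments:

    import re
    metaline = "Status: Completed - Rating: Everyone - Chapters: 1 - Word count: 876 - Genre: Sorrow"
    m = re.match(r"Status: (?P<status>.+?) - Rating: (?P<rating>.+?)"
                 r" - Chapters: [0-9,]+ - Word count: (?P<numWords>[0-9,]+?)"
                 r" - Genre: ?(?P<genre>.*?)$", metaline)
    print(m.group('status'), m.group('numWords'), m.group('genre'))
    # Completed 876 Sorrow
    # The old (?P<genre>.+?) required at least one genre character, so a bare
    # "... - Genre: " line failed to match; (?P<genre>.*?) accepts it as ''.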

316 fanficfare/adapters/adapter_fanficcastletvnet.py Deleted file

@@ -1,316 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

# In general an 'adapter' needs to do these five things:

# - 'Register' correctly with the downloader
# - Site Login (if needed)
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
# - Grab the chapter list
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
# - Grab the chapter texts

# Search for XXX comments--that's where things are most likely to need changing.

# This function is called by the downloader in all adapter_*.py files
# in this dir to register the adapter class.  So it needs to be
# updated to reflect the class below it.  That, plus getSiteDomain()
# take care of 'Registering'.
def getClass():
    return FanficCastleTVNetAdapter # XXX

# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        # XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','csltv') # XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y" # XXX

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'fanfic.castletv.net' # XXX

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number.  print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&ageconsent=ok&warning=3"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
        if m != None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        pagetitle = soup.find('div',{'id':'pagetitle'})
        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Reviews
        reviewdata = soup.find('div', {'id' : 'sort'})
        a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
        self.story.setMetadata('reviews',stripHTML(a))

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)


        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""
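        # Illustration (not in the original file): defaultGetattr papers over
        # the difference between Tag and NavigableString siblings in the
        # label walk below -- d['class'] raises on a plain text node, and
        # this helper turns that into a harmless "".  e.g.
        #   defaultGetattr(soup.span, 'class')             -> ['label']
        #   defaultGetattr(soup.span.nextSibling, 'class') -> ""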

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while value and 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                catstext = [cat.string for cat in cats]
                for cat in catstext:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                charstext = [char.string for char in chars]
                for char in charstext:
                    self.story.addToList('characters',char.string)

            ## Not all sites use Genre, but there's no harm to
            ## leaving it in.  Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                genrestext = [genre.string for genre in genres]
                self.genre = ', '.join(genrestext)
                for genre in genrestext:
                    self.story.addToList('genre',genre.string)

            ## Not all sites use Warnings, but there's no harm to
            ## leaving it in.  Check to make sure the type_id number
            ## is correct, though--it's site specific.
            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                warningstext = [warning.string for warning in warnings]
                self.warning = ', '.join(warningstext)
                for warning in warningstext:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

202 fanficfare/adapters/adapter_fanfichu.py Deleted file

@@ -1,202 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import re
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six import ensure_text
from ..six.moves.urllib import parse as urlparse
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate
from .. import exceptions


_SOURCE_CODE_ENCODING = 'utf-8'


def getClass():
    return FanficHuAdapter


def _get_query_data(url):
    components = urlparse.urlparse(url)
    query_data = urlparse.parse_qs(components.query)
    return dict((key, data[0]) for key, data in query_data.items())
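# Illustration (not in the original file): _get_query_data flattens the
# lists that parse_qs returns, keeping only the first value per key, e.g.
#   _get_query_data('viewstory.php?sid=123&i=1') -> {'sid': '123', 'i': '1'}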


class FanficHuAdapter(BaseSiteAdapter):
    SITE_ABBREVIATION = 'ffh'
    SITE_DOMAIN = 'fanfic.hu'
    SITE_LANGUAGE = 'Hungarian'

    BASE_URL = 'https://' + SITE_DOMAIN + '/merengo/'
    VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%s'

    DATE_FORMAT = '%m/%d/%Y'

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        query_data = urlparse.parse_qs(self.parsedUrl.query)
        story_id = query_data['sid'][0]

        self.story.setMetadata('storyId', story_id)
        self._setURL(self.VIEW_STORY_URL_TEMPLATE % story_id)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
        self.story.setMetadata('language', self.SITE_LANGUAGE)

    def _customized_fetch_url(self, url, exception=None, parameters=None):
        if exception:
            try:
                data = self._fetchUrl(url, parameters)
            except HTTPError:
                raise exception(self.url)
        # Just let self._fetchUrl throw the exception, don't catch and
        # customize it.
        else:
            data = self._fetchUrl(url, parameters)

        return self.make_soup(data)

    @staticmethod
    def getSiteDomain():
        return FanficHuAdapter.SITE_DOMAIN

    @classmethod
    def getSiteExampleURLs(cls):
        return cls.VIEW_STORY_URL_TEMPLATE % 1234

    def getSiteURLPattern(self):
        return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'

    def extractChapterUrlsAndMetadata(self):
        soup = self._customized_fetch_url(self.url + '&i=1')

        if ensure_text(soup.title.string).strip(u' :') == u'írta':
            raise exceptions.StoryDoesNotExist(self.url)

        chapter_options = soup.find('form', action='viewstory.php').select('option')
        # Remove redundant "Fejezetek" option
        chapter_options.pop(0)

        # If there is still more than one entry remove chapter overview entry
        if len(chapter_options) > 1:
            chapter_options.pop(0)

        for option in chapter_options:
            url = urlparse.urljoin(self.url, option['value'])
            self.add_chapter(option.string, url)

        author_url = urlparse.urljoin(self.BASE_URL, soup.find('a', href=lambda href: href and href.startswith('viewuser.php?uid='))['href'])
        soup = self._customized_fetch_url(author_url)

        story_id = self.story.getMetadata('storyId')
        for table in soup('table', {'class': 'mainnav'}):
            title_anchor = table.find('span', {'class': 'storytitle'}).a
            href = title_anchor['href']
            if href.startswith('javascript:'):
                href = href.rsplit(' ', 1)[1].strip("'")
            query_data = _get_query_data(href)

            if query_data['sid'] == story_id:
                break
        else:
            # This should never happen, the story must be found on the author's
            # page.
            raise exceptions.FailedToDownload(self.url)

        self.story.setMetadata('title', title_anchor.string)

        rows = table('tr')

        anchors = rows[0].div('a')
        author_anchor = anchors[1]
        query_data = _get_query_data(author_anchor['href'])
        self.story.setMetadata('author', author_anchor.string)
        self.story.setMetadata('authorId', query_data['uid'])
        self.story.setMetadata('authorUrl', urlparse.urljoin(self.BASE_URL, author_anchor['href']))
        self.story.setMetadata('reviews', anchors[3].string)

        if self.getConfig('keep_summary_html'):
            self.story.setMetadata('description', self.utf8FromSoup(author_url, rows[1].td))
        else:
            self.story.setMetadata('description', ''.join(rows[1].td(text=True)))

        for row in rows[3:]:
            index = 0
            cells = row('td')

            while index < len(cells):
                cell = cells[index]
                key = ensure_text(cell.b.string).strip(u':')
                try:
                    value = ensure_text(cells[index+1].string)
                except:
                    value = None

                if key == u'Kategória':
                    for anchor in cells[index+1]('a'):
                        self.story.addToList('category', anchor.string)

                elif key == u'Szereplõk':
                    if cells[index+1].string:
                        for name in cells[index+1].string.split(', '):
                            self.story.addToList('character', name)

                elif key == u'Korhatár':
                    if value != 'nem korhatáros':
                        self.story.setMetadata('rating', value)

                elif key == u'Figyelmeztetések':
                    for b_tag in cells[index+1]('b'):
                        self.story.addToList('warnings', b_tag.string)

                elif key == u'Jellemzõk':
                    for genre in cells[index+1].string.split(', '):
                        self.story.addToList('genre', genre)

                elif key == u'Fejezetek':
                    self.story.setMetadata('numChapters', int(value))

                elif key == u'Megjelenés':
                    self.story.setMetadata('datePublished', makeDate(value, self.DATE_FORMAT))

                elif key == u'Frissítés':
                    self.story.setMetadata('dateUpdated', makeDate(value, self.DATE_FORMAT))

                elif key == u'Szavak':
                    self.story.setMetadata('numWords', value)

                elif key == u'Befejezett':
                    self.story.setMetadata('status', 'Completed' if value == 'Nem' else 'In-Progress')

                index += 2

        if self.story.getMetadata('rating') == '18':
            if not (self.is_adult or self.getConfig('is_adult')):
                raise exceptions.AdultCheckRequired(self.url)

    def getChapterText(self, url):
        soup = self._customized_fetch_url(url)
        story_cell = soup.find('form', action='viewstory.php').parent.parent

        for div in story_cell('div'):
            div.extract()

        return self.utf8FromSoup(url, story_cell)

324 fanficfare/adapters/adapter_fanficsme.py Normal file

@@ -0,0 +1,324 @@
# -*- coding: utf-8 -*-

# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return FanFicsMeAdapter


logger = logging.getLogger(__name__)

class FanFicsMeAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        self.full_work_soup = None
        self.use_full_work_soup = True

        ## All Russian as far as I know.
        self.story.setMetadata('language','Russian')

        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('storyId',m.group('id'))

            # normalized story URL.
            self._setURL('https://' + self.getSiteDomain() + '/fic'+self.story.getMetadata('storyId'))
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ffme')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d.%m.%Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'fanfics.me'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/fic1234 https://"+cls.getSiteDomain()+"/read.php?id=1234 https://"+cls.getSiteDomain()+"/read.php?id=1234&chapter=2"

    def getSiteURLPattern(self):
        # https://fanfics.me/fic137282
        # https://fanfics.me/read.php?id=137282
        # https://fanfics.me/read.php?id=137282&chapter=2
        # https://fanfics.me/download.php?fic=137282&format=epub
        return r"https?://"+re.escape(self.getSiteDomain())+r"/(fic|read\.php\?id=|download\.php\?fic=)(?P<id>\d+)"
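    # Illustration (not in the original file): every example URL form
    # above yields the same named story id, e.g.
    #   re.match(pattern, 'https://fanfics.me/read.php?id=137282&chapter=2').group('id')
    #   -> '137282'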

    ## Login
    def needToLoginCheck(self, data):
        return '<form name="autent" action="https://fanfics.me/autent.php" method="post">' in data

    def performLogin(self, url):
        '''
        <form name="autent" action="https://fanfics.me/autent.php" method="post">
        Имя:<br>
        <input class="input_3" type="text" name="name" id="name"><br>
        Пароль:<br>
        <input class="input_3" type="password" name="pass" id="pass"><br>
        <input type="checkbox" name="nocookie" id="nocookie" /> <label for="nocookie">Чужой компьютер</label><br>
        <input class="modern_button" type="submit" value="Войти">
        <div class="lostpass center"><a href="/index.php?section=lostpass">Забыл пароль</a></div>
        '''
        params = {}
        if self.password:
            params['name'] = self.username
            params['pass'] = self.password
        else:
            params['name'] = self.getConfig("username")
            params['pass'] = self.getConfig("password")

        loginUrl = 'https://' + self.getSiteDomain() + '/autent.php'
        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                            params['name']))
        ## must need a cookie or something.
        self.get_request(loginUrl, usecache=False)
        d = self.post_request(loginUrl, params, usecache=False)

        if self.needToLoginCheck(d):
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['name']))
            raise exceptions.FailedToLogin(url,params['name'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        url = self.url
        logger.info("url: "+url)

        data = self.get_request(url)

        soup = self.make_soup(data)

        ## restrict meta searches to header.
        fichead = soup.find('div',class_='FicHead')
        def get_meta_content(title):
            val_label = fichead.find('div',string=re.compile(u'^'+title+u':'))
            if val_label:
                return val_label.find_next('div')
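        # Illustration (not in the original file): FicHead lays its metadata
        # out as sibling <div>Label:</div><div>value</div> pairs, so locating
        # the label div and taking the following <div> returns the value
        # node, or None when that label is absent from this story's header.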

        ## fanfics.me doesn't have separate adult--you have to set
        ## your age to 18+ in your user account
        ## Rating
        ## R, NC-17, PG-13 require login
        ## doesn't: General
        #('Рейтинг', 'rating', False, False)
        # val_label = fichead.find('div',string=u'Рейтинг:')
        # val = stripHTML(val_label.find_next('div'))
        # logger.debug(val)
        self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))

        ## Need to login for any rating higher than General.
        if self.story.getMetadataRaw('rating') != 'General' and self.needToLoginCheck(data):
            self.performLogin(url)
            # reload after login.
            data = self.get_request(url,usecache=False)
            soup = self.make_soup(data)
            fichead = soup.find('div',class_='FicHead')

        ## Title
        ## <h1>Третья сторона <span class="small green">(гет)</span></h1>
        h = fichead.find('h1')
        span = h.find('span')
        ## I haven't found a term for what fanfics.me calls this, but
        ## it translates to Get Jen Slash Femslash
        self.story.addToList('category',stripHTML(span)[1:-1])
        span.extract()
        self.story.setMetadata('title',stripHTML(h))

        ## author(s):
        content = get_meta_content(u'Авторы?')
        if content:
            alist = content.find_all('a', class_='user')
            for a in alist:
                self.story.addToList('authorId',a['href'].split('/user')[-1])
                self.story.addToList('authorUrl','https://'+self.host+a['href'])
                self.story.addToList('author',stripHTML(a))
            # can be deliberately anonymous.
            if not alist:
                self.story.setMetadata('author','Anonymous')
                self.story.setMetadata('authorUrl','https://'+self.host)
                self.story.setMetadata('authorId','0')

        # translator(s) in different strings
        content = get_meta_content(u'Переводчикк?и?')
        if content:
            for a in content.find_all('a', class_='user'):
                self.story.addToList('translatorsId',a['href'].split('/user')[-1])
                self.story.addToList('translatorsUrl','https://'+self.host+a['href'])
                self.story.addToList('translators',stripHTML(a))

        # If there are translators, but no authors, copy translators to authors.
        if self.story.getList('translators') and not self.story.getList('author'):
            self.story.extendList('authorId',self.story.getList('translatorsId'))
            self.story.extendList('authorUrl',self.story.getList('translatorsUrl'))
            self.story.extendList('author',self.story.getList('translators'))

        # beta(s)
        content = get_meta_content(u'Бета')
        if content:
            for a in content.find_all('a', class_='user'):
                self.story.addToList('betasId',a['href'].split('/user')[-1])
                self.story.addToList('betasUrl','https://'+self.host+a['href'])
                self.story.addToList('betas',stripHTML(a))

        content = get_meta_content(u'Фандом')
        self.story.extendList('fandoms', [ stripHTML(a) for a in
                                           fichead.find_all('a',href=re.compile(r'/fandom\d+$')) ] )

        ## 'Characters' header has both ships and chars lists
        content = get_meta_content(u'Персонажи')
        if content:
            self.story.extendList('ships', [ stripHTML(a) for a in
                                             content.find_all('a',href=re.compile(r'/paring\d+_\d+$')) ] )
            for ship in self.story.getList('ships'):
                self.story.extendList('characters', ship.split('/'))
            self.story.extendList('characters', [ stripHTML(a) for a in
                                                  content.find_all('a',href=re.compile(r'/character\d+$')) ] )

        self.story.extendList('genre',stripHTML(get_meta_content(u'Жанр')).split(', '))
        ## fanfics.me includes 'AU' and 'OOC' as warnings...
        content = get_meta_content(u'Предупреждение')
        if content:
            self.story.extendList('warnings',stripHTML(content).split(', '))

        content = get_meta_content(u'События')
        if content:
            self.story.extendList('events', [ stripHTML(a) for a in
                                              content.find_all('a',href=re.compile(r'/find\?keyword=\d+$')) ] )

        ## Original work block
        content = get_meta_content(u'Оригинал')
        if content:
            # only going to record URL.
            titletd = content.find('td',string=u'Ссылка:')
            self.story.setMetadata('originUrl',stripHTML(titletd.find_next('td')))

        ## size block, only saving word count.
        content = get_meta_content(u'Размер')
        words = stripHTML(content.find('a'))
        words = re.sub(r'[^0-9]','',words) # only keep numbers
        self.story.setMetadata('numWords',words)

        ## status by color code
        statuscolors = {'red':'In-Progress',
                        'green':'Completed',
                        'blue':'Hiatus'}
        content = get_meta_content(u'Статус')
        self.story.setMetadata('status',statuscolors[content.span['class'][0]])

        # desc
        self.setDescription(url,soup.find('div',id='summary_'+self.story.getMetadata('storyId')))

        # cover
        div = fichead.find('div',class_='FicHead_cover')
        if div:
            # get the larger version.
            self.setCoverImage(self.url,div.img['src'].replace('_200_300',''))

        # dates
        # <span class="DateUpdate" title="Опубликовано 22.04.2020, изменено 22.04.2020">22.04.2020 - 22.04.2020</span>
        datespan = soup.find('span',class_='DateUpdate')
        dates = stripHTML(datespan).split(" - ")
        self.story.setMetadata('datePublished', makeDate(dates[0], self.dateformat))
        self.story.setMetadata('dateUpdated', makeDate(dates[1], self.dateformat))
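        # e.g. (illustration, not in the original file): the span text
        # '22.04.2020 - 22.04.2020' splits into dates[0]='22.04.2020'
        # (published) and dates[1]='22.04.2020' (updated); both parse
        # with self.dateformat '%d.%m.%Y'.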

        # series
        seriesdiv = soup.find('div',id='fic_info_content_serie')
        if seriesdiv:
            seriesa = seriesdiv.find('a', href=re.compile(r'/serie\d+$'))
            i=1
            for a in seriesdiv.find_all('a', href=re.compile(r'/fic\d+$')):
                if a['href'] == ('/fic'+self.story.getMetadata('storyId')):
                    self.setSeries(stripHTML(seriesa), i)
                    self.story.setMetadata('seriesUrl','https://'+self.host+seriesa['href'])
                    break
                i+=1


        chapteruls = soup.find_all('ul',class_='FicContents')
        if chapteruls:
            for ul in chapteruls:
                # logger.debug(ul.prettify())
                for chapter in ul.find_all('li'):
                    a = chapter.find('a')
                    # logger.debug(a.prettify())
                    if a and a.has_attr('href'):
                        # logger.debug(chapter.prettify())
                        self.add_chapter(stripHTML(a),'https://' + self.getSiteDomain() + a['href'])
        else:
            self.add_chapter(self.story.getMetadata('title'),
                             'https://' + self.getSiteDomain() +
                             '/read.php?id='+self.story.getMetadata('storyId')+'&chapter=0')

        return

    # grab the text for an individual chapter.
    def getChapterTextNum(self, url, index):
        logger.debug('Getting chapter text for: %s index: %s' % (url,index))
        m = re.match(r'.*&chapter=(\d+).*',url)
        if m:
            index=m.group(1)
            logger.debug("Using index(%s) from &chapter="%index)

        chapter_div = None
        if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
            logger.debug("USE view_full_work")
            ## Assumed view_adult=true was cookied during metadata
            if not self.full_work_soup:
                self.full_work_soup = self.make_soup(self.get_request(
                    'https://' + self.getSiteDomain() + '/read.php?id='+self.story.getMetadata('storyId')))

            whole_dl_soup = self.full_work_soup
            chapter_div = whole_dl_soup.find('div',{'id':'c%s'%(index)})
            if not chapter_div:
                self.use_full_work_soup = False
                logger.warning("c%s not found in view_full_work--ending use_view_full_work"%(index))
        if chapter_div == None:
            whole_dl_soup = self.make_soup(self.get_request(url))
            chapter_div = whole_dl_soup.find('div',{'id':'c%s'%(index)})
        if None == chapter_div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,chapter_div)
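
A condensed sketch (illustrative only; assumes requests and bs4 are
available, and that fanfics.me really serves every chapter of a story on one
read.php page under div ids c0, c1, ...) of the fetch-once-then-slice
pattern getChapterTextNum uses above:

    import requests
    from bs4 import BeautifulSoup

    _full_soup_cache = {}

    def chapter_div(story_id, index):
        # fetch the whole story once, then slice chapters out of the cache
        if story_id not in _full_soup_cache:
            html = requests.get('https://fanfics.me/read.php?id=%s' % story_id).text
            _full_soup_cache[story_id] = BeautifulSoup(html, 'html.parser')
        return _full_soup_cache[story_id].find('div', {'id': 'c%s' % index})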

224 fanficfare/adapters/adapter_fanfictalkcom.py Normal file

@@ -0,0 +1,224 @@
# -*- coding: utf-8 -*-

# Copyright 2013 Fanficdownloader team, 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return FanfictalkComAdapter

# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class FanfictalkComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ahpfftc')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y"

    @classmethod
    def getAcceptDomains(cls):
        return [cls.getSiteDomain(),'archive.hpfanfictalk.com','fanfictalk.com']

    @classmethod
    def getConfigSections(cls):
        "Only needs to be overriden if has additional ini sections."
        return [cls.getConfigSection(),'archive.hpfanfictalk.com','fanfictalk.com']

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'archive.fanfictalk.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?://("+r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()])+r")(/archive)?/viewstory\.php\?sid=\d+$"
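    # Illustration (not in the original file): with the three accept
    # domains above, the join builds
    #   https?://(archive\.fanfictalk\.com|archive\.hpfanfictalk\.com|fanfictalk\.com)(/archive)?/viewstory\.php\?sid=\d+$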
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=3"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
## Title and author
|
||||
soup = self.make_soup(data)
|
||||
# logger.debug(soup)
|
||||
|
||||
|
||||
pagetitle = soup.select_one('div#pagetitle')
|
||||
# logger.debug(pagetitle)
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
|
||||
self.story.addToList('authorId',a['href'].split('=')[1])
|
||||
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.addToList('author',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
|
||||
|
||||
# categories
|
||||
for a in soup.select("div#sort a"):
|
||||
self.story.addToList('category',stripHTML(a))
|
||||
|
||||
# this site has two divs with class=gb-50 and no immediate container.
|
||||
gb50s = soup.find_all('div', {'class':'gb-50'})
|
||||
|
||||
def list_from_urls(parent, regex, metadata):
|
||||
urls = parent.find_all('a',href=re.compile(regex))
|
||||
for url in urls:
|
||||
self.story.addToList(metadata,stripHTML(url))
|
||||
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=characters','characters')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=11','ships')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=10','representation')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=7','storytype')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=14','house')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=8','warnings')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=15','contentwarnings')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=4','genre')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=13','tropes')
|
||||
|
||||
bq = soup.find('blockquote2')
|
||||
if bq:
|
||||
# blockquote2??? Whatever. But we're changing it to a real tag.
|
||||
bq.name='div'
|
||||
self.setDescription(url,bq)
|
||||
|
||||
# usually use something more precise for label search, but
|
||||
# site doesn't group much.
|
||||
labels = soup.find_all('b')
|
||||
for labelspan in labels:
|
||||
# logger.debug(labelspan)
|
||||
value = labelspan.nextSibling
|
||||
label = stripHTML(labelspan)
|
||||
# logger.debug(value)
|
||||
# logger.debug(label)
|
||||
|
||||
if 'Words:' in label:
|
||||
stripHTML(value)
|
||||
self.story.setMetadata('numWords', stripHTML(value).replace('·',''))
|
||||
|
||||
if 'Published:' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value).replace('·',''), self.dateformat))
|
||||
|
||||
if 'Updated:' in label:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value).replace('·',''), self.dateformat))
|
||||
|
||||
# Site allows stories to be in several series at once. FFF
|
||||
# isn't thrilled with that, we have series00, series01, etc.
|
||||
# Example:
|
||||
# https://archive.fanfictalk.com/viewstory.php?sid=483

        if self.getConfig("collect_series"):
            seriesspan = soup.find('span',label='Series')
            for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))):
                # logger.debug(seriesa)
                series_name = stripHTML(seriesa)
                series_url = 'https://'+self.host+'/'+seriesa['href']

                seriessoup = self.make_soup(self.get_request(series_url))
                storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
                # logger.debug(storyas)
                j=1
                found = False
                for storya in storyas:
                    # logger.debug(storya)
                    ## allow for JS links.
                    if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in storya['href']:
                        found = True
                        break
                    j+=1
                if found:
                    series_index = j
                    self.story.setMetadata('series%02d'%i,"%s [%s]"%(series_name,series_index))
                    self.story.setMetadata('series%02dUrl'%i,series_url)
                    if i == 0:
                        self.setSeries(series_name, series_index)
                        self.story.setMetadata('seriesUrl',series_url)
                else:
                    logger.debug("Story URL not found in series (%s) page, not including."%series_url)

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. Print the data at that point
            # and see what the 'click here to continue' url says.
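            # For example (hypothetical): if the site's 'click here to
            # continue' link reads viewstory.php?sid=123&ageconsent=ok&warning=5,
            # then warning=5 is the number to use below.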
            addurl = "&ageconsent=ok&warning=3"
        else:
            addurl=""

        logger.debug('Getting chapter text from: %s' % (url+addurl))
        soup = self.make_soup(self.get_request(url+addurl))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
@@ -1,285 +0,0 @@
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
# By virtue of being recent and requiring both is_adult and user/pass,
|
||||
# adapter_fanficcastletvnet.py is the best choice for learning to
|
||||
# write adapters--especially for sites that use the eFiction system.
|
||||
# Most sites that have ".../viewstory.php?sid=123" in the story URL
|
||||
# are eFiction.
|
||||
|
||||
# For non-eFiction sites, it can be considerably more complex, but
|
||||
# this is still a good starting point.
|
||||
|
||||
# In general an 'adapter' needs to do these five things:
|
||||
|
||||
# - 'Register' correctly with the downloader
|
||||
# - Site Login (if needed)
|
||||
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
|
||||
# - Grab the chapter list
|
||||
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
|
||||
# - Grab the chapter texts
|
||||
|
||||
# Search for XXX comments--that's where things are most likely to need changing.
|
||||
|
||||
# This function is called by the downloader in all adapter_*.py files
|
||||
# in this dir to register the adapter class. So it needs to be
|
||||
# updated to reflect the class below it. That, plus getSiteDomain()
|
||||
# take care of 'Registering'.
|
||||
def getClass():
|
||||
return FanfictionJunkiesDeAdapter # XXX
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
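# e.g. (hypothetical) a query of 'sid=1234' splits into ['sid','1234'], so storyId becomes '1234'.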
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ffjde') # XXX
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d/%m/%y" # XXX
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'fanfiction-junkies.de' # XXX
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/efiction/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/efiction/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=1" # XXX
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
# the title search to troubleshoot.
|
||||
if "For adults only " in data: # XXX
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('h4')
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/efiction/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Reviews
|
||||
reviewdata = soup.find('div', {'id' : 'sort'})
|
||||
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
|
||||
self.story.setMetadata('reviews',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there are tags, like <i>, in chapter titles.
|
||||
self.add_chapter(chapter,'http://'+self.host+'/efiction/'+chapter['href']+addurl)
|
||||
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formatting, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
list = soup.find('div', {'class':'listbox'})
|
||||
|
||||
|
||||
labels = list.findAll('b')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Zusammenfassung' in label:
|
||||
self.setDescription(url,value)
|
||||
|
||||
if 'Eingestuft' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if u'Wörter' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Kategorie' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Charaktere' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Abgeschlossen' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if u'Veröffentlicht' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Aktualisiert' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/efiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
@@ -23,17 +23,23 @@ import re
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from ..six.moves.urllib.parse import urlparse
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
from .base_adapter import BaseSiteAdapter
|
||||
|
||||
ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy", "Friendship", "General",
|
||||
"Horror", "Humor", "Hurt-Comfort", "Mystery", "Parody", "Poetry", "Romance", "Sci-Fi",
|
||||
"Spiritual", "Supernatural", "Suspense", "Tragedy", "Western"]
|
||||
ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy",
|
||||
"Friendship", "General", "Horror", "Humor", "Hurt-Comfort",
|
||||
"Mystery", "Parody", "Poetry", "Romance", "Sci-Fi", "Spiritual",
|
||||
"Supernatural", "Suspense", "Tragedy", "Western"]
|
||||
|
||||
ffnetpluscategories=["+Anima", "Alex + Ada", "Rosario + Vampire", "Blood+",
|
||||
"+C: Sword and Cornett", "Norn9 - ノルン+ノネット",
|
||||
"Haré+Guu/ジャングルはいつもハレのちグゥ", "Lost+Brain",
|
||||
"Wicked + The Divine", "Alex + Ada", "RE: Alistair++",
|
||||
"Tristan + Isolde"]
|
||||
|
||||
class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
@@ -41,20 +47,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','ffnet')
|
||||
|
||||
# get storyId from url--url validation guarantees second part is storyId
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
self.set_story_idurl(url)
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/s/"+self.story.getMetadata('storyId')+"/1/")
|
||||
|
||||
# ffnet update emails have the latest chapter URL.
|
||||
# Frequently, when they arrive, not all the servers have the
|
||||
# latest chapter yet and going back to chapter 1 to pull the
|
||||
# chapter list doesn't get the latest. So save and use the
|
||||
# original URL given to pull chapter list & metadata.
|
||||
# Not used by plugin because URL gets normalized first for
|
||||
# eliminating duplicate story urls.
|
||||
self.origurl = url
|
||||
if "https://m." in self.origurl:
|
||||
## accept m(mobile)url, but use www.
|
||||
@@ -72,24 +66,74 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteExampleURLs(cls):
|
||||
return "https://www.fanfiction.net/s/1234/1/ https://www.fanfiction.net/s/1234/12/ http://www.fanfiction.net/s/1234/1/Story_Title http://m.fanfiction.net/s/1234/1/"
|
||||
|
||||
def set_story_idurl(self,url):
|
||||
parsedUrl = urlparse(url)
|
||||
pathparts = parsedUrl.path.split('/',)
|
||||
self.story.setMetadata('storyId',pathparts[2])
|
||||
self.urltitle='' if len(pathparts)<5 else pathparts[4]
|
||||
# normalized story URL.
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/s/"+self.story.getMetadata('storyId')+"/1/"+self.urltitle)
|
||||
|
||||
## here so getSiteURLPattern and get_section_url(class method) can
|
||||
## both use it. Note adapter_fictionpresscom has one too.
|
||||
@classmethod
|
||||
def _get_site_url_pattern(cls):
|
||||
return r"https?://(www|m)?\.fanfiction\.net/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
|
||||
|
||||
@classmethod
|
||||
def get_section_url(cls,url):
|
||||
## minimal URL used for section names in INI and reject list
|
||||
## for comparison
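# e.g. (hypothetical) https://www.fanfiction.net/s/1234/7/Some-Title
# reduces to https://www.fanfiction.net/s/1234/1/ before comparison.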
|
||||
# logger.debug("pre--url:%s"%url)
|
||||
m = re.match(cls._get_site_url_pattern(),url)
|
||||
if m:
|
||||
url = "https://"+cls.getSiteDomain()\
|
||||
+"/s/"+m.group('id')+"/1/"
|
||||
# logger.debug("post-url:%s"%url)
|
||||
return url
|
||||
|
||||
@classmethod
|
||||
def get_url_search(cls,url):
|
||||
regexp = super(getClass(), cls).get_url_search(url)
|
||||
regexp = re.sub(r"^(?P<keep>.*net/s/\d+/\d+/)(?P<urltitle>[^\$]*)?",
|
||||
r"\g<keep>(.*)",regexp)
|
||||
logger.debug(regexp)
|
||||
return regexp
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
|
||||
return self._get_site_url_pattern()
|
||||
|
||||
def _fetchUrl(self,url,parameters=None,extrasleep=1.0,usecache=True):
|
||||
## ffnet(and, I assume, fpcom) tends to fail more if hit too
|
||||
## fast. This is in addition to whatever the
|
||||
## slow_down_sleep_time setting is.
|
||||
return BaseSiteAdapter._fetchUrl(self,url,
|
||||
parameters=parameters,
|
||||
extrasleep=extrasleep,
|
||||
usecache=usecache)
|
||||
## normalized chapter URLs DO contain the story title now, but
|
||||
## normalized to current urltitle in case of title changes.
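## e.g. (hypothetical) http://m.fanfiction.net/s/1234/7/Old-Title becomes
## https://www.fanfiction.net/s/1234/7/ plus the current urltitle.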
|
||||
def normalize_chapterurl(self,url):
|
||||
return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
|
||||
r"https://www.\g<keep>",url)+self.urltitle
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
def get_request(self,url,usecache=True):
|
||||
## use super version if not set or isn't a chapter URL with a
|
||||
## title.
|
||||
if( not self.getConfig("try_shortened_title_urls") or
|
||||
not re.match(r"https?://www\.fanfiction\.net/s/\d+/\d+/(?P<title>[^/]+)$", url) ):
|
||||
return super(getClass(), self).get_request(url,usecache)
|
||||
|
||||
## kludgey way to attempt more than one URL variant by
|
||||
## removing title one letter at a time. Note that network and
|
||||
## open_pages_in_browser retries still happen first.
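## e.g. (hypothetical) for .../s/1/2/Title this tries 'Title', then 'Titl',
## 'Tit' and 'Ti', stopping at the first variant the site accepts.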
|
||||
titlelen = len(url.split('/')[-1])
|
||||
maxcut = min([4,titlelen])
|
||||
j = 0
|
||||
while j < maxcut: # should actually leave loop either by
|
||||
# return or exception raise.
|
||||
try:
|
||||
useurl = url
|
||||
if j: # j==0, full URL, then remove letters.
|
||||
useurl = url[:-j]
|
||||
return super(getClass(), self).get_request(useurl,usecache)
|
||||
except exceptions.HTTPErrorFFF as fffe:
|
||||
if j >= maxcut or 'Page not found or expired' not in unicode(fffe):
|
||||
raise
|
||||
j = j+1
|
||||
|
||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
|
||||
@@ -99,16 +143,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
url = self.origurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
#logger.debug("\n===================\n%s\n===================\n"%data)
|
||||
soup = self.make_soup(data)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
#logger.debug("\n===================\n%s\n===================\n"%data)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if "Unable to locate story" in data or "Story Not Found" in data:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
@@ -118,33 +155,48 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
if "Please check to see you are not using an outdated url." in data:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
|
||||
|
||||
if "Category for this story has been disabled" in data:
|
||||
raise exceptions.FailedToDownload("FanFiction.Net has removed the category for this story and will no longer serve it.")
|
||||
|
||||
# <link rel="canonical" href="//www.fanfiction.net/s/13551154/100/Haze-Gray">
|
||||
canonicalurl = soup.select_one('link[rel=canonical]')['href']
|
||||
self.set_story_idurl(canonicalurl)
|
||||
|
||||
## ffnet used to have a tendency to send out update notices in
|
||||
## email before all their servers were showing the update on
|
||||
## the first chapter. It generates another server request and
|
||||
## doesn't seem to be needed lately, so now default it to off.
|
||||
try:
|
||||
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
|
||||
# get chapter part of url.
|
||||
except:
|
||||
chapcount = 1
|
||||
have_later_meta = False
|
||||
if self.getConfig('check_next_chapter'):
|
||||
try:
|
||||
## ffnet used to have a tendency to send out update
|
||||
## notices in email before all their servers were
|
||||
## showing the update on the first chapter. It
|
||||
## generates another server request and doesn't seem
|
||||
## to be needed lately, so now default it to off.
|
||||
try:
|
||||
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
|
||||
# get chapter part of url.
|
||||
except:
|
||||
chapcount = 1
|
||||
tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount+1)
|
||||
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount+1,
|
||||
self.urltitle)
|
||||
logger.debug('=Trying newer chapter: %s' % tryurl)
|
||||
newdata = self._fetchUrl(tryurl)
|
||||
newdata = self.get_request(tryurl)
|
||||
if "not found. Please check to see you are not using an outdated url." not in newdata \
|
||||
and "This request takes too long to process, it is timed out by the server." not in newdata:
|
||||
logger.debug('=======Found newer chapter: %s' % tryurl)
|
||||
soup = self.make_soup(newdata)
|
||||
except HTTPError as e:
|
||||
if e.code == 503:
|
||||
raise e
|
||||
have_later_meta = True
|
||||
except Exception as e:
|
||||
logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
|
||||
pass
|
||||
logger.warning("Caught exception in check_next_chapter URL: %s Exception %s."%(unicode(tryurl),unicode(e)))
|
||||
|
||||
if self.getConfig('meta_from_last_chapter') and not have_later_meta and chapcount > 1:
|
||||
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount,
|
||||
self.urltitle)
|
||||
logger.debug('=Trying last chapter for meta_from_last_chapter: %s' % tryurl)
|
||||
newdata = self.get_request(tryurl)
|
||||
soup = self.make_soup(newdata)
|
||||
have_later_meta = True
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"^/u/\d+"))
|
||||
@@ -159,8 +211,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## 2) cat1_cat2_Crossover
|
||||
## For 1, use the second link.
|
||||
## For 2, fetch the crossover page and pull the two categories from there.
|
||||
|
||||
categories = soup.find('div',{'id':'pre_story_links'}).findAll('a',{'class':'xcontrast_txt'})
|
||||
pre_links = soup.find('div',{'id':'pre_story_links'})
|
||||
categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
|
||||
#print("xcontrast_txt a:%s"%categories)
|
||||
if len(categories) > 1:
|
||||
# Strangely, the ones with *two* links are the
|
||||
@@ -168,20 +220,17 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# of Book, Movie, etc.
|
||||
self.story.addToList('category',stripHTML(categories[1]))
|
||||
elif 'Crossover' in categories[0]['href']:
|
||||
caturl = "https://%s%s"%(self.getSiteDomain(),categories[0]['href'])
|
||||
catsoup = self.make_soup(self._fetchUrl(caturl))
|
||||
found = False
|
||||
for a in catsoup.findAll('a',href=re.compile(r"^/crossovers/.+?/\d+/")):
|
||||
self.story.addToList('category',stripHTML(a))
|
||||
found = True
|
||||
if not found:
|
||||
# Fall back. I ran across a story with a Crossover
|
||||
# category link to a broken page once.
|
||||
# http://www.fanfiction.net/s/2622060/1/
|
||||
# Naruto + Harry Potter Crossover
|
||||
logger.info("Fall back category collection")
|
||||
for c in stripHTML(categories[0]).replace(" Crossover","").split(' + '):
|
||||
self.story.addToList('category',c)
|
||||
## turns out there are only a handful of ffnet categories
|
||||
## with '+' in. Keep a list and look for them
|
||||
## specifically instead of looking up the crossover page.
|
||||
crossover_cat = stripHTML(categories[0]).replace(" Crossover","")
|
||||
for pluscat in ffnetpluscategories:
|
||||
if pluscat in crossover_cat:
|
||||
self.story.addToList('category',pluscat)
|
||||
crossover_cat = crossover_cat.replace(pluscat,'')
|
||||
for cat in crossover_cat.split(' + '):
|
||||
if cat:
|
||||
self.story.addToList('category',cat)
|
||||
|
||||
a = soup.find('a', href=re.compile(r'https?://www\.fictionratings\.com/'))
|
||||
rating = a.string
|
||||
@@ -202,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
|
||||
# for b in grayspan.findAll('button'):
|
||||
# for b in grayspan.find_all('button'):
|
||||
# b.extract()
|
||||
metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
|
||||
#logger.debug("metatext:(%s)"%metatext)
|
||||
@@ -241,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||
# Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
|
||||
# Published: <span data-xutime='1384358726'>8m ago</span>
|
||||
dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
|
||||
dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
|
||||
if len(dates) > 1 :
|
||||
# updated get set to the same as published upstream if not found.
|
||||
self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
|
||||
@@ -289,42 +338,51 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# Try the larger image first.
|
||||
cover_url = ""
|
||||
try:
|
||||
img = soup.select('img.lazy.cimage')
|
||||
cover_url=img[0]['data-original']
|
||||
img = soup.select_one('img.lazy.cimage')
|
||||
cover_url=img['data-original']
|
||||
except:
|
||||
img = soup.select('img.cimage')
|
||||
if img:
|
||||
cover_url=img[0]['src']
|
||||
## Nov 2023 - src is always "/static/images/d_60_90.jpg" now
|
||||
## Only take cover if there's data-original
|
||||
## Primary motivator is to prevent unneeded author page hits.
|
||||
pass
|
||||
logger.debug("cover_url:%s"%cover_url)
|
||||
|
||||
authimg_url = ""
|
||||
if cover_url and self.getConfig('skip_author_cover'):
|
||||
authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
if cover_url and self.getConfig('skip_author_cover') and self.getConfig('include_images'):
|
||||
try:
|
||||
img = authsoup.select('img.lazy.cimage')
|
||||
authimg_url=img[0]['data-original']
|
||||
except:
|
||||
img = authsoup.select('img.cimage')
|
||||
if img:
|
||||
authimg_url=img[0]['src']
|
||||
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
try:
|
||||
img = authsoup.select_one('img.lazy.cimage')
|
||||
authimg_url=img['data-original']
|
||||
except:
|
||||
img = authsoup.select_one('img.cimage')
|
||||
if img:
|
||||
authimg_url=img['src']
|
||||
|
||||
logger.debug("authimg_url:%s"%authimg_url)
|
||||
logger.debug("authimg_url:%s"%authimg_url)
|
||||
|
||||
## ffnet uses different sizes on auth & story pages, but same id.
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/150/
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/180/
|
||||
try:
|
||||
cover_id = cover_url.split('/')[4]
|
||||
except:
|
||||
cover_id = None
|
||||
try:
|
||||
authimg_id = authimg_url.split('/')[4]
|
||||
except:
|
||||
authimg_id = None
|
||||
## ffnet uses different sizes on auth & story pages, but same id.
|
||||
## Old URLs:
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/150/
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/180/
|
||||
## After Dec 2020 ffnet changes:
|
||||
## /image/6472517/180/
|
||||
## /image/6472517/150/
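## In both URL shapes, split('/')[-3] picks out the image id,
## e.g. '6472517' from '/image/6472517/180/'.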
|
||||
try:
|
||||
cover_id = cover_url.split('/')[-3]
|
||||
except:
|
||||
cover_id = None
|
||||
try:
|
||||
authimg_id = authimg_url.split('/')[-3]
|
||||
except:
|
||||
authimg_id = None
|
||||
|
||||
## don't use cover if it matches the auth image.
|
||||
if cover_id and authimg_id and cover_id == authimg_id:
|
||||
cover_url = None
|
||||
## don't use cover if it matches the auth image.
|
||||
if cover_id and authimg_id and cover_id == authimg_id:
|
||||
logger.debug("skip_author_cover: cover_url matches authimg_url: don't use")
|
||||
cover_url = None
|
||||
except Exception as e:
|
||||
logger.warning("Caught exception in skip_author_cover: %s."%unicode(e))
|
||||
|
||||
if cover_url:
|
||||
self.setCoverImage(url,cover_url)
|
||||
@@ -337,31 +395,37 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story.
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
else:
|
||||
allOptions = select.findAll('option')
|
||||
allOptions = select.find_all('option')
|
||||
for o in allOptions:
|
||||
url = u'https://%s/s/%s/%s/' % ( self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
o['value'])
|
||||
## title URL will be put back on chapter URL during
|
||||
## normalize_chapterurl() anyway, but also here for
|
||||
## clarity
|
||||
url = u'https://%s/s/%s/%s/%s' % ( self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
o['value'],
|
||||
self.urltitle)
|
||||
# just in case there are tags, like <i>, in chapter titles.
|
||||
title = u"%s" % o
|
||||
title = re.sub(r'<[^>]+>','',title)
|
||||
self.add_chapter(title,url)
|
||||
|
||||
|
||||
return
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
## ffnet(and, I assume, fpcom) tends to fail more if hit too
|
||||
## fast. This is in addition to whatever the
|
||||
## slow_down_sleep_time setting is.
|
||||
data = self._fetchUrl(url,extrasleep=4.0)
|
||||
logger.debug('Getting chapter text from: %s' % (url))
|
||||
|
||||
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
|
||||
## title URL was put back on chapter URL during
|
||||
## normalize_chapterurl()
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Please email this error message in full to <a href='mailto:" in data:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
## remove inline ads -- only seen with flaresolverr
|
||||
for adtag in soup.select("div.google-auto-placed"):
|
||||
adtag.decompose()
|
||||
|
||||
div = soup.find('div', {'id' : 'storytextp'})
|
||||
|
||||
if None == div:
fanficfare/adapters/adapter_fanfictionsfr.py (new file, 157 lines)
@@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
import io
|
||||
import logging
|
||||
import re
|
||||
import zipfile
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
from fanficfare.htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def getClass():
|
||||
return FanfictionsFrSiteAdapter
|
||||
|
||||
|
||||
class FanfictionsFrSiteAdapter(BaseSiteAdapter):
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev', 'fanfictionsfr')
|
||||
self.story.setMetadata('langcode','fr')
|
||||
self.story.setMetadata('language','Français')
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
match = re.match(self.getSiteURLPattern(), url)
|
||||
if not match:
|
||||
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())
|
||||
|
||||
story_id = match.group('id')
|
||||
self.story.setMetadata('storyId', story_id)
|
||||
fandom_name = match.group('fandom')
|
||||
|
||||
self._setURL('https://%s/fanfictions/%s/%s/chapters.html' % (self.getSiteDomain(), fandom_name, story_id))
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.fanfictions.fr'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return 'https://%s/fanfictions/fandom/fanfiction-id/chapters.html' % cls.getSiteDomain()
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r'https?://(?:www\.)?fanfictions\.fr/fanfictions/(?P<fandom>[^/]+)/(?P<id>[^/]+)(/chapters.html)?'
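# e.g. (hypothetical values) .../fanfictions/harry-potter/abc123/chapters.html
# matches with fandom='harry-potter' and id='abc123'.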
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
|
||||
data = self.get_request(self.url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# detect if the fanfiction is 'suspended' (chapters unavailable)
|
||||
alert_div = soup.find('div', id='alertInactiveFic')
|
||||
if alert_div:
|
||||
raise exceptions.FailedToDownload("Failed to download the fanfiction, most likely because it is suspended.")
|
||||
|
||||
title_element = soup.find('h1', itemprop='name')
|
||||
self.story.setMetadata('title', stripHTML(title_element))
|
||||
|
||||
author_div = soup.find('div', itemprop='author')
|
||||
author_name = stripHTML(author_div.a)
|
||||
author_id = author_div.a['href'].split('/')[-1].replace('.html', '')
|
||||
|
||||
self.story.setMetadata('author', author_name)
|
||||
self.story.setMetadata('authorId', author_id)
|
||||
|
||||
published_date_element = soup.find('span', class_='date-distance')
|
||||
published_date_text = published_date_element['data-date']
|
||||
published_date = makeDate(published_date_text, '%Y-%m-%d %H:%M:%S')
|
||||
if published_date:
|
||||
self.story.setMetadata('datePublished', published_date)
|
||||
|
||||
status_element = soup.find('p', title="Statut de la fanfiction").find('span', class_='badge')
|
||||
french_status = stripHTML(status_element)
|
||||
status_translation = {
|
||||
"En cours": "In-Progress",
|
||||
"Terminée": "Completed",
|
||||
"One-shot": "Completed",
|
||||
}
|
||||
self.story.setMetadata('status', status_translation.get(french_status, french_status))
|
||||
|
||||
genre_elements = soup.find('div', title="Format et genres").find_all('span', class_="highlightable")
|
||||
self.story.extendList('genre', [ stripHTML(genre) for genre in genre_elements[1:] ])
|
||||
|
||||
category_elements = soup.find_all('li', class_="breadcrumb-item")
|
||||
self.story.extendList('category', [ stripHTML(category) for category in category_elements[-2].find_all('a') ])
|
||||
|
||||
first_description = soup.find('p', itemprop='abstract')
|
||||
self.setDescription(self.url, first_description)
|
||||
|
||||
chapter_cards = soup.find_all(class_=['card', 'chapter'])
|
||||
|
||||
for chapter_card in chapter_cards:
|
||||
chapter_title_tag = chapter_card.find('h2')
|
||||
if chapter_title_tag:
|
||||
chapter_title = stripHTML(chapter_title_tag)
|
||||
chapter_link = 'https://'+self.getSiteDomain()+chapter_title_tag.find('a')['href']
|
||||
|
||||
# Clean up the chapter title by replacing multiple spaces and newline characters with a single space
|
||||
chapter_title = re.sub(r'\s+', ' ', chapter_title)
|
||||
|
||||
self.add_chapter(chapter_title, chapter_link)
|
||||
|
||||
last_chapter_div = chapter_cards[-1]
|
||||
updated_date_element = last_chapter_div.find('span', class_='date-distance')
|
||||
last_chapter_update_date = updated_date_element['data-date']
|
||||
date = makeDate(last_chapter_update_date, '%Y-%m-%d %H:%M:%S')
|
||||
if date:
|
||||
self.story.setMetadata('dateUpdated', date)
|
||||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
response, redirection_url = self.get_request_redirected(url)
|
||||
|
||||
if "telecharger_pdf.html" in redirection_url:
|
||||
with zipfile.ZipFile(io.BytesIO(response.encode('latin1'))) as z:
|
||||
# Assuming there's only one text file inside the zip
|
||||
file_list = z.namelist()
|
||||
if len(file_list) != 1:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Zip file should contain exactly one text file!" % url)
|
||||
text_filename = file_list[0]
|
||||
with z.open(text_filename) as text_file:
|
||||
# Decode the text file with windows-1252 encoding
|
||||
text = text_file.read().decode('windows-1252')
|
||||
return text.replace("\r\n", "<br>\r\n")
|
||||
else:
|
||||
soup = self.make_soup(response)
|
||||
|
||||
div_content = soup.find('div', id='readarea')
|
||||
if div_content is None:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url, div_content)
|
||||
@@ -20,14 +20,11 @@ import time
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
@@ -71,13 +68,6 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
|
||||
return r"https?"+re.escape("://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Diese Geschichte wurde als entwicklungsbeeintr' in data \
|
||||
@@ -103,7 +93,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
loginUrl = 'https://www.fanfiktion.de/'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['nickname']))
|
||||
soup = self.make_soup(self._postUrl(loginUrl,params))
|
||||
soup = self.make_soup(self.post_request(loginUrl,params))
|
||||
if not soup.find('a', title='Logout'):
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['nickname']))
|
||||
@@ -118,27 +108,19 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
|
||||
if "Uhr ist diese Geschichte nur nach einer" in data:
|
||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
# logger.debug(data)
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"/"))
|
||||
@@ -152,10 +134,11 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
self.story.setMetadata('author',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.find('select').findAll('option'):
|
||||
for chapter in soup.find('select').find_all('option'):
|
||||
self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
|
||||
|
||||
self.story.setMetadata('numWords',stripHTML(soup.find("span",title="Wörter").parent))
|
||||
## title="Wörter" failed with max_zalgo:1
|
||||
self.story.setMetadata('numWords',stripHTML(soup.find("span",{'class':"fa-keyboard"}).parent).replace('.','')) # 1.234 = 1,234
|
||||
self.story.setMetadata('language','German')
|
||||
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(head.find('span',title='erstellt').parent), self.dateformat))
|
||||
@@ -166,36 +149,45 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
self.story.extendList('genre',genres[:genres.index(' / ')].split(', '))
|
||||
self.story.setMetadata('rating', genres[genres.index(' / ')+3:])
|
||||
|
||||
self.story.addToList('category',stripHTML(soup.find('span',id='ffcbox-story-topic-1')).split('/')[2].strip())
|
||||
# self.story.addToList('category',stripHTML(soup.find('span',id='ffcbox-story-topic-1')).split('/')[2].strip())
|
||||
for a in soup.find('span',id='ffcbox-story-topic-1').find_all('a',href=re.compile(r'/c/')):
|
||||
cat = stripHTML(a)
|
||||
if cat != 'Fanfiction':
|
||||
self.story.addToList('category',cat)
|
||||
|
||||
for span in soup.find_all('span',class_='badge-character'):
|
||||
self.story.addToList('characters',stripHTML(span))
|
||||
|
||||
try:
|
||||
self.story.setMetadata('native_status', head.find_all('span',{'class':'titled-icon'})[3]['title'])
|
||||
except Exception as e:
|
||||
logger.debug("Failed to find native status:%s"%e)
|
||||
|
||||
if head.find('span',title='Fertiggestellt'):
|
||||
if head.find('span',title='fertiggestellt'):
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
elif head.find('span',title='Pausiert'):
|
||||
elif head.find('span',title='pausiert'):
|
||||
self.story.setMetadata('status', 'Paused')
|
||||
elif head.find('span',title='Abgebrochen'):
|
||||
elif head.find('span',title='abgebrochen'):
|
||||
self.story.setMetadata('status', 'Cancelled')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
## Get description from own URL:
|
||||
## /?a=v&storyid=46ccbef30000616306614050&s=1
|
||||
descsoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"/?a=v&storyid="+self.story.getMetadata('storyId')+"&s=1"))
|
||||
self.setDescription(url,stripHTML(descsoup))
|
||||
## Get description
|
||||
descdiv = soup.select_one('div#story-summary-inline div')
|
||||
if descdiv:
|
||||
if 'center' in descdiv['class']:
|
||||
del descdiv['class']
|
||||
self.setDescription(url,descdiv)
|
||||
|
||||
# #find metadata on the author's page
|
||||
# asoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
||||
# tr=asoup.findAll('tr')
|
||||
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
||||
# tr=asoup.find_all('tr')
|
||||
# for i in range(1,len(tr)):
|
||||
# a = tr[i].find('a')
|
||||
# if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
|
||||
# break
|
||||
|
||||
# td = tr[i].findAll('td')
|
||||
# td = tr[i].find_all('td')
|
||||
# self.story.addToList('category',stripHTML(td[2]))
|
||||
# self.story.setMetadata('rating', stripHTML(td[5]))
|
||||
# self.story.setMetadata('numWords', stripHTML(td[6]))
|
||||
@@ -209,10 +201,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
|
||||
time.sleep(0.5) ## ffde has "floodlock" protection
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'storytext'})
|
||||
for a in div.findAll('script'):
|
||||
for a in div.find_all('script'):
|
||||
a.extract()
|
||||
|
||||
if None == div:
|
||||
@@ -1,45 +0,0 @@
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
from .base_efiction_adapter import BaseEfictionAdapter
|
||||
|
||||
class FanNationAdapter(BaseEfictionAdapter):
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'fannation.shades-of-moonlight.com'
|
||||
|
||||
@classmethod
|
||||
def getPathToArchive(self):
|
||||
return '/archive'
|
||||
|
||||
@classmethod
|
||||
def getSiteAbbrev(self):
|
||||
return 'fannation'
|
||||
|
||||
def handleMetadataPair(self, key, value):
|
||||
if key == 'Romance':
|
||||
for val in re.split(r"\s*,\s*", value):
|
||||
self.story.addToList('romance', val)
|
||||
else:
|
||||
super(FanNationAdapter, self).handleMetadataPair(key, value)
|
||||
|
||||
def getClass():
|
||||
return FanNationAdapter
|
||||
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -15,20 +15,18 @@
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import datetime
|
||||
from __future__ import absolute_import,unicode_literals
|
||||
# import datetime
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import json
|
||||
import re
|
||||
from .. import translit
|
||||
# from .. import translit
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
from .. import exceptions# as exceptions
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
@@ -60,34 +58,42 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %m %Y"
|
||||
self.dateformat = u"%d %m %Y г., %H:%M"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.ficbook.net'
|
||||
return 'ficbook.net'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content"
|
||||
return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82 https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82/94793742#part_content"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"\d+"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"[\d\-a-zA-Z]+"
|
||||
|
||||
def performLogin(self,url,data):
|
||||
params = {}
|
||||
if self.password:
|
||||
params['login'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['login'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
|
||||
logger.debug("Try to login in as (%s)" % params['login'])
|
||||
d = self.post_request('https://' + self.getSiteDomain() + '/login_check_static',params,usecache=False)
|
||||
|
||||
if 'Войти используя аккаунт на сайте' in d:
|
||||
raise exceptions.FailedToLogin(url,params['login'])
|
||||
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
def extractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
url=self.url
|
||||
logger.debug("URL: "+url)
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
adult_div = soup.find('div',id='adultCoverWarning')
|
||||
@@ -97,10 +103,11 @@ class FicBookNetAdapter(BaseSiteAdapter):
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('section',{'class':'chapter-info'}).find('h1')
|
||||
try:
|
||||
a = soup.find('section',{'class':'chapter-info'}).find('h1')
|
||||
except AttributeError:
|
||||
raise exceptions.FailedToDownload("Error collecting meta: %s! Missing required element!" % url)
|
||||
# kill '+' marks if present.
|
||||
sup = a.find('sup')
|
||||
if sup:
|
||||
@@ -110,44 +117,12 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
||||
# Find authorid and URL from... author url.
|
||||
# assume first avatar-nickname -- there can be a second marked 'beta'.
|
||||
a = soup.find('a',{'class':'avatar-nickname'})
|
||||
a = soup.find('a',{'class':'creator-username'})
|
||||
self.story.setMetadata('authorId',a.text) # Author's name is unique
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
|
||||
self.story.setMetadata('author',a.text)
|
||||
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
|
||||
|
||||
# Find the chapters:
|
||||
pubdate = None
|
||||
chapters = soup.find('ul', {'class' : 'table-of-contents'})
|
||||
if chapters != None:
|
||||
chapters=chapters.findAll('a', href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+"/\d+#part_content$"))
|
||||
self.story.setMetadata('numChapters',len(chapters))
|
||||
for x in range(0,len(chapters)):
|
||||
chapter=chapters[x]
|
||||
churl='https://'+self.host+chapter['href']
|
||||
self.add_chapter(chapter,churl)
|
||||
## First chapter doesn't always have a date, skip it.
|
||||
if pubdate == None and chapter.parent.find('span'):
|
||||
pubdate = translit.translit(stripHTML(chapter.parent.find('span')))
|
||||
# pubdate = translit.translit(stripHTML(self.make_soup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span')))
|
||||
if x == len(chapters)-1:
|
||||
update = translit.translit(stripHTML(chapter.parent.find('span')))
|
||||
# update = translit.translit(stripHTML(self.make_soup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span')))
|
||||
else:
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
self.story.setMetadata('numChapters',1)
|
||||
pubdate=translit.translit(stripHTML(soup.find('div',{'class':'title-area'}).find('span')))
|
||||
update=pubdate
|
||||
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
|
||||
if not ',' in pubdate:
|
||||
pubdate=datetime.date.today().strftime(self.dateformat)
|
||||
if not ',' in update:
|
||||
update=datetime.date.today().strftime(self.dateformat)
|
||||
pubdate=pubdate.split(',')[0]
|
||||
update=update.split(',')[0]
|
||||
|
||||
fullmon = {"yanvarya":"01", u"января":"01",
|
||||
"fievralya":"02", u"февраля":"02",
|
||||
"marta":"03", u"марта":"03",
|
||||
@@ -161,44 +136,68 @@ class FicBookNetAdapter(BaseSiteAdapter):
"noyabrya":"11", u"ноября":"11",
|
||||
"diekabrya":"12", u"декабря":"12" }
|
||||
|
||||
for (name,num) in fullmon.items():
|
||||
if name in pubdate:
|
||||
pubdate = pubdate.replace(name,num)
|
||||
if name in update:
|
||||
update = update.replace(name,num)
|
||||
# Find the chapters:
|
||||
pubdate = None
|
||||
chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
|
||||
if chapters is not None:
|
||||
for chapdiv in chapters.find_all('li', {'class':'part'}):
|
||||
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
|
||||
churl='https://'+self.host+chapter['href']
|
||||
|
||||
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
|
||||
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
|
||||
# Find the chapter dates.
|
||||
date_str = chapdiv.find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
|
||||
for month_name, month_num in fullmon.items():
|
||||
date_str = date_str.replace(month_name, month_num)
|
||||
chapterdate = makeDate(date_str,self.dateformat)
|
||||
self.add_chapter(chapter,churl,
|
||||
{'date':chapterdate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format",self.dateformat)))})
|
||||
|
||||
if pubdate is None and chapterdate:
|
||||
pubdate = chapterdate
|
||||
update = chapterdate
|
||||
else:
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
date_str = soup.find('div', {'class' : 'part-date'}).find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
|
||||
for month_name, month_num in fullmon.items():
|
||||
date_str = date_str.replace(month_name, month_num)
|
||||
pubdate = update = makeDate(date_str,self.dateformat)
|
||||
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
|
||||
self.story.setMetadata('dateUpdated', update)
|
||||
self.story.setMetadata('datePublished', pubdate)
|
||||
self.story.setMetadata('language','Russian')
|
||||
|
||||
## after site change, I don't see word count anywhere.
|
||||
# pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
|
||||
# pr='https://'+self.host+pr['href']
|
||||
# pr = self.make_soup(self._fetchUrl(pr))
|
||||
# pr=pr.findAll('div', {'class' : 'part_text'})
|
||||
# i=0
|
||||
# for part in pr:
|
||||
# i=i+len(stripHTML(part).split(' '))
|
||||
# self.story.setMetadata('numWords', unicode(i))
|
||||
dlinfo = soup.select_one('header.d-flex.flex-column.gap-12.word-break')
|
||||
|
||||
|
||||
dlinfo = soup.find('dl',{'class':'info'})
|
||||
series_label = dlinfo.select_one('div.description.word-break').find('strong', string='Серия:')
|
||||
logger.debug('Series: %s'%str(series_label))
|
||||
if series_label:
|
||||
series_div = series_label.find_next_sibling("div")
|
||||
# No accurate series index; getting it would require an additional request.
|
||||
self.setSeries(stripHTML(series_div.a), 1)
|
||||
self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))
|
||||
|
||||
i=0
|
||||
fandoms = dlinfo.find('dd').findAll('a', href=re.compile(r'/fanfiction/\w+'))
|
||||
fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
|
||||
for fandom in fandoms:
|
||||
self.story.addToList('category',fandom.string)
|
||||
i=i+1
|
||||
if i > 1:
|
||||
self.story.addToList('genre', u'Кроссовер')
|
||||
|
||||
for genre in dlinfo.findAll('a',href=re.compile(r'/genres/')):
|
||||
self.story.addToList('genre',stripHTML(genre))
|
||||
tags = soup.find('div',{'class':'tags'})
|
||||
if tags:
|
||||
for genre in tags.find_all('a',href=re.compile(r'/tags/')):
|
||||
self.story.addToList('genre',stripHTML(genre))
|
||||
|
||||
ratingdt = dlinfo.find('dt',text='Рейтинг:')
|
||||
self.story.setMetadata('rating', stripHTML(ratingdt.next_sibling))
|
||||
logger.debug("category: (%s)"%self.story.getMetadata('category'))
|
||||
logger.debug("genre: (%s)"%self.story.getMetadata('genre'))
|
||||
|
||||
# meta=table.findAll('a', href=re.compile(r'/ratings/'))
|
||||
ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
|
||||
self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))
|
||||
|
||||
# meta=table.find_all('a', href=re.compile(r'/ratings/'))
|
||||
# i=0
|
||||
# for m in meta:
|
||||
# if i == 0:
|
||||
|
|
@ -211,37 +210,184 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
# elif i == 2:
|
||||
# self.story.addToList('warnings', m.find('b').text)
|
||||
|
||||
if dlinfo.find('span', {'style' : 'color: green'}):
|
||||
if dlinfo.find('div', {'class':'badge-status-finished'}):
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
try:
|
||||
self.story.setMetadata('universe', stripHTML(dlinfo.find('a', href=re.compile('/fandom_universe/'))))
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
tags = dlinfo.findAll('dt')
|
||||
for tag in tags:
|
||||
label = translit.translit(tag.text)
|
||||
if 'Piersonazhi:' in label or u'Персонажи:' in label:
|
||||
chars=stripHTML(tag.next_sibling).split(', ')
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char)
|
||||
break
|
||||
paircharsdt = soup.find('strong',string='Пэйринг и персонажи:')
|
||||
# site keeps both ships and indiv chars in /pairings/ links.
|
||||
if paircharsdt:
|
||||
for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):
|
||||
if 'pairing-highlight' in paira['class']:
|
||||
self.story.addToList('ships',stripHTML(paira))
|
||||
chars=stripHTML(paira).split('/')
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char)
|
||||
else:
|
||||
self.story.addToList('characters',stripHTML(paira))
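        # Illustrative only (assumed data): a 'pairing-highlight' link such as
        # "Гарри Поттер/Гермиона Грейнджер" is stored once under 'ships' and
        # split on '/' into two 'characters' entries; a plain /pairings/ link
        # is treated as a single character.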
        summary=soup.find('div', itemprop='description')
        if summary:
            # Fix for the text not displaying properly
            summary['class'].append('part_text')
            self.setDescription(url,summary)
            #self.story.setMetadata('description', summary.text)

        stats = soup.find('div', {'class':'hat-actions-container'})
        targetdata = stats.find_all('span', {'class' : 'main-info'})
        for data in targetdata:
            svg_class = data.find('svg')['class'][1] if data.find('svg') else None
            value = int(stripHTML(data)) if stripHTML(data).isdigit() else 0

            if svg_class == 'ic_thumbs-up' and value > 0:
                self.story.setMetadata('likes', value)
                #logger.debug("likes: (%s)"%self.story.getMetadata('likes'))
            elif svg_class == 'ic_bubble-dark' and value > 0:
                self.story.setMetadata('reviews', value)
                #logger.debug("reviews: (%s)"%self.story.getMetadata('reviews'))
            elif svg_class == 'ic_bookmark' and value > 0:
                self.story.setMetadata('numCollections', value)
                logger.debug("numCollections: (%s)"%self.story.getMetadata('numCollections'))

        # Grab the amount of pages and words
        targetpages = soup.find('strong',string='Размер:').find_next('div')
        if targetpages:
            targetpages_text = re.sub(r"(?<!\,)\s| ", "", targetpages.text, flags=re.UNICODE | re.MULTILINE)

            pages_raw = re.search(r'(\d+)(?:страницы|страниц)', targetpages_text, re.UNICODE)
            pages = int(pages_raw.group(1))
            if pages > 0:
                self.story.setMetadata('pages', pages)
                logger.debug("pages: (%s)"%self.story.getMetadata('pages'))

            numWords_raw = re.search(r"(\d+)(?:слова|слов)", targetpages_text, re.UNICODE)
            numWords = int(numWords_raw.group(1))
            if numWords > 0:
                self.story.setMetadata('numWords', numWords)
                logger.debug("numWords: (%s)"%self.story.getMetadata('numWords'))
        # Grab FBN Category
        class_tag = soup.select_one('div[class^="badge-with-icon direction"]').find('span', {'class' : 'badge-text'}).text
        if class_tag:
            self.story.setMetadata('classification',class_tag)
            #logger.debug("classification: (%s)"%self.story.getMetadata('classification'))

        # Find dedication.
        ded = soup.find('div', {'class' : 'js-public-beta-dedication'})
        if ded:
            ded['class'].append('part_text')
            self.story.setMetadata('dedication',ded)

        # Find author comment
        comm = soup.find('div', {'class' : 'js-public-beta-author-comment'})
        if comm:
            comm['class'].append('part_text')
            self.story.setMetadata('authorcomment',comm)

        follows = stats.find('fanfic-follow-button')[':follow-count']
        if int(follows) > 0:
            self.story.setMetadata('follows', int(follows))
            logger.debug("follows: (%s)"%self.story.getMetadata('follows'))

        # Grab the amount of awards
        numAwards = 0
        try:
            awards = soup.find('fanfic-reward-list')[':initial-fic-rewards-list']
            award_list = json.loads(awards)
            numAwards = int(len(award_list))
            # Grab the awards, but if multiple awards have the same name, only one will be kept; only an issue with hundreds of them.
            self.story.extendList('awards', [str(award['user_text']) for award in award_list])
            #logger.debug("awards (%s)"%self.story.getMetadata('awards'))
        except (TypeError, KeyError):
            logger.debug("Could not grab the awards")

        if numAwards > 0:
            self.story.setMetadata('numAwards', numAwards)
            logger.debug("Num Awards (%s)"%self.story.getMetadata('numAwards'))

        if get_cover:
            cover = soup.find('fanfic-cover', {'class':"jsVueComponent"})
            if cover is not None:
                self.setCoverImage(url,cover['src-original'])

    def replace_formatting(self,tag):
        tname = tag.name
        ## operating on plain text because BS4 is hard to work on
        ## text with.
        ## stripHTML() discards whitespace around other tags, like <i>
        txt = tag.get_text()
        txt = txt.replace("\n","<br/>")
        soup = self.make_soup("<"+tname+">"+txt+"</"+tname+">")
        return soup.find(tname)
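    # Behavior sketch (not part of the source): replace_formatting() on a tag
    # like <div>one\ntwo</div> returns <div>one<br/>two</div>; inner tags such
    # as <i> are discarded along with their surrounding whitespace because the
    # method works on get_text() output.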
    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self.get_request(url))

        chapter = soup.find('div', {'class' : 'public_beta'})
        if chapter is None:
            chapter = soup.find('div', {'id' : 'content'})
        if chapter is None: ## still needed?
            chapter = soup.find('div', {'class' : 'public_beta_disabled'})

        if chapter is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        ## ficbook uses weird CSS white-space: pre-wrap; for
        ## paragraphing. Doesn't work with txt output
        if 'part_text' in chapter['class'] and self.getConfig('replace_text_formatting'):
            ## copy classes, except part_text
            divclasses = chapter['class']
            divclasses.remove('part_text')
            chapter = self.replace_formatting(chapter)
            chapter['class'] = divclasses

        exclude_notes=self.getConfigList('exclude_notes')
        if 'headnotes' not in exclude_notes:
            # Find the headnote
            head_note = soup.select_one("div.part-comment-top div.js-public-beta-comment-before")
            if head_note:
                # Create the structure for the headnote
                head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
                head_b_tag = soup.new_tag('b')
                head_b_tag.string = 'Примечания:'
                if 'text-preline' in head_note['class'] and self.getConfig('replace_text_formatting'):
                    head_blockquote_tag = self.replace_formatting(head_note)
                    head_blockquote_tag.name = 'blockquote'
                else:
                    head_blockquote_tag = soup.new_tag('blockquote')
                    head_blockquote_tag.string = stripHTML(head_note)
                head_notes_div_tag.append(head_b_tag)
                head_notes_div_tag.append(head_blockquote_tag)
                # Prepend the headnotes to the chapter, <hr> to mimic the site
                chapter.insert(0, head_notes_div_tag)
                chapter.insert(1, soup.new_tag('hr'))

        if 'footnotes' not in exclude_notes:
            # Find the endnote
            end_note = soup.select_one("div.part-comment-bottom div.js-public-beta-comment-after")
            if end_note:
                # Create the structure for the footnote
                end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
                end_b_tag = soup.new_tag('b')
                end_b_tag.string = 'Примечания:'
                if 'text-preline' in end_note['class'] and self.getConfig('replace_text_formatting'):
                    end_blockquote_tag = self.replace_formatting(end_note)
                    end_blockquote_tag.name = 'blockquote'
                else:
                    end_blockquote_tag = soup.new_tag('blockquote')
                    end_blockquote_tag.string = stripHTML(end_note)
                end_notes_div_tag.append(end_b_tag)
                end_notes_div_tag.append(end_blockquote_tag)
                # Append the endnotes to the chapter, <hr> to mimic the site
                chapter.append(soup.new_tag('hr'))
                chapter.append(end_notes_div_tag)

        return self.utf8FromSoup(url,chapter)
225  fanficfare/adapters/adapter_fictionalleyarchiveorg.py  Normal file

@ -0,0 +1,225 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2021 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re

from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from .base_adapter import BaseSiteAdapter, makeDate

class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','fa')
        self.is_adult=False

        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            # normalized story URL.
            url = "https://"+self.getSiteDomain()+"/authors/"+m.group('auth')+"/"+m.group('id')+".html"
            self._setURL(url)
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())
        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%Y"

    def _setURL(self,url):
        # logger.debug("set URL:%s"%url)
        super(FictionAlleyArchiveOrgSiteAdapter, self)._setURL(url)
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('authorId',m.group('auth'))
            self.story.setMetadata('storyId',m.group('id'))

    @staticmethod
    def getSiteDomain():
        return 'www.fictionalley-archive.org'

    @classmethod
    def getAcceptDomains(cls):
        return ['www.fictionalley-archive.org',
                'www.fictionalley.org']

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/authors/drt/DA.html https://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"

    @classmethod
    def getURLDomain(cls):
        return 'https://' + cls.getSiteDomain()

    def getSiteURLPattern(self):
        # http://www.fictionalley-archive.org/authors/drt/DA.html
        # http://www.fictionalley-archive.org/authors/drt/JOTP01a.html
        return r"https?://www.fictionalley(-archive)?.org/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"

    def extractChapterUrlsAndMetadata(self):

        ## could be either chapter list page or one-shot text page.
        logger.debug("URL: "+self.url)

        (data,rurl) = self.get_request_redirected(self.url)
        if rurl != self.url:
            self._setURL(rurl)
            logger.debug("set to redirected url:%s"%self.url)
        soup = self.make_soup(data)

        # If chapter list page, get the first chapter to look for adult check
        chapterlinklist = soup.select('h5.mb-1 > a')
        # logger.debug(chapterlinklist)

        if not chapterlinklist:
            # no chapter list, it's either a chapter URL or a single chapter story
            # <nav aria-label="Chapter Navigation">
            # <a class="page-link" href="/authors/mz_xxo/HPATOTFI.html">Index</a>
            storya = soup.select_one('nav[aria-label="Chapter Navigation"] a')
            # logger.debug(storya)
            if storya:
                ## multi chapter story
                self._setURL(self.getURLDomain()+storya['href'])
                logger.debug("Normalizing to URL: "+self.url)
                # ## title's right there...
                # self.story.setMetadata('title',stripHTML(storya))
                data = self.get_request(self.url)
                soup = self.make_soup(data)
                chapterlinklist = soup.select('h5.mb-1 > a')
                # logger.debug(chapterlinklist)
            else:
                ## single chapter story.
                # logger.debug("Single chapter story")
                pass

        self.story.setMetadata('title',stripHTML(soup.select_one('h1')))

        ## authorid already set.
        ## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1>
        authora=soup.select_one('h1 + h3 > a')
        self.story.setMetadata('author',stripHTML(authora))
        self.story.setMetadata('authorUrl',self.getURLDomain()+authora['href'])

        if chapterlinklist:
            # Find the chapters:
            for chapter in chapterlinklist:
                listitem = chapter.parent.parent.parent
                # logger.debug(listitem)
                # date
                date = stripHTML(listitem.select_one('small.text-nowrap'))
                chapterDate = makeDate(date,self.dateformat)
                wordshits = listitem.select('span.font-weight-normal')
                chap_data = {
                    'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d"))),
                    'words':stripHTML(wordshits[0]),
                    'hits':stripHTML(wordshits[1]),
                    'summary':stripHTML(listitem.select_one('p.my-2')),
                    }
                # logger.debug(chap_data)
                self.add_chapter(chapter,self.getURLDomain()+chapter['href'], chap_data)
        else:
            self.add_chapter(self.story.getMetadata('title'),self.url)

        cardbody = soup.select_one('div.card-body')

        searchs_to_meta = (
            # sitetype, ffftype, islist
            ('Rating', 'rating', False),
            ('House', 'house', True),
            ('Character', 'characters', True),
            ('Genre', 'genre', True),
            ('Era', 'era', True),
            ('Spoiler', 'spoilers', True),
            ('Ship', 'ships', True),
            )
        for (sitetype,ffftype, islist) in searchs_to_meta:
            # logger.debug((sitetype,ffftype, islist))
            tags = cardbody.select('a[href^="/stories?Include.%s"]'%sitetype)
            # logger.debug(tags)
            if tags:
                if islist:
                    self.story.extendList(ffftype, [ stripHTML(a) for a in tags ])
                else:
                    self.story.setMetadata(ffftype, stripHTML(tags[0]))
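        # For example (derived from the selector above): sitetype 'Ship'
        # matches links whose href begins with "/stories?Include.Ship", and
        # their stripped link text is extended into the 'ships' metadata list.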
        # Published: 09/26/2003 Updated: 04/13/2004 Words: 14,268 Chapters: 5 Hits: 743
        badgeinfos = cardbody.select('div.badge-info')
        # logger.debug(badgeinfos)
        for badge in badgeinfos:
            txt = stripHTML(badge)
            (key,val)=txt.split(':')
            # logger.debug((key,val))
            if key in ( 'Published', 'Updated'):
                date = makeDate(val,self.dateformat)
                self.story.setMetadata('date'+key,date)
            elif key in ('Hits'):
                self.story.setMetadata(key.lower(),val)
            elif key == 'Words':
                self.story.setMetadata('numWords',val)

        summary = soup.find('dt',string='Story Summary:')
        if summary:
            summary = summary.find_next_sibling('dd')
            summary.name='div'
            self.setDescription(self.url,summary)

        return

    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        data = self.get_request(url)
        soup = self.make_soup(data)

        # this may be a brittle way to get the chapter text.
        # Site doesn't give a lot of hints.
        chaptext = soup.select_one('main#content div:not([class])')

        # not sure how, but we can get html, etc tags still in some
        # stories. That breaks later updates because it confuses
        # epubutils.py
        # Yes, this still applies to fictionalley-archive.

        for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
            tag.extract()

        for tag in chaptext.find_all('body') + chaptext.find_all('html'):
            tag.name = 'div'

        if self.getConfig('include_author_notes'):
            row = chaptext.find_previous_sibling('div',class_='row')
            logger.debug(row)
            andt = row.find('dt',string="Author's Note:")
            logger.debug(andt)
            if andt:
                chaptext.insert(0,andt.parent.extract())
            # post notes aren't as structured(?)
            for div in chaptext.find_next_siblings('div',class_='row'):
                chaptext.append(div.extract())

        # logger.debug(chaptext)
        return self.utf8FromSoup(url,chaptext)

def getClass():
    return FictionAlleyArchiveOrgSiteAdapter
@ -1,239 +0,0 @@

# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
import urllib
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','fa')
        self.is_adult=False

        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('authorId',m.group('auth'))
            self.story.setMetadata('storyId',m.group('id'))

            # normalized story URL.
            self._setURL(url)
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

    @staticmethod
    def getSiteDomain():
        return 'www.fictionalley.org'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/authors/drt/DA.html http://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"

    def getSiteURLPattern(self):
        # http://www.fictionalley.org/authors/drt/DA.html
        # http://www.fictionalley.org/authors/drt/JOTP01a.html
        return re.escape("http://"+self.getSiteDomain())+"/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"

    def _postFetchWithIAmOld(self,url):
        if self.is_adult or self.getConfig("is_adult"):
            params={'iamold':'Yes',
                    'action':'ageanswer'}
            logger.info("Attempting to get cookie for %s" % url)
            ## posting on list doesn't work, but doesn't hurt, either.
            data = self._postUrl(url,params)
        else:
            data = self._fetchUrl(url)
        return data

    def extractChapterUrlsAndMetadata(self):

        ## could be either chapter list page or one-shot text page.
        url = self.url
        logger.debug("URL: "+url)

        try:
            data = self._postFetchWithIAmOld(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        chapterdata = data
        # If chapter list page, get the first chapter to look for adult check
        chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
        if chapterlinklist:
            chapterdata = self._postFetchWithIAmOld(chapterlinklist[0]['href'])

        if "Are you over seventeen years old" in chapterdata:
            raise exceptions.AdultCheckRequired(self.url)

        if not chapterlinklist:
            # no chapter list, chapter URL: change to list link.
            # second a tag inside div breadcrumbs
            storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
            self._setURL(storya['href'])
            url=self.url
            logger.debug("Normalizing to URL: "+url)
            ## title's right there...
            self.story.setMetadata('title',stripHTML(storya))
            data = self._fetchUrl(url)
            soup = self.make_soup(data)
            chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
        else:
            ## still need title from somewhere. If chapterlinklist,
            ## then chapterdata contains a chapter, find title the
            ## same way.
            chapsoup = self.make_soup(chapterdata)
            storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
            self.story.setMetadata('title',stripHTML(storya))
            del chapsoup

        del chapterdata

        ## authorid already set.
        ## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1>
        authora=soup.find('h1',{'class':'title'}).find('a')
        self.story.setMetadata('author',authora.string)
        self.story.setMetadata('authorUrl',authora['href'])

        if len(chapterlinklist) == 1:
            self.add_chapter(self.story.getMetadata('title'),chapterlinklist[0]['href'])
        else:
            # Find the chapters:
            for chapter in chapterlinklist:
                # just in case there's tags, like <i> in chapter titles.
                self.add_chapter(chapter,chapter['href'])

        ## Go scrape the rest of the metadata from the author's page.
        data = self._fetchUrl(self.story.getMetadata('authorUrl'))
        soup = self.make_soup(data)

        # <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
        # [Rid] The Magical Hottiez</a> by <a class = "pen_name" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/">Aafro Man Ziegod</a> </small></dt>
        # <dd><small class = "storyinfo"><a href = "http://www.fictionalley.org/ratings.html" target = "_new">Rating:</a> PG-13 - Spoilers: PS/SS, CoS, PoA, GoF, QTTA, FB - 4264 hits - 5060 words<br />
        # Genre: Humor, Romance - Main character(s): None - Ships: None - Era: Multiple Eras<br /></small>
        # Chaos ensues after Witch Weekly, seeking to increase readers, decides to create a boyband out of five seemingly talentless wizards: Harry Potter, Draco Malfoy, Ron Weasley, Neville Longbottom, and Oliver "Toss Your Knickers Here" Wood.<br />
        # <small class = "storyinfo">Published: June 3, 2002 (between Goblet of Fire and Order of Phoenix) - Updated: June 3, 2002</small>
        # </dd></dl>

        storya = soup.find('a',{'href':self.story.getMetadata('storyUrl')})
        storydd = storya.findNext('dd')

        # Rating: PG - Spoilers: None - 2525 hits - 736 words
        # Genre: Humor - Main character(s): H, R - Ships: None - Era: Multiple Eras
        # Harry and Ron are back at it again! They reeeeeeally don't want to be back, because they know what's awaiting them. "VH1 Goes Inside..." is back! Why? 'Cos there are soooo many more couples left to pick on.
        # Published: September 25, 2004 (between Order of Phoenix and Half-Blood Prince) - Updated: September 25, 2004

        ## change to text and regexp find.
        metastr = stripHTML(storydd).replace('\n',' ').replace('\t',' ')

        m = re.match(r".*?Rating: (.+?) -.*?",metastr)
        if m:
            self.story.setMetadata('rating', m.group(1))

        m = re.match(r".*?Genre: (.+?) -.*?",metastr)
        if m:
            for g in m.group(1).split(','):
                self.story.addToList('genre',g)

        m = re.match(r".*?Published: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
        if m:
            self.story.setMetadata('datePublished',makeDate(m.group(1), "%B %d, %Y"))

        m = re.match(r".*?Updated: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
        if m:
            self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%B %d, %Y"))

        m = re.match(r".*? (\d+) words Genre.*?",metastr)
        if m:
            self.story.setMetadata('numWords', m.group(1))

        for small in storydd.findAll('small'):
            small.extract() ## removes the <small> tags, leaving only the summary.
        storydd.name = 'div' ## change tag name else Calibre treats it oddly.
        self.setDescription(url,storydd)
        #self.story.setMetadata('description',stripHTML(storydd))

        return

    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        # find <!-- headerend --> & <!-- footerstart --> and
        # replaced with matching div pair for easier parsing.
        # Yes, it's an evil kludge, but what can ya do? Using
        # something other than div prevents soup from pairing
        # our div with poor html inside the story text.
        crazy = "crazytagstringnobodywouldstumbleonaccidently"
        data = data.replace('<!-- headerend -->','<'+crazy+' id="storytext">').replace('<!-- footerstart -->','</'+crazy+'>')

        # problems with some stories confusing Soup. This is a nasty
        # hack, but it works.
        data = data[data.index('<'+crazy+''):]
        # ditto with extra crap at the end.
        data = data[:data.index('</'+crazy+'>')+len('</'+crazy+'>')]

        soup = self.make_soup(data)
        body = soup.findAll('body') ## some stories use a nested body and body
                                    ## tag, in which case we don't
                                    ## need crazytagstringnobodywouldstumbleonaccidently
                                    ## and use the second one instead.
        if len(body)>1:
            text = body[1]
            text.name='div' # force to be a div to avoid multiple body tags.
        else:
            text = soup.find(crazy, {'id' : 'storytext'})
            text.name='div' # change to div tag.

        if not data or not text:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        # not sure how, but we can get html, etc tags still in some
        # stories. That breaks later updates because it confuses
        # epubutils.py
        for tag in text.findAll('head'):
            tag.extract()

        for tag in text.findAll('body') + text.findAll('html'):
            tag.name = 'div'

        return self.utf8FromSoup(url,text)

def getClass():
    return FictionAlleyOrgSiteAdapter
@ -1,6 +1,6 @@

# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -23,11 +23,96 @@ from .. import exceptions as exceptions

from ..htmlcleanup import stripHTML

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

ampfandoms = ["A Falcone & Driscoll Investigation",
              "Alias Smith & Jones",
              "Atelier Escha & Logy",
              "Austin & Ally",
              "Baby & Me/赤ちゃんと僕",
              "Barney & Friends",
              "Between Love & Goodbye",
              "Beyond Good & Evil",
              "Bill & Ted's Excellent Adventure/Bogus Journey",
              "BLACK & WHITE",
              "Bonnie & Clyde",
              "Brandy & Mr. Whiskers",
              "Brothers & Sisters",
              "Bucket & Skinner's Epic Adventures",
              "Calvin & Hobbes",
              "Cats & Dogs",
              "Command & Conquer",
              "Devil & Devil",
              "Dharma & Greg",
              "Dicky & Dawn",
              "Drake & Josh",
              "Edgar & Ellen",
              "Franklin & Bash",
              "Gabby Duran & The Unsittables",
              "Girls und Panzer/ガールズ&パンツァー",
              "Gnomeo & Juliet",
              "Grim Adventures of Billy & Mandy",
              "Half & Half/ハーフ・アンド・ハーフ",
              "Hansel & Gretel",
              "Hatfields & McCoys",
              "High & Low - The Story of S.W.O.R.D.",
              "Home & Away",
              "Hudson & Rex",
              "Huntik: Secrets & Seekers",
              "Imagine Me & You",
              "Jekyll & Hyde",
              "Jonathan Strange & Mr. Norrell",
              "Knight's & Magic/ナイツ&マジック",
              "Law & Order: Los Angeles",
              "Law & Order: Organized Crime",
              "Lilo & Stitch",
              "Locke & Key",
              "Lockwood & Co.",
              "Lost & Found Music Studios",
              "Lu & Og",
              "Me & My Brothers",
              "Melissa & Joey",
              "Mickey Mouse & Friends",
              "Mike & Molly",
              "Mike, Lu & Og",
              "Miraculous: Tales of Ladybug & Cat Noir",
              "Mork & Mindy",
              "Mount&Blade",
              "Mr. & Mrs. Smith",
              "Mr. Peabody & Sherman",
              "Muhyo & Roji",
              "Nicky, Ricky, Dicky & Dawn",
              "Oliver & Company",
              "Ozzy & Drix",
              "Panty & Stocking with Garterbelt/パンティ&ストッキングwithガーターベルト",
              "Penryn & the End of Days",
              "Prep & Landing",
              "Prince & Hero/王子とヒーロー",
              "Prince & Me",
              "Puzzle & Dragons",
              "Ren & Stimpy Show",
              "Rizzoli & Isles",
              "Romeo & Juliet",
              "Rosemary & Thyme",
              "Sam & Cat",
              "Sam & Max",
              "Sapphire & Steel",
              "Scott & Bailey",
              "Shakespeare & Hathaway: Private Investigators",
              "Soul Nomad & the World Eaters",
              "Superman & Lois",
              "Tiger & Bunny/タイガー&バニー",
              "Trains & Automobiles",
              "Upin & Ipin",
              "Wallace & Gromit",
              "Witch & Wizard",
              "Wolverine & the X-Men",
              "Yotsuba&!/よつばと!",
              "Young & Hungry",
              ]


class FictionHuntComSiteAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
@ -59,7 +144,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d %H:%M:%S"

    @staticmethod
    def getSiteDomain():
@ -75,12 +160,47 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):

        ## http://fictionhunt.com/read/12411643/1
        return r"https?://(www.)?fictionhunt.com/(?P<type>read|stories)/(?P<id>[0-9a-z]+)(/(?P<title>[^/]+))?(/|/[^/]+)*/?$"

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    def needToLoginCheck(self, data):
        ## FH is apparently reporting "Story has been removed" for all
        ## chapters when not logged in now.
        if 'https://fictionhunt.com/login' in data:
            return True
        else:
            return False

    def performLogin(self, url):
        params = {}

        if self.password:
            params['identifier'] = self.username
            params['password'] = self.password
        else:
            params['identifier'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['remember'] = 'on'

        loginUrl = 'https://' + self.getSiteDomain() + '/login'

        if not params['identifier']:
            logger.info("This site requires login.")
            raise exceptions.FailedToLogin(url,params['identifier'])

        ## need to pull empty login page first to get authenticity_token
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['identifier']))
        soup = self.make_soup(self.get_request(loginUrl,usecache=False))
        params['_token']=soup.find('input', {'name':'_token'})['value']

        d = self.post_request(loginUrl, params, usecache=False)
        # logger.debug(d)

        if self.needToLoginCheck(d):
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['identifier']))
            raise exceptions.FailedToLogin(url,params['identifier'])
            return False
        else:
            return True

    def doExtractChapterUrlsAndMetadata(self,get_cover=True):
@ -88,35 +208,34 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):

        # metadata and chapter list

        url = self.url
        try:
            data = self.get_request(url)

            ## As per #784, site isn't requiring login anymore.
            ## Login check commented since we've seen it toggle before.
            # if self.needToLoginCheck(data):
            #     self.performLogin(url)
            #     data = self.get_request(url,usecache=False)

            soup = self.make_soup(data)
            ## detect old storyUrl, switch to new storyUrl:
            canonlink = soup.find('link',rel='canonical')
            if canonlink:
                # logger.debug(canonlink)
                canonlink = re.sub(r"/chapters/\d+","",canonlink['href'])
                # logger.debug(canonlink)
                self._setURL(canonlink)
                url = self.url
                data = self.get_request(url)
                soup = self.make_soup(data)
            else:
                # in case title changed
                self._setURL(soup.select_one("div.Story__details a")['href'])
                url = self.url

        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # logger.debug(data)
        self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))

        summhead = soup.find('h5',string='Summary')
        self.setDescription(url,summhead.find_next('div'))

        ## author:
@ -125,40 +244,43 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):

        self.story.setMetadata('authorUrl',autha['href'])
        self.story.setMetadata('author',autha.string)

        updlab = soup.find('label',string='Last Updated:')
        if updlab:
            update = updlab.find_next('time')['datetime']
            self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))

        publab = soup.find('label',string='Published:')
        if publab:
            pubdate = publab.find_next('time')['datetime']
            self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))

        ## need author page for some metadata.
        authsoup = None
        authpagea = autha
        authstorya = None

        ## Rating and exact word count doesn't appear on the summary
        ## page, try to get from author page.

        ## find story url, might need to spin through author's pages.
        while authpagea and not authstorya:
            # logger.debug(authpagea)
            authsoup = self.make_soup(self.get_request(authpagea['href']))
            authpagea = authsoup.find('a',{'rel':'next'})
            # CSS selectors don't allow : or / unquoted, which
            # BS4(and dependencies) didn't used to enforce.
            authstorya = authsoup.select_one('h4.Story__item-title a[href="%s"]'%self.url)

        if not authstorya:
            raise exceptions.FailedToDownload("Error finding %s on author page(s)" % self.url)

        meta = authstorya.find_parent('li').find('div',class_='Story__meta-info')
        meta=meta.text.split()
        self.story.setMetadata('numWords',meta[meta.index('words')-1])
        self.story.setMetadata('rating',meta[meta.index('Rating:')+1])
        # logger.debug(meta)

        # Find original ffnet URL
        a = soup.find('a', string="Source")
        self.story.setMetadata('origin',stripHTML(a))
        self.story.setMetadata('originUrl',a['href'])

@ -177,15 +299,37 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):

        for a in soup.select('a[href*="pairings="]'):
            self.story.addToList('ships',stripHTML(a).replace("+","/"))

        for a in soup.select('div.Story__type a[href*="fandoms="]'):
            # logger.debug(a)
            fandomstr=stripHTML(a).replace(' Fanfiction','').strip()
            # logger.debug("'%s'"%fandomstr)
            ## haven't thought of a better way to detect and *not*
            ## split on fandoms with a '&' in them.
            for ampfandom in ampfandoms:
                if ampfandom in fandomstr:
                    self.story.addToList('category',ampfandom)
                    fandomstr = fandomstr.replace(ampfandom,'')
            for fandom in fandomstr.split('&'):
                if fandom:
                    self.story.addToList('category',fandom)
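            # Worked example (assumed fandom string, not from the diff): in
            # "Harry Potter & Lilo & Stitch", the ampfandoms pass pulls out
            # "Lilo & Stitch" as one category first, so the later split on '&'
            # no longer cuts that title in half.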
            ## Currently no 'Original' stories on the site, but does list
            ## it as a search type. Set extratags: and uncomment this if
            ## and when.
            # if self.story.getList('category'):
            #     self.story.addToList('category', 'FanFiction')
            # else:
            #     self.story.addToList('category', 'Original')

        for chapli in soup.select('ul.StoryContents__chapters li'):
            self.add_chapter(stripHTML(chapli.select_one('span.chapter-title')),chapli.select_one('a')['href'])

        if self.num_chapters() == 0:
            raise exceptions.FailedToDownload("Story at %s has no chapters." % self.url)

    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)
        data = self.get_request(url)

        soup = self.make_soup(data)

594  fanficfare/adapters/adapter_fictionlive.py  Normal file

@ -0,0 +1,594 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#### Hazel's fiction.live fanficfare adapter
# what an *adventure* this was. fiction.live is an angular web3.0 app that does async background stuff everywhere.
# they're not kidding about it being live.
# can I wrangle its stories into books for offline reading? yes I 98% can!

### won't support, because they aren't part of the text
# chat, threads, chat replies on vote options

### can't support because wtf this is a book
# music / audio embeds
# per-user achievement tracking with fancy achievement-get animations
# story scripting (shows script tags visible in the text, not computed values or input fields)

import re
import json
from datetime import datetime

import itertools

import logging
logger = logging.getLogger(__name__)

# __package__ = 'fanficfare.adapters' # fixes dev issues with unknown package base

from .base_adapter import BaseSiteAdapter
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from ..six import ensure_text

def getClass():
    return FictionLiveAdapter

class FictionLiveAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.story.setMetadata('siteabbrev','flive')
        self.story_id = self.parsedUrl.path.split('/')[3]
        self.story.setMetadata('storyId', self.story_id)

        self.chapter_id_to_api = {}

        # normalize URL. omits title in the url
        self._setURL("https://fiction.live/stories//{s_id}".format(s_id = self.story_id))

    @staticmethod
    def getSiteDomain():
        return "fiction.live"

    @classmethod
    def getAcceptDomains(cls):
        return ["fiction.live", "beta.fiction.live"] # I still remember anonkun, but the domain has now lapsed

    def getSiteURLPattern(self):
        # I'd like to thank regex101.com for helping me screw this up less
        return r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/([a-zA-Z0-9\-]+)(/(home)?)?$"

    @classmethod
    def getSiteExampleURLs(cls):
        return ("https://fiction.live/stories/Example-Story-Title/17CharacterIDhere/home "
                +"https://fiction.live/stories/Example-Story-With-Long-ID/-20CharacterIDisHere "
                +"https://fiction.live/Sci-fi/Example-Story-With-URL-Genre/17CharacterIDhere/ "
                +"https://fiction.live/stories/Example-Story-With-UUID/00000000-0000-4000-0000-000000000000/")

    @classmethod
    def get_section_url(cls,url):
        ## minimal URL used for section names in INI and reject list
        ## for comparison
        # logger.debug("pre--url:%s"%url)
        url = re.sub(r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/(?P<id>[a-zA-Z0-9\-]+)(/(home)?)?$",r'https://fiction.live/stories//\g<id>',url)
        # logger.debug("post-url:%s"%url)
        return url
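    # Illustrative only: "https://fiction.live/stories/Some-Title/abc123DEF456ghi78/home"
    # normalizes to "https://fiction.live/stories//abc123DEF456ghi78".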
    def parse_timestamp(self, timestamp):
        # fiction.live date format is unix-epoch milliseconds. not a good fit for fanficfare's makeDate.
        # doesn't use a timezone object and returns tz-naive datetimes. I *think* I can leave the rest to fanficfare
        return datetime.fromtimestamp(timestamp / 1000.0, None)
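    # Illustrative only: parse_timestamp(1609459200000) gives
    # 2021-01-01 00:00:00 on a UTC host; fromtimestamp() with tz=None
    # returns naive local time.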
    def img_url_trans(self,imgurl):
        "Apparently site changed cdn URLs for images more than once."
        # logger.debug("pre--imgurl:%s"%imgurl)
        imgurl = re.sub(r'(\w+)\.cloudfront\.net',r'cdn6.fiction.live/file/fictionlive',imgurl)
        imgurl = re.sub(r'www\.filepicker\.io/api/file/(\w+)',r'cdn4.fiction.live/fp/\1',imgurl)
        imgurl = re.sub(r'cdn[34].fiction.live/(.+)',r'cdn6.fiction.live/file/fictionlive/\1',imgurl)
        # logger.debug("post-imgurl:%s"%imgurl)
        return imgurl
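    # Illustrative rewrite (assumed URL): "https://abc123.cloudfront.net/img/foo.jpg"
    # becomes "https://cdn6.fiction.live/file/fictionlive/img/foo.jpg".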
    def doExtractChapterUrlsAndMetadata(self, get_cover=True):

        metadata_url = "https://fiction.live/api/node/{s_id}/"
        response = self.get_request(metadata_url.format(s_id = self.story_id))

        if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
            raise exceptions.StoryDoesNotExist("Empty response for " + self.url)

        data = json.loads(response)

        ## get metadata for multi route chapters
        if 'multiRoute' in data and data['multiRoute'] == True:
            route_metadata_url = "https://fiction.live/api/anonkun/routes/{s_id}/"
            response = self.get_request(route_metadata_url.format(s_id = self.story_id))

            if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
                raise exceptions.StoryDoesNotExist("Empty response for " + self.url)

            data["route_metadata"] = json.loads(response)

        self.extract_metadata(data, get_cover)
        self.add_chapters(data)

    def extract_metadata(self, data, get_cover):
        # on one hand, we've got nicely-formatted JSON and can just index into the thing we want, no parsing needed.
        # on the other, nearly *everything* in this api is optional. found that out the hard way.

        # not optional
        self.story.setMetadata('title', stripHTML(data['t']))

        # stories have ut, rt, ct, and cht. fairly sure that ut = update time and rt = release time.
        # ct is 'creation time' and everything in the api has it -- you can create stories and edit before publishing
        # cht is *chunktime* -- newest story chunk added.
        # ut for update time includes other kinds of update -- threads, chat etc
        # ct <= rt <= cht <= ut
        self.story.setMetadata("dateUpdated", self.parse_timestamp(data['cht']))
        self.story.setMetadata("datePublished", self.parse_timestamp(data['rt']))

        self.most_recent_chunk = data['cht'] if 'cht' in data else 9999999999999998

        # nearly everything optional from here out

        if 'storyStatus' in data:
            status_translate = {'active': "In-Progress", 'finished': "Completed"} # fiction.live to fanficfare
            status = data['storyStatus']
            self.story.setMetadata('status', status_translate.get(status, status.title()))
        elif 'complete' in data:
            if data['complete'] == True:
                self.story.setMetadata('status', "Completed")
            else:
                self.story.setMetadata('status', "In-Progress")
        else:
            self.story.setMetadata('status', "In-Progress")

        if 'contentRating' in data:
            self.story.setMetadata('rating', data['contentRating'])
        elif 'tAge' in data:
            self.story.setMetadata('rating', data['tAge'])
        else:
            self.story.setMetadata('rating', "teen")

        if 'w' in data: self.story.setMetadata('numWords', data['w'])
        if 'likeCount' in data: self.story.setMetadata('likes', data['likeCount'])
        if 'rInput' in data: self.story.setMetadata('reader_input', data['rInput'].title())

        summary = stripHTML(data['d']) if 'd' in data else ""
        firstblock = data['b'].strip() if 'b' in data else ""
        self.setDescription(self.url, summary if not firstblock else summary + "\n<br />\n" + firstblock)

        tags = data['ta'] if 'ta' in data else []

        if (self.story.getMetadataRaw('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
           not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

        show_spoiler_tags = self.getConfig('show_spoiler_tags')
        spoiler_tags = data['spoilerTags'] if 'spoilerTags' in data else []
        for tag in tags:
            if show_spoiler_tags or not tag in spoiler_tags:
                self.story.addToList('tags', tag)

        authors = data['u'] # non-optional
        if len(authors) > 1:
            for author in data['u']:
                if '_id' in author and author['n']: # some stories have spurious co-authors (may have been fixed?)
                    self.story.addToList('author', author['n'])
                    self.story.addToList('authorUrl', "https://fiction.live/user/" + author['n'] + "/")
                    self.story.addToList('authorId', author['_id'])
        else: # TODO: can avoid this?
            author = authors[0]
            self.story.setMetadata('author', author['n'])
            self.story.setMetadata('authorUrl', "https://fiction.live/user/" + author['n'] + "/")
            self.story.setMetadata('authorId', author['_id'])

        if 'isLive' in data and data['isLive']:
            self.story.setMetadata('live', "Now! (at time of download)")
        elif 'nextLive' in data and data['nextLive']:
            # formatted to match site, not other fanficfare timestamps
            next_live_time = self.parse_timestamp(data['nextLive'])
            self.story.setMetadata('live', next_live_time)

        show_nsfw_cover_images = self.getConfig('show_nsfw_cover_images')
        nsfw_cover = data['nsfwCover'] if 'nsfwCover' in data else False
        if get_cover and 'i' in data:
            if show_nsfw_cover_images or not nsfw_cover:
                coverUrl = data['i'][0]
                self.setCoverImage(self.url, coverUrl)

        # gonna need these later for adding details to achievement-granting links in the text
        try:
            self.achievements = data['achievements']['achievements']
        except KeyError:
            self.achievements = []

    def add_chapters(self, data):

        ## chapter urls are for the api. they return json and aren't user-navigable, or the same as on the website
        chunkrange_url = "https://fiction.live/api/anonkun/chapters/{s_id}/{start}/{end}/"

        ## api url to get content of a multi route chapter. requires only the route id and no timestamps
        route_chunkrange_url = "https://fiction.live/api/anonkun/route/{c_id}/chapters"

        def add_chapter_url(title, bounds):
            "Adds a chapter url based on the start/end chunk-range timestamps."
            start, end = bounds
            end -= 1
            chapter_url = chunkrange_url.format(s_id = data['_id'], start = start, end = end)
            self.add_chapter(title, chapter_url)

        def add_route_chapter_url(title, route_id):
            "Adds a route chapter url based on the route id."
            chapter_url = route_chunkrange_url.format(c_id = route_id)
            self.add_chapter(title, chapter_url)

        def pair(iterable):
            "[1,2,3,4] -> [(1, 2), (2, 3), (3, 4)]"
            a, b = itertools.tee(iterable, 2)
            next(b, None)
            return list(zip(a, b))

        def map_chap_ids_to_api(chapter_ids, route_ids, times):
            for index, bounds in enumerate(times):
                start, end = bounds
                end -= 1
                chapter_url = chunkrange_url.format(s_id = data['_id'], start = start, end = end)
                self.chapter_id_to_api[chapter_ids[index]] = chapter_url

            for route_id in route_ids:
                chapter_url = route_chunkrange_url.format(c_id = route_id)
                self.chapter_id_to_api[route_id] = chapter_url

        ## first thing to do is separate out the appendices
        appendices, maintext, routes = [], [], []
        chapters = data['bm'] if 'bm' in data else []

        ## not all stories use multiple routes. Those that do have a route id and a title for each route
        if 'route_metadata' in data and data['route_metadata']:
            for r in data['route_metadata']:
                # checking if route title even exists or is None, since most things in the api are optional
                if 't' in r and r['t'] is not None:
                    title = r['t']
                else:
                    title = ""
                routes.append({"id": r['_id'], "title": title})

        for c in chapters:
            appendices.append(c) if c['title'].startswith('#special') else maintext.append(c)

        ## main-text chapter extraction processing. *should* now handle all the edge cases.
        ## relies on fanficfare ignoring empty chapters!

        titles = ["Home"] + [c['title'] for c in maintext]
        chapter_ids = ['home'] + [c['id'] for c in maintext]
        times = [data['ct']] + [c['ct'] for c in maintext] + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1
        times = pair(times)
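        # Sketch of the pairing: with start times [t0, t1, t2], pair() yields
        # [(t0, t1), (t1, t2)], so each chapter's API chunk range runs from its
        # own 'ct' up to just before the next chapter's.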
        if self.getConfig('include_appendices', True): # Add appendices after main text if desired
            titles = titles + ["Appendix: " + a['title'][9:] for a in appendices]
            chapter_ids = chapter_ids + [a['id'] for a in appendices]
            times = times + [(a['ct'], a['ct'] + 2) for a in appendices]

        route_ids = [r['id'] for r in routes]

        map_chap_ids_to_api(chapter_ids, route_ids, times) # Map chapter ids to API URLs for use when comparing the two

        # doesn't actually run without the call to list.
        list(map(add_chapter_url, titles, times))

        for r in routes: # add route at the end, after appendices
            route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp
            chapter_title = "Route: " + r['title'] # 'Route: ' at beginning of name, since it's a multiroute chapter
            add_route_chapter_url(chapter_title, route_id)

    def getChapterText(self, url):

        chunk_handler = {
            "choice" : self.format_choice,
            "readerPost" : self.format_readerposts,
            "chapter" : self.format_chapter
            }

        response = self.get_request(url)
        data = json.loads(response)

        if data == []:
            return ""
        # and *now* we can assume there's at least one chunk in the data -- chapters can be totally empty.

        # are we trying to read an appendix? check the first chunk to find out.
        getting_appendix = len(data) == 1 and 't' in data[0] and data[0]['t'].startswith("#special")

        text = ""

        for count, chunk in enumerate(data):

            # logger.debug(count) # pollutes the debug log, shows which chunk crashed the handler

            text += "<div>" # chapter chunks aren't always well-delimited in their contents

            # appendix chunks are mixed in with other things
            if not getting_appendix and 't' in chunk and chunk['t'].startswith("#special"): # t = title = bookmark
                continue

            handler = chunk_handler.get(chunk['nt'], self.format_unknown) # nt = node type
            text += handler(chunk)

            show_timestamps = self.getConfig('show_timestamps')
            if show_timestamps and 'ct' in chunk:
                #logger.debug("Adding timestamp for chunk...")
                timestamp = ensure_text(self.parse_timestamp(chunk['ct']).strftime("%x -- %X"))
                text += '<div class="ut">' + timestamp + '</div>'

            text += "</div><br />\n"

        ## soup to repair the most egregious HTML errors.
        return self.utf8FromSoup(url,self.make_soup(text))

    ### everything from here out is chunk data handling.

    def format_chapter(self, chunk):
        """Handles any formatting in the chapter body text for text chapters.
        In the 'default case' where we're getting boring chapter-chunk body text, just calls utf8fromSoup
        and returns the text as is on the website."""

        soup = self.make_soup(chunk['b'] if 'b' in chunk else "")

        if self.getConfig('legend_spoilers',True):
            soup = self.add_spoiler_legends(soup)

        if self.achievements:
            soup = self.append_achievements(soup)

        return str(soup)

    def add_spoiler_legends(self, soup):
        # find spoiler links and change link-anchor block to legend block
        spoilers = soup.find_all('a', class_="tydai-spoiler")
        for link_tag in spoilers:
            link_tag.name = 'fieldset'
            legend = soup.new_tag('legend')
            legend.string = "Spoiler"
            link_tag.insert(0, legend)
        return soup

    def fictionlive_normalize(self, string):
        # might be able to use this to preserve titles in normalized urls, if the scheme is the same

        # BUG: in achievement ids these are all replaced, but I *don't* know that the list is complete.
        # should be rare, thankfully. *most* authors don't use any funny characters in the achievement's *ID*
        special_chars = "\"\\,.!?+=/[](){}<>_'@#$%^&*~`;:|" # not the hyphen, which is used to represent spaces

        return string.lower().replace(" ", "-").translate({ord(x) : None for x in special_chars})
|
||||
|
||||
def append_achievments(self, soup):
|
||||
# achivements are present in the text as a kind of link, and you get the shiny popup by clicking them.
|
||||
achievement_links = soup.find_all('a', class_="tydai-achievement")
|
||||
|
||||
achieved_ids = []
|
||||
for link_tag in achievement_links:
|
||||
# these are not only prepended by a unicode lightning-bolt, but also format clearly as a link
|
||||
# should use .u css selector -- part of output_css defaults? or just let replace_tags_with_spans do it?
|
||||
new_u = soup.new_tag('u')
|
||||
new_u.string = link_tag.text # copy out the link text into a new element
|
||||
# html entities for improved compatability with AZW3 conversion
|
||||
link_tag.string = "⚡" # then overwrite
|
||||
link_tag.insert(1, new_u)
|
||||
|
||||
## while we've got the achievment links, get the ids from the link
|
||||
a_id = link_tag['data-id']
|
||||
a_id = self.fictionlive_normalize(a_id)
|
||||
|
||||
achieved_ids.append(a_id)
|
||||
|
||||
if achieved_ids:
|
||||
logger.debug("achievements (this chunk): " + ", ".join(achieved_ids))
|
||||
|
||||
# can't replicate the animated shiny announcement popup, so have an end-of-chunk announcement instead
|
||||
# TODO: achievement images -- does anyone use them?
|
||||
a_source = "<br />\n<fieldset><legend>⚡ Achievement obtained!</legend>\n<h4>{}</h4>\n{}</fieldset>\n"
|
||||
|
||||
for a_id in achieved_ids:
|
||||
if a_id in self.achievements:
|
||||
a_title = self.achievements[a_id]['t'] if 't' in self.achievements[a_id] else a_id.title()
|
||||
a_text = self.achievements[a_id]['d'] if 'd' in self.achievements[a_id] else ""
|
||||
soup.append(self.make_soup(a_source.format(a_title, a_text)))
|
||||
else:
|
||||
a_title = a_id.title()
|
||||
error = "<br />\n<fieldset><legend>Error: Achievement not found.</legend>Couldn't find '{}'. Ask the story author to check if the achievment exists."
|
||||
soup.append(self.make_soup(error.format(a_title)))
|
||||
|
||||
return soup
|
||||
|
||||
def count_votes(self, chunk):
|
||||
"""So, fiction.live's api doesn't return the counted votes you see on the website.
|
||||
After all, it needs to allow for things like revoking a vote,
|
||||
with the count live and updated in realtime on your client.
|
||||
So instead we get the raw vote-data, but have to count it ourselves."""
|
||||
|
||||
# optional.
|
||||
choices = chunk['choices'] if 'choices' in chunk else []
|
||||
|
||||
def counter(votes):
|
||||
output = [0] * len(choices)
|
||||
for vote in votes.values():
|
||||
## votes are either a single option-index or a list of option-indicies, depending on the choice type
|
||||
if 'multiple' in chunk and chunk['multiple'] == False:
|
||||
vote = [vote] # normalize to list
|
||||
for v in vote:
|
||||
# v should only be int, but there is at least one story where some unrelated string was returned,
|
||||
# so let's just ignore non-int values here
|
||||
if not isinstance(v, int):
|
||||
continue
|
||||
if 0 <= v < len(choices):
|
||||
output[v] += 1
|
||||
return output
|
||||
|
||||
# I believe that verified is always a subset of all votes, but that's not enforced here
|
||||
total_votes = counter(chunk['votes'] if 'votes' in chunk else {})
|
||||
verified_votes = counter(chunk['userVotes'] if 'userVotes' in chunk else {})
|
||||
|
||||
# Choices can link to route chapters, where the index of the choice in list 'choices' is a key in the
|
||||
# 'routes' dict and the dict value is the route id.
|
||||
# That route id is needed for the url to create the internal link from the choice to the route chapter.
|
||||
routes = chunk['routes'] if 'routes' in chunk else {}
|
||||
if choices and len(routes) > 0:
|
||||
altered_choices = []
|
||||
for i, choice in enumerate(choices):
|
||||
choice_index = str(i)
|
||||
if choice_index in routes.keys():
|
||||
route_chunkrange_url = "https://fiction.live/api/anonkun/route/{c_id}/chapters"
|
||||
route_url = route_chunkrange_url.format(c_id=routes[choice_index])
|
||||
choice_link = "<a data-orighref='" + route_url + "' >" + choice + "</a>"
|
||||
altered_choices.append(choice_link)
|
||||
else:
|
||||
altered_choices.append(choice)
|
||||
choices = altered_choices
|
||||
|
||||
return zip(choices, verified_votes, total_votes)
|
||||
|
||||
def format_choice(self, chunk):
|
||||
|
||||
options = self.count_votes(chunk)
|
||||
|
||||
# crossed-out writeins. authors can censor user-written choices, and (optionally) offer a reason.
|
||||
x_outs = [int(x) for x in chunk['xOut']] if 'xOut' in chunk else []
|
||||
x_reasons = chunk['xOutReasons'] if 'xOutReasons' in chunk else {}
|
||||
|
||||
closed = "closed" if 'closed' in chunk else "open" # BUG: check on reopened votes
|
||||
|
||||
num_voters = len(chunk['votes']) if 'votes' in chunk else 0
|
||||
|
||||
vote_title = chunk['b'] if 'b' in chunk else "Choices"
|
||||
|
||||
output = ""
|
||||
# start with the header
|
||||
output += u"<h4><span>" + vote_title + " — <small>Voting " + closed
|
||||
output += u" — " + str(num_voters) + " voters</small></span></h4>\n"
|
||||
|
||||
# we've got everything needed to build the html for our vote table.
|
||||
output += "<table class=\"voteblock\">\n"
|
||||
|
||||
# filter out the crossed-out options, which display last
|
||||
crossed = []
|
||||
for index, (choice_text, verified_votes, total_votes) in enumerate(options):
|
||||
if index in x_outs:
|
||||
crossed.append((index, choice_text, verified_votes, total_votes))
|
||||
else:
|
||||
output += "<tr class=\"choiceitem\"><td>" + str(choice_text) + "</td><td class=\"votecount\">"
|
||||
if verified_votes > 0:
|
||||
output += "★" + str(verified_votes) + "/"
|
||||
output += str(total_votes)+ " </td></tr>\n"
|
||||
|
||||
# crossed out options are: displayed last, struckthrough, smaller, with the reason below, and no vote count.
|
||||
# also greyed out, but that's a bit much.
|
||||
for index, choice_text, _, _ in crossed:
|
||||
if choice_text == "permanentlyRemoved":
|
||||
continue
|
||||
else:
|
||||
x_reason = x_reasons[str(index)] if str(index) in x_reasons else ""
|
||||
output += "<tr class=\"choiceitem\"><td colspan=\"2\"><small><strike>" \
|
||||
+ str(choice_text) + "</strike><br>" + str(x_reason) + "</small></td></tr>"
|
||||
|
||||
output += "</table>\n"
|
||||
|
||||
return output
|
||||
|
||||
def format_readerposts(self, chunk):
|
||||
|
||||
closed = "Closed" if 'closed' in chunk else "Open"
|
||||
|
||||
posts = chunk['votes'] if 'votes' in chunk else {}
|
||||
dice = chunk['dice'] if 'dice' in chunk else {}
|
||||
|
||||
# now matches the site and does *not* include dicerolls as posts!
|
||||
num_votes = str(len(posts)) + " posts" if len(posts) != 0 else "be the first to post."
|
||||
|
||||
posts_title = chunk['b'] if 'b' in chunk else "Reader Posts"
|
||||
|
||||
output = ""
|
||||
output += u"<h4><span>" + posts_title + " — <small> Posting " + closed
|
||||
output += u" — " + num_votes + "</small></span></h4>\n"
|
||||
|
||||
## so. a voter can roll with their post. these rolls are in a seperate dict, but have the **same uid**.
|
||||
## they're then formatted with the roll above the writein for that user.
|
||||
## I *think* that formatting roll-only before writein-only posts is correct, but tbh, it's hard to tell.
|
||||
## writeins are usually opened by the author for posts or rolls, not both at once.
|
||||
## people tend to only mix the two by accident.
|
||||
if dice != {}:
|
||||
for uid, roll in dice.items():
|
||||
output += '<div class="choiceitem">'
|
||||
if roll: # optional. just because there's a list entry for it doesn't mean it has a value!
|
||||
output += '<div class="dice">' + str(roll) + '</div>\n'
|
||||
if uid in posts:
|
||||
post = posts[uid]
|
||||
if post:
|
||||
output += str(post)
|
||||
del posts[uid] # it's handled here with the roll instead of later
|
||||
output += '</div>'
|
||||
|
||||
for post in posts.values():
|
||||
if post:
|
||||
output += '<div class="choiceitem">' + str(post) + '</div>\n'
|
||||
|
||||
return output
|
||||
|
||||
def normalize_chapterurl(self, url):
|
||||
if url.startswith(r'https://fiction.live/api/anonkun/chapters'):
|
||||
return url
|
||||
|
||||
pattern = None
|
||||
|
||||
if url.startswith(r'https://fiction.live/api/anonkun/route'):
|
||||
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/routes/([a-zA-Z0-9]+)"
|
||||
elif url.startswith(r'https://fiction.live/'):
|
||||
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/[^/]*(/[a-zA-Z0-9]+|home)"
|
||||
# regex101 rocks
|
||||
|
||||
if not pattern:
|
||||
return url
|
||||
|
||||
match = re.match(pattern, url)
|
||||
if not match:
|
||||
return url
|
||||
|
||||
chapter_id = match.group(1)
|
||||
|
||||
if chapter_id.startswith('/'):
|
||||
chapter_id = chapter_id[1:]
|
||||
|
||||
if chapter_id and chapter_id in self.chapter_id_to_api:
|
||||
return self.chapter_id_to_api[chapter_id]
|
||||
|
||||
return url
|
||||
|
||||
def format_unknown(self, chunk):
|
||||
raise NotImplementedError("Unknown chunk type ({}) in fiction.live story.".format(chunk))
|
||||
|
||||
# in future, I'd like to handle audio embeds somehow. but they're not availble to add to stories right now.
|
||||
# pretty sure they'll just format as a link (with a special tydai-audio class) and should be easier than achievements
|
||||
|
||||
# TODO: verify that show_timestamps is working, check times!
|
||||
|
||||
# TODO: find a story that uses achievement images and implement them?
|
||||
|
||||
### known bugs:
|
||||
|
||||
# TODO: support chapter urls for single-chapter / chapter-range downloads
|
||||
# complicated -- urls for getChapterText are API urls generated by add_chapters, not the public/website ones
|
||||
# in particular, may need more API reversing to figure out how to get the *end* of the chunk range
|
||||
# find in 'bm' in the metadata?
|
||||
|
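The vote-counting logic above reduces the API's raw per-user vote data to per-choice totals. Here is a minimal standalone sketch of the same reduction, with an invented sample chunk (real chunks come from the fiction.live API):

# Invented sample chunk; the 99 stands in for the junk values the counter skips.
chunk = {
    'choices': ["Go left", "Go right", "Rest"],
    'multiple': False,
    'votes': {'user1': 0, 'user2': 2, 'user3': 0, 'user4': 99},
}

choices = chunk.get('choices', [])
totals = [0] * len(choices)
for vote in chunk.get('votes', {}).values():
    if 'multiple' in chunk and chunk['multiple'] == False:
        vote = [vote]                                  # single index -> list
    for v in vote:
        if isinstance(v, int) and 0 <= v < len(choices):
            totals[v] += 1                             # count only sane indices
print(list(zip(choices, totals)))  # [('Go left', 2), ('Go right', 0), ('Rest', 1)]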
@@ -1,9 +1,10 @@
from __future__ import absolute_import
import re
+import logging
+logger = logging.getLogger(__name__)
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib import parse as urlparse
-from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate
@@ -39,23 +40,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
        self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)

        # Always single chapters, probably should use the Anthology feature to
        # merge chapters of a story
        self.story.setMetadata('numChapters', 1)

-    def _customized_fetch_url(self, url, exception=None, parameters=None):
-        if exception:
-            try:
-                data = self._fetchUrl(url, parameters)
-            except HTTPError:
-                raise exception(self.url)
-        # Just let self._fetchUrl throw the exception, don't catch and
-        # customize it.
-        else:
-            data = self._fetchUrl(url, parameters)
-
-        return self.make_soup(data)
-
    @staticmethod
    def getSiteDomain():
        return FictionManiaTVAdapter.SITE_DOMAIN
@@ -65,11 +49,11 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
        return cls.READ_TEXT_STORY_URL_TEMPLATE % 1234

    def getSiteURLPattern(self):
-        return r'https?' + re.escape(self.BASE_URL[len('https'):]) + '(readtextstory|readxstory|details)\.html\?storyID=\d+$'
+        return r'https?' + re.escape(self.BASE_URL[len('https'):]) + r'(readtextstory|readhtmlstory|readxstory|details)\.html\?storyID=\d+$'

    def extractChapterUrlsAndMetadata(self):
        url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')
-        soup = self._customized_fetch_url(url)
+        soup = self.make_soup(self.get_request(url))

        keep_summary_html = self.getConfig('keep_summary_html')
        for row in soup.find('table')('tr'):
@@ -122,7 +106,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
            self.story.setMetadata('rating', value)

        elif key == 'Complete':
-            self.story.setMetadata('status', 'Completed' if value == 'Complete' else 'In-Progress')
+            self.story.setMetadata('status', 'Completed' if value == 'yes' else 'In-Progress')

        elif key == 'Categories':
            for element in cells[1]('a'):
@@ -152,22 +136,78 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
            self.story.setMetadata('readings', value)

    def getChapterText(self, url):
-        soup = self._customized_fetch_url(url)
-        element = soup.find('pre')
-        element.name = 'div'
+        if self.getConfig("download_text_version",False):
+            soup = self.make_soup(self.get_request(url))
+            element = soup.find('pre')
+            element.name = 'div'

-        # The story's content is contained in a <pre> tag, probably taken 1:1
-        # from the source text file. A simple replacement of all newline
-        # characters with a break line tag should take care of formatting.
+            # The story's content is contained in a <pre> tag, probably taken 1:1
+            # from the source text file. A simple replacement of all newline
+            # characters with a break line tag should take care of formatting.

-        # While wrapping in paragraphs would be possible, it's too much work,
-        # I'd rather display the story 1:1 like it was found in the pre tag.
-        content = unicode(element)
-        content = content.replace('\n', '<br/>')
+            # While wrapping in paragraphs would be possible, it's too much work,
+            # I'd rather display the story 1:1 like it was found in the pre tag.
+            content = unicode(element)
+            content = content.replace('\n', '<br/>')

-        if self.getConfig('non_breaking_spaces'):
-            return content.replace(' ', '&nbsp;')
+            if self.getConfig('non_breaking_spaces'):
+                return content.replace(' ', '&nbsp;')

-        ## Normally, getChapterText should use self.utf8FromSoup(),
-        ## but this is converting from plain(ish) text. -- JM
-        return content
+            ## Normally, getChapterText should use self.utf8FromSoup(),
+            ## but this is converting from plain(ish) text. -- JM
+            return content
+
+        else:
+
+            # try SWI (story with images) version first
+            # <div style="margin-left:10ex;margin-right:10ex">
+            ## fetching SWI version now instead of text.
+            htmlurl = url.replace('readtextstory','readhtmlstory')
+            ## Used to find by style, but it's inconsistent now. We've seen:
+            ##   margin-left:10ex;margin-right:10ex
+            ##   margin-right: 5%; margin-left: 5%
+            ##   margin-left:5%; margin-right:5%
+            ##   margin-left:5%; margin-right:5%; background: white
+            ## And there are some without a <div> tag (or with an unclosed div).
+            ## Only the comments appear to be consistent.
+            beginmarker='<!--Read or display the file-->'
+            endmarker='''<hr size=1 noshade>
+<!--review add read, top and bottom-->
+'''
+            data = self.get_request(htmlurl)
+            try:
+                ## if both markers are found, assume whatever is in between
+                ## is the chapter text.
+                soup = self.make_soup(data[data.index(beginmarker):data.index(endmarker)])
+                return self.utf8FromSoup(htmlurl,soup)
+            except Exception as e:
+                # logger.debug(e)
+                # logger.debug(soup)
+                logger.debug("Story With Images(SWI) not found, falling back to HTML.")
+
+            ## fetching html version now instead of text.
+            ## Note that html and SWI pages are *not* formatted the same.
+            soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
+            # logger.debug(soup)
+
+            # remove first hr and everything before
+            remove = soup.find('hr')
+            # logger.debug(remove)
+            for tag in remove.find_previous_siblings():
+                tag.extract()
+            remove.extract()
+
+            # remove trailing hr, parent tags and everything after.
+            remove = soup.find('hr',size='1') # <center><hr size=1>
+            if remove.parent.name == 'center':
+                ## can also be directly in body without <center>
+                remove = remove.parent
+            # logger.debug(remove)
+            for tag in remove.find_next_siblings():
+                tag.extract()
+            remove.extract()
+
+            content = soup.find('body')
+            content.name='div'
+
+            return self.utf8FromSoup(url,content)
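The SWI branch above doesn't parse the page by tag or style at all; it slices the chapter text out of the raw HTML between two literal comment markers, with str.index raising ValueError to trigger the fallback. A minimal sketch of that technique (the markers here are shortened stand-ins for the site's real comments):

page = "<html><body><!--begin--><p>Story text</p><!--end--><hr></body></html>"
beginmarker = '<!--begin-->'
endmarker = '<!--end-->'
try:
    # str.index raises ValueError when a marker is missing, so a malformed
    # page falls through to the except branch, as in the adapter above.
    body = page[page.index(beginmarker):page.index(endmarker)]
except ValueError:
    body = None  # fall back to another page version
print(body)  # <!--begin--><p>Story text</p>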
@@ -1,195 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
import json


#from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

class FictionPadSiteAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','fpad')
        self.dateformat = "%Y-%m-%dT%H:%M:%SZ"
        self.is_adult=False
        self.username = None
        self.password = None
        # get storyId from url--url validation guarantees query correct
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('storyId',m.group('id'))

            # normalized story URL.
            self._setURL("https://"+self.getSiteDomain()
                         +"/author/"+m.group('author')
                         +"/stories/"+self.story.getMetadata('storyId'))
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

    @staticmethod
    def getSiteDomain():
        return 'fictionpad.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://fictionpad.com/author/Author/stories/1234/Some-Title"

    def getSiteURLPattern(self):
        # http://fictionpad.com/author/Serdd/stories/4275
        return r"http(s)?://(www\.)?fictionpad\.com/author/(?P<author>[^/]+)/stories/(?P<id>\d+)"

    # <form method="post" action="/signin">
    #   <input name="authenticity_token" type="hidden" value="u+cfdXh46dRnwVnSlmE2B2BFmHgu760paqgBG6KQeos=" />
    #   <input type="hidden" name="remember" value="1">
    #   <strong class="help-start text-center">or with FictionPad</strong>
    #   <label class="control-label hidden-placeholder">Pseudonym or Email Address</label>
    #   <input name="login" class="input-block-level" type="text" placeholder="Pseudonym or Email Address" maxlength="50" required autofocus>
    #   <label class="control-label hidden-placeholder">Password</label>
    #   <input name="password" class="input-block-level" type="password" placeholder="Password" minlength="6" required>
    #   <button type="submit" class="btn btn-primary btn-block">Sign In</button>
    #   <p class="help-end">
    #     <a href="/passwordreset">Forgot your password?</a>
    #   </p>
    # </form>
    def performLogin(self):
        params = {}

        if self.password:
            params['login'] = self.username
            params['password'] = self.password
        else:
            params['login'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['remember'] = '1'

        loginUrl = 'http://' + self.getSiteDomain() + '/signin'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['login']))

        ## need to pull empty login page first to get authenticity_token
        soup = self.make_soup(self._fetchUrl(loginUrl))
        params['authenticity_token']=soup.find('input', {'name':'authenticity_token'})['value']

        data = self._postUrl(loginUrl, params)

        if "Invalid email/pseudonym and password combination." in data:
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['login']))
            raise exceptions.FailedToLogin(loginUrl,params['login'])


    def extractChapterUrlsAndMetadata(self):
        # fetch the chapter. From that we will get almost all the
        # metadata and chapter list

        url=self.url
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
            if "This is a mature story. Please sign in to read it." in data:
                self.performLogin()
                data = self._fetchUrl(url)

            find = "wordyarn.config.page = "
            data = data[data.index(find)+len(find):]
            data = data[:data.index("</script>")]
            data = data[:data.rindex(";")]
            data = data.replace('tables:','"tables":')
            tables = json.loads(data)['tables']
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(url)
            else:
                raise e

        # looks like only one author per story allowed.
        author = tables['users'][0]
        story = tables['stories'][0]
        story_ver = tables['story_versions'][0]
        logger.debug("story:%s"%story)

        self.story.setMetadata('authorId',author['id'])
        self.story.setMetadata('author',author['display_name'])
        self.story.setMetadata('authorUrl','https://'+self.host+'/author/'+author['display_name']+'/stories')

        self.story.setMetadata('title',story_ver['title'])
        self.setDescription(url,story_ver['description'])

        if not ('assets/story_versions/covers' in story_ver['profile_image_url@2x']):
            self.setCoverImage(url,story_ver['profile_image_url@2x'])

        self.story.setMetadata('datePublished',makeDate(story['published_at'], self.dateformat))
        self.story.setMetadata('dateUpdated',makeDate(story['published_at'], self.dateformat))

        self.story.setMetadata('followers',story['followers_count'])
        self.story.setMetadata('comments',story['comments_count'])
        self.story.setMetadata('views',story['views_count'])
        self.story.setMetadata('likes',int(story['likes'])) # no idea why they floated these.
        if 'dislikes' in story:
            self.story.setMetadata('dislikes',int(story['dislikes']))

        if story_ver['is_complete']:
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        self.story.setMetadata('rating', story_ver['maturity_level'])
        self.story.setMetadata('numWords', unicode(story_ver['word_count']))

        for i in tables['fandoms']:
            self.story.addToList('category',i['name'])

        for i in tables['genres']:
            self.story.addToList('genre',i['name'])

        for i in tables['characters']:
            self.story.addToList('characters',i['name'])

        for c in tables['chapters']:
            chtitle = "Chapter %d"%c['number']
            if c['title']:
                chtitle += " - %s"%c['title']
            self.add_chapter(chtitle,c['body_url'])


    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)
        if not url:
            data = u"<em>This chapter has no text.</em>"
        else:
            data = self._fetchUrl(url)
        soup = self.make_soup(u"<div id='story'>"+data+u"</div>")
        return self.utf8FromSoup(url,soup)

def getClass():
    return FictionPadSiteAdapter
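The deleted FictionPad adapter pulled all of its metadata from a JavaScript assignment embedded in the page, trimming the string down to the assignment's value, dropping the trailing semicolon, and quoting the one bare key so json.loads accepts it. A minimal sketch of that scrape-JSON-from-script technique, with an invented page snippet standing in for the real source:

import json

html = '<script>wordyarn.config.page = {"stories":[{"id":1}],tables:{"users":[]}};</script>'

find = "wordyarn.config.page = "
data = html[html.index(find) + len(find):]
data = data[:data.index("</script>")]
data = data[:data.rindex(";")]               # drop the trailing semicolon
data = data.replace('tables:', '"tables":')  # quote the one bare key

page = json.loads(data)
print(page['tables'])  # {'users': []}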
@@ -21,7 +21,6 @@ logger = logging.getLogger(__name__)
import re

# py2 vs py3 transition
-from ..six import text_type as unicode

## They're from the same people and pretty much identical.
from .adapter_fanfictionnet import FanFictionNetSiteAdapter
@@ -44,8 +43,15 @@ class FictionPressComSiteAdapter(FanFictionNetSiteAdapter):
    def getSiteExampleURLs(cls):
        return "https://www.fictionpress.com/s/1234/1/ https://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title http://m.fictionpress.com/s/1234/1/"

-    def getSiteURLPattern(self):
-        return r"https?://(www|m)?\.fictionpress\.com/s/\d+(/\d+)?(/|/[a-zA-Z0-9_-]+)?/?$"
+    @classmethod
+    def _get_site_url_pattern(cls):
+        return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
+
+    ## normalized chapter URLs DO contain the story title now, but
+    ## normalized to the current urltitle in case of title changes.
+    def normalize_chapterurl(self,url):
+        return re.sub(r"https?://(www|m)\.(?P<keep>fictionpress\.com/s/\d+/\d+/).*",
+                      r"https://www.\g<keep>",url)+self.urltitle

def getClass():
    return FictionPressComSiteAdapter
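The normalize_chapterurl override above keeps only the stable part of a chapter URL via a named group and re-appends the story's current urltitle. The same substitution outside the class (the urltitle value here is made up):

import re

url = "http://m.fictionpress.com/s/1234/12/Old-Title"
urltitle = "Current-Title"  # the adapter reads this from self.urltitle

normalized = re.sub(r"https?://(www|m)\.(?P<keep>fictionpress\.com/s/\d+/\d+/).*",
                    r"https://www.\g<keep>", url) + urltitle
print(normalized)  # https://www.fictionpress.com/s/1234/12/Current-Title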
@@ -25,7 +25,6 @@ from ..htmlcleanup import stripHTML

# py2 vs py3 transition
from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate
@@ -50,7 +49,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
        return "https://ficwad.com/story/1234"

    def getSiteURLPattern(self):
-        return r"https?:"+re.escape(r"//"+self.getSiteDomain())+"/story/\d+?$"
+        return r"https?:"+re.escape(r"//"+self.getSiteDomain())+r"/story/\d+?$"

    def performLogin(self,url):
        params = {}
@@ -65,9 +64,10 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
        loginUrl = 'https://' + self.getSiteDomain() + '/account/login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['username']))
-        d = self._postUrl(loginUrl,params,usecache=False)
+        d = self.post_request(loginUrl,params,usecache=False)

-        if "Login attempt failed..." in d:
+        if "Login attempt failed..." in d or \
+           '<div id="error">Please enter your username and password.</div>' in d:
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['username']))
            raise exceptions.FailedToLogin(url,params['username'])
@@ -75,13 +75,6 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
        else:
            return True

-    def use_pagecache(self):
-        '''
-        adapters that will work with the page cache need to implement
-        this and change it to True.
-        '''
-        return True
-
    def extractChapterUrlsAndMetadata(self):

        # fetch the chapter. From that we will get almost all the
@@ -90,54 +83,41 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
        url = self.url
        logger.debug("URL: "+url)

        # use BeautifulSoup HTML parser to make everything easier to find.
-        try:
-            data = self._fetchUrl(url)
-            # non-existent/removed story urls get thrown to the front page.
-            if "<h4>Featured Story</h4>" in data:
-                raise exceptions.StoryDoesNotExist(self.url)
-            soup = self.make_soup(data)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
+        data = self.get_request(url)
+        # non-existent/removed story urls get thrown to the front page.
+        if "<h4>Featured Story</h4>" in data:
+            raise exceptions.StoryDoesNotExist(self.url)
+        soup = self.make_soup(data)

        # if blocked, attempt login.
        if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
            if self.performLogin(url): # performLogin raises
                                       # FailedToLogin if it fails.
-                soup = self.make_soup(self._fetchUrl(url,usecache=False))
+                soup = self.make_soup(self.get_request(url,usecache=False))

        divstory = soup.find('div',id='story')
-        storya = divstory.find('a',href=re.compile("^/story/\d+$"))
+        storya = divstory.find('a',href=re.compile(r"^/story/\d+$"))
        if storya : # if there's a story link in the divstory header, this is a chapter page.
            # normalize story URL on chapter list.
            self.story.setMetadata('storyId',storya['href'].split('/',)[2])
            url = "https://"+self.getSiteDomain()+storya['href']
            logger.debug("Normalizing to URL: "+url)
            self._setURL(url)
-            try:
-                soup = self.make_soup(self._fetchUrl(url))
-            except HTTPError as e:
-                if e.code == 404:
-                    raise exceptions.StoryDoesNotExist(self.url)
-                else:
-                    raise e
+            soup = self.make_soup(self.get_request(url))

            # if blocked, attempt login.
            if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
                if self.performLogin(url): # performLogin raises
                                           # FailedToLogin if it fails.
-                    soup = self.make_soup(self._fetchUrl(url,usecache=False))
+                    soup = self.make_soup(self.get_request(url,usecache=False))

        # title - first h4 tag will be title.
        titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
        self.story.setMetadata('title', stripHTML(titleh4.a))

-        if 'Deleted story' in self.story.getMetadata('title'):
+        if 'Deleted story' in self.story.getMetadataRaw('title'):
            raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)


        # Find authorid and URL from... author url.
        a = soup.find('span',{'class':'author'}).find('a', href=re.compile(r"^/a/"))
        self.story.setMetadata('authorId',a['href'].split('/')[2])
@@ -150,14 +130,14 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
        #self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)

        # most of the meta data is here:
-        metap = storydiv.find("p",{"class":"meta"})
+        metap = storydiv.find("div",{"class":"meta"})
        self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)

        # warnings
        # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
        spanreq = metap.find("span",{"class":"story-warnings"})
        if spanreq: # can be no warnings.
-            for a in spanreq.findAll("a"):
+            for a in spanreq.find_all("a"):
                self.story.addToList('warnings',a['title'])

        ## perhaps not the most efficient way to parse this, using
@@ -207,7 +187,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
            # no list found, so it's a one-chapter story.
            self.add_chapter(self.story.getMetadata('title'),url)
        else:
-            chapterlistlis = storylistul.findAll('li')
+            chapterlistlis = storylistul.find_all('li')
            for chapterli in chapterlistlis:
                if "blocked" in chapterli['class']:
                    # paranoia check. We should already be logged in by now.
@@ -222,7 +202,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):

    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)
-        soup = self.make_soup(self._fetchUrl(url))
+        soup = self.make_soup(self.get_request(url))

        span = soup.find('div', {'id' : 'storytext'})
@@ -233,4 +213,3 @@ class FicwadComSiteAdapter(BaseSiteAdapter):

def getClass():
    return FicwadComSiteAdapter
-
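A pattern worth noting in the ficwad changes above: detect the 'blocked' marker in the parsed page, log in, then refetch with usecache=False so the cached blocked page isn't served again. A schematic sketch of that flow; fetch() and login() are placeholders standing in for the adapter's get_request()/performLogin(), not real library calls:

def fetch_story_page(url, fetch, login, make_soup):
    soup = make_soup(fetch(url))
    if soup.find("div", {"class": "blocked"}) or soup.find("li", {"class": "blocked"}):
        # performLogin raises FailedToLogin on failure, so reaching the
        # refetch means we are authenticated; bypass the cache for fresh HTML.
        if login(url):
            soup = make_soup(fetch(url, usecache=False))
    return soup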
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

-# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
+# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -28,7 +28,6 @@ from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError
from ..six.moves import http_cookiejar as cl

from .base_adapter import BaseSiteAdapter, makeDate
@@ -66,13 +65,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return r"https?://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*"

-    def use_pagecache(self):
-        '''
-        adapters that will work with the page cache need to implement
-        this and change it to True.
-        '''
-        return True
-
    def set_adult_cookie(self):
        cookie = cl.Cookie(version=0, name='view_mature', value='true',
                           port=None, port_specified=False,
@@ -101,12 +93,23 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
        loginUrl = 'https://' + self.getSiteDomain() + '/ajax/login'
        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                            params['username']))
-        d = self._postUrl(loginUrl, params)
+        d = self.post_request(loginUrl, params)
        if "signing_key" not in d :
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['username']))
            raise exceptions.FailedToLogin(url,params['username'])

+    def make_soup(self,data):
+        soup = super(FimFictionNetSiteAdapter, self).make_soup(data)
+        for img in soup.select('img.lazy-img, img.user_image'):
+            ## FimF has started a 'camo' mechanism for images that
+            ## gets blocked by CF. attr data-source is the original source.
+            if img.has_attr('data-source'):
+                img['src'] = img['data-source']
+            elif img.has_attr('data-src'):
+                img['src'] = img['data-src']
+        return soup
+
    def doExtractChapterUrlsAndMetadata(self,get_cover=True):

        if self.is_adult or self.getConfig("is_adult"):
@@ -114,22 +117,17 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

        ## Only needed with password protected stories, which you have
        ## to have logged into on the website using this account.
-        self.performLogin(self.url)
+        if self.getConfig("always_login"):
+            self.performLogin(self.url)

        ##---------------------------------------------------------------------------------------------------
        ## Get the story's title page. Check if it exists.

-        try:
-            # don't use cache if manual is_adult--should only happen
-            # if it's an adult story and they don't have is_adult in ini.
-            data = self.do_fix_blockquotes(self._fetchUrl(self.url,
-                                                          usecache=(not self.is_adult)))
-            soup = self.make_soup(data)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
+        # don't use cache if manual is_adult--should only happen
+        # if it's an adult story and they don't have is_adult in ini.
+        data = self.do_fix_blockquotes(self.get_request(self.url,
+                                                        usecache=(not self.is_adult)))
+        soup = self.make_soup(data)

        if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data:
            raise exceptions.StoryDoesNotExist(self.url)
@@ -153,7 +151,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
        self.story.setMetadata("authorId", author['href'].split('/')[2])
        self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
                                                                       self.story.getMetadata('authorId'),
-                                                                      self.story.getMetadata('author')))
+                                                                      # meta entry author can be changed by the user.
+                                                                      stripHTML(author)))

        #Rating text is replaced with full words for historical compatibility after the site changed
        #on 2014-10-27
@@ -181,12 +180,13 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

        # Cover image
        if get_cover:
-            storyImage = storyContentBox.find('img', {'class':'lazy-img'})
+            storyImage = soup.select_one('div.story_container__story_image img')
            if storyImage:
                coverurl = storyImage['data-fullsize']
                # try setting from data-fullsize; if that fails, try data-src
                if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
                    coverurl = storyImage['data-src']
                    cover_set = self.setCoverImage(self.url,coverurl)[0]
                    if not cover_set or cover_set.startswith("failedtoload"):
                        coverurl = storyImage['src']
                        self.setCoverImage(self.url,coverurl)

                coverSource = storyImage.parent.find('a', {'class':'source'})
@@ -286,7 +286,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

        # Highest view for a chapter and total views
        viewSpan = storyToolbar.find('span', {'title':re.compile(r'.*\btotal views\b.*')})
-        viewResults = re.search('([0-9]*) views \/ ([0-9]*)', viewSpan['title'].replace(',',''))
+        viewResults = re.search(r'([0-9]*) views \/ ([0-9]*)', viewSpan['title'].replace(',',''))
        self.story.setMetadata("views", viewResults.group(1))
        self.story.setMetadata("total_views", viewResults.group(2))
@@ -298,16 +298,26 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
        descriptionMeta = soup.find('meta', {'property':'og:description'})
        self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))

-        #groups
+        # groups.
+        # If there are more than X groups, there's a 'Show all' button
+        # that calls for a JSON containing HTML with the full list.
+        # But it doesn't work reliably with FlareSolverr.
+        groupList = None
        groupButton = soup.find('button', {'data-click':'showAll'})
        if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
-            groupResponse = self._fetchUrl("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
-            groupData = json.loads(groupResponse)
-            groupList = self.make_soup(groupData["content"])
-        else:
+            try:
+                groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
+                groupData = json.loads(groupResponse)
+                groupList = self.make_soup(groupData["content"])
+            except Exception as e:
+                logger.warning("Collecting 'groups' (AKA 'Featured In') from JSON failed:%s"%e)
+                logger.warning("Only 'groups' initially shown on the page will be collected.")
+                logger.warning("This is a known issue with JSON and FlareSolverr. See #1122")
+
+        if not groupList:
            groupList = soup.find('ul', {'id':'story-groups-list'})

-        if not (groupList == None):
+        if groupList:
            for groupContent in groupList.find_all('a'):
                self.story.addToList("groupsUrl", 'https://'+self.host+groupContent["href"])
                groupName = groupContent.find('span', {"class":"group-name"})
@@ -318,7 +328,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):

        #sequels
        for header in soup.find_all('h1', {'class':'header-stories'}):
-            # I don't know why using text=re.compile with find() wouldn't work, but it didn't.
+            # I don't know why using string=re.compile with find() wouldn't work, but it didn't.
            if header.text.startswith('Sequels'):
                sequelContainer = header.parent
                for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
@@ -373,7 +383,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)

-        data = self._fetchUrl(url)
+        data = self.get_request(url)

        soup = self.make_soup(data)
@@ -391,3 +401,40 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,soup)
+
+    def before_get_urls_from_page(self,url,normalize):
+        ## Unlike most sites, which show the links to 'adult' stories but protect
+        ## them, FimF doesn't even show them if not logged in.
+        # data = self.get_request(url)
+        if self.getConfig("is_adult"):
+            self.set_adult_cookie()
+
+    def get_urls_from_page(self,url,normalize):
+        iterate = self.getConfig('scrape_bookshelf', default=False)
+        if not re.search(r'fimfiction\.net/bookshelf/(?P<listid>.+?)/',url) or iterate == 'legacy':
+            return super().get_urls_from_page(url,normalize)
+
+        self.before_get_urls_from_page(url,normalize)
+
+        final_urls = list()
+        while True:
+            data = self.get_request(url,usecache=True)
+            soup = self.make_soup(data)
+            paginator = soup.select_one('div.paginator-container > div.page_list > ul').find_all('li')
+            logger.debug("Paginator: " + str(len(paginator)))
+            stories_container = soup.select_one('div.content > div.two-columns > div.left').find_all('article', recursive=False)
+            x = 0
+            logger.debug("Container "+str(len(stories_container)))
+            for story_raw in stories_container:
+                x += 1
+                story_url = story_raw.select_one('div.story_content_box > header.title > div > a.story_name').get('href')
+                url_story = ('https://' + self.getSiteDomain() + story_url)
+                #logger.debug(url_story)
+                final_urls.append(url_story)
+            logger.debug("Discovered %s new stories."%str(x))
+
+            next_button = paginator[-1].select_one('a')
+            logger.debug("Next button: " + next_button.get_text())
+            if next_button.get_text() or not iterate:
+                return {'urllist': final_urls}
+            url = ('https://' + self.getSiteDomain() + next_button.get('href'))
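The fimfiction cover handling above walks progressively less specific image attributes until one loads. The same fallback chain as a compact sketch; try_set_cover is a stand-in for self.setCoverImage(), whose status string is checked for a "failedtoload" prefix above:

def pick_cover(story_image, try_set_cover):
    # Try data-fullsize first, then data-src, then plain src.
    for attr in ('data-fullsize', 'data-src', 'src'):
        if not story_image.has_attr(attr):
            continue
        status = try_set_cover(story_image[attr])
        if status and not status.startswith("failedtoload"):
            return story_image[attr]  # first candidate that actually loaded
    return None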
@@ -21,10 +21,8 @@ from __future__ import absolute_import
''' This adapter will download the stories from the www.fireflyfans.net forum pages '''
import logging
import re
-import sys
# py2 vs py3 transition
from ..six import text_type as unicode
-from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate
@@ -79,19 +77,12 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
        url = self.url
        logger.debug("URL: " + url)

-        try:
-            data = self._fetchUrl(url)
-        except HTTPError as e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
+        data = self.get_request(url)

        if 'Something bad happened, but hell if I know what it is.' in data:
            raise exceptions.StoryDoesNotExist(
                '{0} says: GORAMIT!!! SOMETHING WENT WRONG! Something bad happened, but hell if I know what it is.'.format(self.url))

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        # Title
@@ -102,6 +93,9 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):

        a = soup.find('a', href=re.compile(r"profileshow.aspx\?u="))
        self.story.setMetadata('authorId', a['href'].split('=')[1])
+        if not self.story.getMetadata('authorId'):
+            logger.warning("Site authorUrl missing authorId, using SiteMissingAuthorId")
+            self.story.setMetadata('authorId', 'SiteMissingAuthorId')
        self.story.setMetadata('authorUrl', 'http://' +
                               self.host + '/' + a['href'])
        self.story.setMetadata('author', a.string)
@@ -111,7 +105,6 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
        # to download them one at a time yourself. I'm also setting the status to
        # complete
        self.add_chapter(self.story.getMetadata('title'), self.url)
-        self.story.setMetadata('numChapters', 1)
        self.story.setMetadata('status', 'Completed')

        ## some stories do not have a summary listed, so I'm setting it here.
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

-# Copyright 2018 FanFicFare team
+# Copyright 2024 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -17,17 +17,16 @@

from __future__ import absolute_import
import re
-from ..htmlcleanup import stripHTML

-from .base_xenforoforum_adapter import BaseXenForoForumAdapter
+from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

def getClass():
    return QuestionablequestingComAdapter

-class QuestionablequestingComAdapter(BaseXenForoForumAdapter):
+class QuestionablequestingComAdapter(BaseXenForo2ForumAdapter):

    def __init__(self, config, url):
-        BaseXenForoForumAdapter.__init__(self, config, url)
+        BaseXenForo2ForumAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','qq')
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

-# Copyright 2018 FanFicFare team
+# Copyright 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,15 +18,15 @@
from __future__ import absolute_import
import re

-from .base_xenforoforum_adapter import BaseXenForoForumAdapter
+from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

def getClass():
    return ForumsSpacebattlesComAdapter

-class ForumsSpacebattlesComAdapter(BaseXenForoForumAdapter):
+class ForumsSpacebattlesComAdapter(BaseXenForo2ForumAdapter):

    def __init__(self, config, url):
-        BaseXenForoForumAdapter.__init__(self, config, url)
+        BaseXenForo2ForumAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','fsb')
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

-# Copyright 2018 FanFicFare team
+# Copyright 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,15 +18,15 @@
from __future__ import absolute_import
import re

-from .base_xenforoforum_adapter import BaseXenForoForumAdapter
+from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

def getClass():
    return ForumsSufficientVelocityComAdapter

-class ForumsSufficientVelocityComAdapter(BaseXenForoForumAdapter):
+class ForumsSufficientVelocityComAdapter(BaseXenForo2ForumAdapter):

    def __init__(self, config, url):
-        BaseXenForoForumAdapter.__init__(self, config, url)
+        BaseXenForo2ForumAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','fsv')
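All three forum adapters above get the same mechanical migration: swap BaseXenForoForumAdapter for BaseXenForo2ForumAdapter and keep only the site-specific bits. The whole shape of such an adapter reduces to a few lines; the class name, domain, and abbreviation below are invented placeholders, not a real site:

from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter

def getClass():
    return ExampleForumAdapter

class ExampleForumAdapter(BaseXenForo2ForumAdapter):

    def __init__(self, config, url):
        BaseXenForo2ForumAdapter.__init__(self, config, url)
        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'exf')

    @staticmethod
    def getSiteDomain():
        return 'forum.example.com'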
@ -1,201 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
####################################################################################################
|
||||
## Adapted by GComyn on April 21, 2017
|
||||
####################################################################################################
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
# If feedparser ever becomes an included dependency for FanFicFare
|
||||
import feedparser
|
||||
except ImportError:
|
||||
try:
|
||||
# A version of feedparser is available in the Calibre plugin version
|
||||
from calibre.web.feeds import feedparser
|
||||
except ImportError:
|
||||
# logger.warn('No version of feedparser module available, falling back to naive published and updated date')
|
||||
feedparser = None
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.error import HTTPError
|
||||
|
||||
from .base_adapter import BaseSiteAdapter
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
|
||||
####################################################################################################
|
||||
def getClass():
|
||||
return GravityTalesComSiteAdapter
|
||||
|
||||
|
||||
####################################################################################################
|
||||
class GravityTalesComSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult = False
|
||||
|
||||
# get storyId from url
|
||||
# http://gravitytales.com/novel/a-dragons-curiosity
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/')[2])
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','gtcom')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
## There are no dates listed on this site, so am commenting this out
|
||||
#self.dateformat = "%Y-%b-%d"
|
||||
|
||||
####################################################################################################
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
        # The site domain. Include www here if the site uses it.
        return 'gravitytales.com'

    ####################################################################################################
    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/novel/a-story-name"

    ####################################################################################################
    def getSiteURLPattern(self):
        return r"http://"+re.escape(self.getSiteDomain())+r"/novel/*(?P<id>[^/]+)"

    ####################################################################################################
    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def doExtractChapterUrlsAndMetadata(self, get_cover=True):

        url = self.url

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist('Error 404: {0}'.format(self.url))
            else:
                raise e

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        # Now go hunting for all the meta data and the chapter list.

        ## This is the block that holds the metadata
        bookdetails = soup.find('div', {'class':'main-content'})

        ## Title
        title = bookdetails.h3
        for tag in title.find_all('span'):
            tag.extract()
        self.story.setMetadata('title',stripHTML(title))

        author = stripHTML(bookdetails.h4)
        self.story.setMetadata('author', author)
        self.story.setMetadata('authorId', author)
        self.story.setMetadata('authorUrl', url)

        # Parse the description block for genres, translator, status and synopsis.
        bookdesc = bookdetails.find('div', {'class':'desc'})
        addtosys = False
        paras = bookdesc.find_all()
        synopsis = ''
        for para in paras:
            parat = stripHTML(para)
            ## I had a section of code that took the author name from the list, and added it to
            ## the author name from the <h4>... and a section that took the title from the list,
            ## and added it to the title from the <h3>...
            ## but decided to remove them and let it be added to the synopsis.
            if parat[:7] == 'Genres:' and unicode(para)[:2] == '<p':
                genres = parat[8:].split(', ')
                for genre in genres:
                    self.story.addToList('genre', genre)
            elif parat[:11] == 'Translator:' and unicode(para)[:2] == '<p':
                self.story.setMetadata('translator', parat.replace('Translator:', '').strip())
            elif parat[:7] == 'Status:' and unicode(para)[:2] == '<p':
                status = parat[8:].strip()
                self.story.setMetadata('status', status)
            elif unicode(para)[:2] == '<p' or unicode(para)[:2] == '<h' or unicode(para)[:2] == '<u':
                synopsis += ' ' + unicode(para)

        if not self.getConfig('keep_summary_html'):
            synopsis = stripHTML(synopsis)

        while '<br/> <br/>' in synopsis:
            synopsis = synopsis.replace('<br/> <br/>', '<br/>')

        self.setDescription(url, unicode(synopsis))

        ## this is constantly being forbidden, so I'm commenting it out for now.
        # if get_cover:
        #     cover_meta = soup.find('div', {'id':'coverImg'})
        #     cover_url = cover_meta['style'].replace('background-image: url(', '').replace(');', '')
        #     self.setCoverImage(url, cover_url)

        ## Getting the ChapterUrls
        ## fetch from separate chapters url.
        chap_url = self.story.getMetadata('storyUrl')+"/chapters"
        chap_soup = self.make_soup(self._fetchUrl(chap_url))
        found_chaps = {}
        for alink in chap_soup.find_all('a',href=re.compile(self.getSiteDomain())): # ignore anchor links
            ## Some stories list the same chapters in more than one section.
            if alink['href'] not in found_chaps:
                self.add_chapter(alink,alink['href'])
                found_chaps[alink['href']] = alink['href']

        if feedparser:
            # Parse the published and updated dates from the latest RSS feed
            # entry. The RSS feed URL seems to be added by JavaScript on the
            # page, so derive it by mangling the story URL (not very robust,
            # but probably good enough).
            rss_feed_url = url.replace('/novel/', '/feed/')
            feed = feedparser.parse(rss_feed_url)
            date_updated = datetime.fromtimestamp(
                time.mktime(feed.entries[0].published_parsed)) if feed.entries else datetime.now()
        else:
            # Fall back to the previous method of generating the published and updated date...
            date_updated = datetime.now()

        # Since the original published date isn't available, we'll simply use the updated date
        self.story.setMetadata('datePublished', date_updated)
        self.story.setMetadata('dateUpdated', date_updated)

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        html = self.make_soup(data)

        story = html.find('div', {'id':'chapterContent'})

        if story is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,story)
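A minimal standalone sketch of the RSS-date logic above, assuming only the optional third-party feedparser package; the '/feed/' URL mangling is carried over from the adapter, not verified site behavior:

# Standalone sketch, assuming the optional third-party 'feedparser' package.
import time
from datetime import datetime

try:
    import feedparser
except ImportError:
    feedparser = None

def latest_entry_date(story_url):
    if feedparser:
        # Derive the feed URL by mangling the story URL, as the adapter does.
        feed = feedparser.parse(story_url.replace('/novel/', '/feed/'))
        if feed.entries and feed.entries[0].get('published_parsed'):
            return datetime.fromtimestamp(
                time.mktime(feed.entries[0].published_parsed))
    # No feedparser, empty feed, or undated entry: fall back to 'now'.
    return datetime.now()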
@@ -1,191 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML, removeAllEntities
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','hp')
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only psid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d %H:%M%p"

        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId'))


    @staticmethod
    def getSiteDomain():
        return 'harrypotterfanfiction.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://harrypotterfanfiction.com/viewstory.php?psid=1234"

    def getSiteURLPattern(self):
        return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$"

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    def extractChapterUrlsAndMetadata(self):

        url = self.url
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        ## Don't know if these still apply
        # if "Access denied. This story has not been validated by the adminstrators of this site." in data:
        #     raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        # elif "ERROR locating story meta for psid" in data:
        #     raise exceptions.StoryDoesNotExist(self.url)

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        ## Title
        h2 = soup.find('h2')
        h2.find('i').extract() # remove author
        self.story.setMetadata('title',stripHTML(h2))
        ## Don't know if these still apply
        ## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995'
        # if "This story may contain adult themes." in a['href'] and not (self.is_adult or self.getConfig("is_adult")):
        #     raise exceptions.AdultCheckRequired(self.url)


        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string[3:]) # remove 'by '

        ## hpcom doesn't always give us total words--but it does give
        ## us words/chapter. I'd rather add than fetch and parse
        ## another page.
        chapter_words=0
        for tr in soup.find('table',{'class':'table-chapters'}).find('tbody').findAll('tr'):
            tdstr = tr.findAll('td')[2].string
            chapter = tr.find('a')
            chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href'])
            added = self.add_chapter(chapter,'https://'+self.host+'/viewstory.php'+chpt)
            if added and tdstr and tdstr.isdigit():
                chapter_words+=int(tdstr)
                ## used below if total words from site not found

        # fetch author page to get story description.
        authorsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))

        # assumes we don't need to worry about story URLs in descs.
        storya = authorsoup.find('a', href=re.compile(r"^/viewstory.php\?psid="+self.story.getMetadata('storyId')))
        storydiv = storya.find_parent('div')

        # desc is escaped html in attr on iframe.
        iframe = storydiv.find('iframe')
        iframesrc = removeAllEntities(iframe['srcdoc'])
        # logger.debug(iframesrc)
        descsoup=self.make_soup(iframesrc)
        desc = descsoup.body
        desc.name='div' # change body tag to div
        del desc['class'] # clear class='iframe'
        # logger.debug(desc.body)
        self.setDescription(url,desc)

        # <div class='entry'>
        #   <div class='entry__key'>Rating</div>
        #   <div class='entry__value'>Mature</div>
        # </div>

        meta_key_map = {
            'Rating':'rating',
            'Words':'numWords',
            'Characters':'characters',
            'Primary Relationship':'ships',
            'Secondary Relationship(s)':'ships',
            'Genre(s)':'genre',
            'Era':'era',
            'Advisory':'warnings',
            'Story Reviews':'reviews',
            # 'Status':'', # Status is treated special
            'First Published':'datePublished',
            'Last Updated':'dateUpdated',
            }
        for key in soup.find_all('div',{'class':'entry__key'}):
            value = stripHTML(key.find_next('div',{'class':'entry__value'}))
            key = stripHTML(key)
            meta = meta_key_map.get(key,None)
            if meta:
                if meta.startswith('date'):
                    value = makeDate(value,self.dateformat)
                if meta in ('characters','genre','ships'):
                    self.story.extendList(meta,value.split(','))
                else:
                    self.story.setMetadata(meta,value)
            if key == 'Status':
                if value == 'WIP':
                    value = 'In-Progress'
                elif value == 'COMPLETED':
                    value = 'Completed'
                # 'Abandoned' and other possible values used as-is
                self.story.setMetadata('status',value)

        # older stories don't present total words, use sum from chapters.
        if not self.story.getMetadata('numWords'):
            self.story.setMetadata('numWords',chapter_words)

    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        soup = self.make_soup(data)
        div = soup.find('div', {'class' : 'storytext-container'})
        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)

def getClass():
    return HarryPotterFanFictionComSiteAdapter
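The entry__key/entry__value walk above reduces to a small pattern; a self-contained sketch against the HTML structure quoted in the adapter's own comment (the snippet is illustrative, not a live page):

# Self-contained sketch of the key/value scraping pattern used above.
from bs4 import BeautifulSoup

html = """
<div class='entry'>
  <div class='entry__key'>Rating</div>
  <div class='entry__value'>Mature</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')
for key in soup.find_all('div', {'class': 'entry__key'}):
    # find_next() walks forward in document order to the matching value div.
    value = key.find_next('div', {'class': 'entry__value'})
    print(key.get_text(strip=True), '->', value.get_text(strip=True))
# prints: Rating -> Mature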
@@ -23,8 +23,6 @@ from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -59,13 +57,6 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):
    def getSiteURLPattern(self):
        return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("hentai-foundry.com/stories/user/")+r"(?P<authorId>[^/]+)/(?P<storyId>\d+)/(?P<storyURLTitle>[^/]+)" # ignore any chapter

    def use_pagecache(self):
        '''
        adapters that will work with the page cache need to implement
        this and change it to True.
        '''
        return True

    def extractChapterUrlsAndMetadata(self):
        url = self.url
        logger.debug("URL: "+url)

@@ -76,13 +67,7 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):
        else:
            url = url+"?enterAgree=1"

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e
        data = self.get_request(url)

        soup = self.make_soup(data)

@@ -180,7 +165,7 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

        data = self._fetchUrl(url)
        data = self.get_request(url)
        soup = self.make_soup(data)
        div = soup.select_one("section#viewChapter div.boxbody")
        if None == div:

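The hunks above drop the per-call try/except around _fetchUrl() in favor of get_request(). A hedged sketch of what such a wrapper centralizes; the names and exception type below are assumptions for illustration, not FanFicFare's actual implementation:

# Hedged sketch only: shows why the per-call try/except disappears once a
# get_request()-style wrapper owns the 404 handling.
from urllib.error import HTTPError

class StoryDoesNotExist(Exception):
    pass

def get_request(fetch, url):
    # 'fetch' stands in for the raw fetch call (the old _fetchUrl).
    try:
        return fetch(url)
    except HTTPError as e:
        if e.code == 404:
            raise StoryDoesNotExist(url)
        raise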
@@ -1,226 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return HLFictionNetAdapter

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class HLFictionNetAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])


        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','hlf')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Include www here if the site uses it.
        return 'hlfiction.net'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title and author
        a = soup.find('div', {'id' : 'pagetitle'})

        aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',aut['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/'+aut['href'])
        self.story.setMetadata('author',aut.string)
        aut.extract()

        self.story.setMetadata('title',stripHTML(a)[:(len(a.string)-3)])

        # Find the chapters:
        chapters=soup.find('select')
        if chapters is not None:
            for chapter in chapters.findAll('option'):
                # just in case there's tags, like <i> in chapter titles.
                self.add_chapter(chapter,'https://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])
        else:
            self.add_chapter(self.story.getMetadata('title'),url)


        asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))

        for list in asoup.findAll('div', {'class' : re.compile('listbox')}):
            a = list.find('a')
            if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
                break

        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = list.findAll('span', {'class' : 'classification'})
        for labelspan in labels:
            label = labelspan.string
            value = labelspan.nextSibling

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'classification' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value[:len(value)-2])

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                for char in value.string.split(', '):
                    if not 'None' in char:
                        self.story.addToList('characters',char)

            if 'Genre' in label:
                for genre in value.string.split(', '):
                    if not 'None' in genre:
                        self.story.addToList('genre',genre)

            if 'Warnings' in label:
                for warning in value.string.split(', '):
                    if not 'None' in warning:
                        self.story.addToList('warnings',warning)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
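The 'classification'/'label' span walk used by this adapter is the core eFiction scraping trick: accumulate everything after a label span until the next one. A self-contained sketch over an illustrative HTML snippet:

# Self-contained sketch of the eFiction label walk. The HTML is illustrative.
from bs4 import BeautifulSoup

html = ('<span class="label">Summary:</span> A short <i>demo</i> summary.'
        '<span class="label">Rated:</span> NC-17<br/>')
soup = BeautifulSoup(html, 'html.parser')

def is_label(node):
    # NavigableStrings have no get(); Tags may carry class="label".
    return getattr(node, 'get', None) is not None and 'label' in (node.get('class') or [])

for labelspan in soup.find_all('span', {'class': 'label'}):
    value, node = '', labelspan.next_sibling
    while node is not None and not is_label(node):
        value += str(node)
        node = node.next_sibling
    print(labelspan.string, '->', value.strip())
# Summary: -> A short <i>demo</i> summary.
# Rated: -> NC-17<br/>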
@@ -1,222 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from bs4.element import Comment
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return HPFanficArchiveComAdapter

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class HPFanficArchiveComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])


        # normalized story URL.
        self._setURL('https://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','hpffa')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Include www here if the site uses it.
        return 'hpfanficarchive.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "https://"+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return r"https?:"+re.escape("//"+self.getSiteDomain()+"/stories/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e


        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        elif "That story either does not exist on this archive or has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: That story either does not exist on this archive or has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('div', id="mainpage").find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','https://'+self.host+'/stories/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/stories/'+chapter['href'])


        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            val = labelspan.nextSibling
            value = unicode('')
            while val and not 'label' in defaultGetattr(val,'class'):
                # print("val:%s"%val)
                if not isinstance(val,Comment):
                    value += unicode(val)
                val = val.nextSibling
            label = labelspan.string
            # print("label:%s\nvalue:%s"%(label,value))

            if 'Summary' in label:
                self.setDescription(url,value)

            if 'Rated' in label:
                self.story.setMetadata('rating', stripHTML(value))

            if 'Word count' in label:
                self.story.setMetadata('numWords', stripHTML(value))

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Pairing' in label:
                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                for ship in ships:
                    self.story.addToList('ships',ship.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in stripHTML(value):
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'https://'+self.host+'/stories/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
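This adapter's variant of the same sibling walk also skips HTML comments; a tiny sketch of how bs4 surfaces them as Comment nodes (the snippet is illustrative):

# Tiny sketch: bs4 exposes HTML comments as Comment nodes, so the sibling
# walk can skip them explicitly, as the adapter above does.
from bs4 import BeautifulSoup
from bs4.element import Comment

soup = BeautifulSoup('<span class="label">Rated:</span><!-- hidden -->NC-17',
                     'html.parser')
node = soup.find('span').next_sibling
value = ''
while node is not None:
    if not isinstance(node, Comment):
        value += str(node)
    node = node.next_sibling
print(value)  # NC-17 -- the comment is dropped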
@@ -1,277 +0,0 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

def getClass():
    return IkEternalNetAdapter

# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class IkEternalNetAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])


        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ike')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Include www here if the site uses it.
        return 'www.ik-eternal.net'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=1"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        # The actual text that is used to announce you need to be an
        # adult varies from site to site. Again, print data before
        # the title search to troubleshoot.

        # Since the warning text can change by warning level, let's
        # look for the warning pass url. ksarchive uses
        # &amp;warning= -- actually, so do other sites. Must be an
        # eFiction book.

        # viewstory.php?sid=1882&amp;warning=4
        # viewstory.php?sid=1654&amp;ageconsent=ok&amp;warning=5
        #print data
        #m = re.search(r"'viewstory.php\?sid=1882(&amp;warning=4)'",data)
        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
        if m is not None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)


        # eFiction sites don't help us out a lot with their meta data
        # formatting, so it's a little ugly.

        # utility method
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        asoup = soup.find('div', {'class': 'listbox'})
        for a in asoup.findAll('p'):
            a.name='br'
        labels = asoup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

    # grab the text for an individual chapter.
    def getChapterText(self, url):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
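A standalone sketch of the adult-warning retry above: capture the site's own 'continue' link parameters from the raw page, then undo the &amp; escaping before re-requesting. The page string is illustrative:

# Standalone sketch of the warning-retry regex used by the adapter above.
import re

page = "onclick=\"location='viewstory.php?sid=1654&amp;ageconsent=ok&amp;warning=5'\""
m = re.search(r"'viewstory\.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'", page)
if m is not None:
    addurl = m.group(1).replace('&amp;', '&')  # correct the HTML-entity escaping
    print(addurl)  # &ageconsent=ok&warning=5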
@@ -25,7 +25,6 @@ from .. import exceptions as exceptions

# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError

from .base_adapter import BaseSiteAdapter, makeDate

@@ -94,7 +93,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)
        d = self.post_request(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
@@ -121,18 +120,12 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e
        data = self.get_request(url)

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)
            data = self.get_request(url)

        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
        if m != None:
@@ -146,24 +139,16 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
                data = self.get_request(url)
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
@@ -176,7 +161,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

@@ -193,7 +178,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        labels = soup.find_all('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string
@@ -214,22 +199,22 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

@@ -251,10 +236,9 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
            series_name = a.string
            series_url = 'https://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            seriessoup = self.make_soup(self.get_request(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
@@ -274,7 +258,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))
        soup = self.make_soup(self.get_request(url))

        div = soup.find('div', {'id' : 'story'})

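The findAll-to-find_all changes in these hunks are rename-only: bs4 keeps findAll as a legacy alias of find_all, so behavior is unchanged. A quick check:

# Quick check that findAll is only a legacy alias of find_all in bs4.
from bs4 import BeautifulSoup

soup = BeautifulSoup('<a href="x">1</a><a href="y">2</a>', 'html.parser')
assert soup.findAll('a') == soup.find_all('a')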