mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Compare commits
3693 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a172a7bd2b | ||
|
|
ab103dce6e | ||
|
|
892e9207f0 | ||
|
|
b4e392fae1 | ||
|
|
d9525d9726 | ||
|
|
cb77b12754 | ||
|
|
b41a633821 | ||
|
|
50c8db2992 | ||
|
|
ef6dd99bfe | ||
|
|
59796ff537 | ||
|
|
8ee0a6e898 | ||
|
|
c53fc362bd | ||
|
|
c87cfc1057 | ||
|
|
6ee151c90a | ||
|
|
db01c828a0 | ||
|
|
4d03874f06 | ||
|
|
36f56483e6 | ||
|
|
18e45a403b | ||
|
|
2e25172ba3 | ||
|
|
65e3fd562b | ||
|
|
7089bf6689 | ||
|
|
061dc1333f | ||
|
|
0a7fb5c090 | ||
|
|
cf02f729ae | ||
|
|
730c4f77f9 | ||
|
|
c02da29cbd | ||
|
|
b87d796221 | ||
|
|
436370fe5b | ||
|
|
ac77f31bc2 | ||
|
|
16f2c74e4b | ||
|
|
af5c2aa0bc | ||
|
|
31dec5b62d | ||
|
|
97d37fcfc1 | ||
|
|
c730aa2f68 | ||
|
|
4e2e359dee | ||
|
|
bb96049934 | ||
|
|
84965ef25f | ||
|
|
348d129a1e | ||
|
|
4794e9bc51 | ||
|
|
d46dc76ae1 | ||
|
|
08bae8d9be | ||
|
|
405c37aeb5 | ||
|
|
270e01c3c7 | ||
|
|
12d57f5950 | ||
|
|
562b3a4ecd | ||
|
|
e69045fd98 | ||
|
|
747bde3394 | ||
|
|
aa00c7ae03 | ||
|
|
0539f818f3 | ||
|
|
41a6f56f44 | ||
|
|
e3832245e6 | ||
|
|
909b64c83c | ||
|
|
732f5e2571 | ||
|
|
d9dd04396e | ||
|
|
36e2183d45 | ||
|
|
040b7205b8 | ||
|
|
d8ed180eb1 | ||
|
|
2a6c1e74db | ||
|
|
b7c8c96153 | ||
|
|
a16096592c | ||
|
|
bb34eecc7c | ||
|
|
ceed7ef1a8 | ||
|
|
1d2a887c2d | ||
|
|
a3f3302312 | ||
|
|
ecf005b145 | ||
|
|
3bd074fa2c | ||
|
|
0fd95daa8e | ||
|
|
1b57e49d98 | ||
|
|
db0d39c9cd | ||
|
|
cbde66cf41 | ||
|
|
17331e9eb3 | ||
|
|
9b96c151a5 | ||
|
|
1b65a30798 | ||
|
|
c9a47877f7 | ||
|
|
bdc77ad0f6 | ||
|
|
719971c76c | ||
|
|
c74dba472a | ||
|
|
c1fb7f0fc5 | ||
|
|
94c932cd2f | ||
|
|
27fb765c0d | ||
|
|
06ce46f64a | ||
|
|
c04d85fa97 | ||
|
|
b6cdc30db5 | ||
|
|
9bbb5e8b01 | ||
|
|
18ce6e6fba | ||
|
|
507910f5da | ||
|
|
ccf7801a89 | ||
|
|
9a52a10626 | ||
|
|
6963153aac | ||
|
|
ee357cd5b4 | ||
|
|
b84e3d2858 | ||
|
|
9377fc6671 | ||
|
|
aaa0fa613a | ||
|
|
eac5acfbfa | ||
|
|
8dca1ef343 | ||
|
|
28e8f61cf8 | ||
|
|
78abf476ea | ||
|
|
2b1f9446dd | ||
|
|
9815736b4e | ||
|
|
3f54cce9a1 | ||
|
|
223138b8e5 | ||
|
|
4aa47c8bab | ||
|
|
a97a85f357 | ||
|
|
ffc3696d84 | ||
|
|
86c4e1974b | ||
|
|
b6fd7c2ca4 | ||
|
|
326300b40e | ||
|
|
282bafe514 | ||
|
|
061a8feccf | ||
|
|
26c9b6d2ce | ||
|
|
ed02d61953 | ||
|
|
b58d54b8ea | ||
|
|
1bc3ffc269 | ||
|
|
cbd295f911 | ||
|
|
35653f533f | ||
|
|
ea7afea8c2 | ||
|
|
384a2fe8b7 | ||
|
|
b278cac620 | ||
|
|
e23de49fb5 | ||
|
|
f64f041546 | ||
|
|
1d53c506c9 | ||
|
|
c8d6ce8004 | ||
|
|
3f08417c04 | ||
|
|
79ebf6a02b | ||
|
|
41dfb8eab8 | ||
|
|
590b663170 | ||
|
|
9bb408c8b3 | ||
|
|
5d6a63a8ca | ||
|
|
4078ccfdb1 | ||
|
|
79c29121c3 | ||
|
|
dea48d9e07 | ||
|
|
c165196a35 | ||
|
|
c385013db9 | ||
|
|
8780aa3105 | ||
|
|
12c7bfe29c | ||
|
|
08d0b8a4e0 | ||
|
|
1d401f8dba | ||
|
|
193bb3ed61 | ||
|
|
63fd8cd660 | ||
|
|
26a1152390 | ||
|
|
e0907147f7 | ||
|
|
99bba3ff12 | ||
|
|
3fdb6630fb | ||
|
|
0d6b789c9f | ||
|
|
edaa03ef42 | ||
|
|
4e17a10792 | ||
|
|
9fd48e0168 | ||
|
|
818e990184 | ||
|
|
9bb7b54023 | ||
|
|
af6695e27f | ||
|
|
46293f2d02 | ||
|
|
7f968ba102 | ||
|
|
1e5cb9b184 | ||
|
|
9627e6e62c | ||
|
|
5e644098f9 | ||
|
|
fa3a56d096 | ||
|
|
ba18216ef8 | ||
|
|
f207e31b3b | ||
|
|
0e1ace18e4 | ||
|
|
b17a632640 | ||
|
|
485d4631f9 | ||
|
|
30929bc38e | ||
|
|
ae4311f4dd | ||
|
|
3a3c35ea1f | ||
|
|
19dd89fb4d | ||
|
|
b247a7465b | ||
|
|
d5c20db681 | ||
|
|
a599ff6ad2 | ||
|
|
e21c6604a1 | ||
|
|
273c1931f4 | ||
|
|
fdf29eeade | ||
|
|
06e55728d0 | ||
|
|
0a3ab4bc9d | ||
|
|
a4a91b373f | ||
|
|
a68e771026 | ||
|
|
d7c79fcb3b | ||
|
|
5cc05ed96d | ||
|
|
e5b5768f11 | ||
|
|
6cf2519ef9 | ||
|
|
f4f98e0877 | ||
|
|
bb8fb9efa5 | ||
|
|
be38778d72 | ||
|
|
55d8efbdcd | ||
|
|
9df7822e32 | ||
|
|
69e6a3d2cf | ||
|
|
8ea03be5f3 | ||
|
|
75a213beb9 | ||
|
|
ead830c60a | ||
|
|
20681315e7 | ||
|
|
e2961eaadf | ||
|
|
7f0d7f70be | ||
|
|
c5264c2147 | ||
|
|
ff402c16ca | ||
|
|
4a9da1c02e | ||
|
|
c14f1014b8 | ||
|
|
74bc398994 | ||
|
|
6e8e74fc55 | ||
|
|
68ad4c87aa | ||
|
|
fe82aed91d | ||
|
|
7d14bf6e90 | ||
|
|
39500a9386 | ||
|
|
d5f8891e4f | ||
|
|
edce6949ae | ||
|
|
bec6fac2ea | ||
|
|
a9bd19a079 | ||
|
|
7135ba5892 | ||
|
|
9ba4c100ca | ||
|
|
fe565149ba | ||
|
|
624f60a5c1 | ||
|
|
5c79ac0b5c | ||
|
|
615711f904 | ||
|
|
2f77bd9e97 | ||
|
|
abdc881812 | ||
|
|
1ba73bf316 | ||
|
|
a359c6b326 | ||
|
|
ff64356e85 | ||
|
|
0271b14f6c | ||
|
|
bf845e200f | ||
|
|
e94ff6e1e8 | ||
|
|
07313d2744 | ||
|
|
bd2026df7e | ||
|
|
0fa177ff79 | ||
|
|
d84c72a215 | ||
|
|
c319857da0 | ||
|
|
df586e9bb7 | ||
|
|
354a5708ce | ||
|
|
096face5d2 | ||
|
|
02e3bddd5c | ||
|
|
9dadef1905 | ||
|
|
2e8a899d8c | ||
|
|
623915f623 | ||
|
|
57865ca53d | ||
|
|
e9c4b9ef30 | ||
|
|
0ad088b663 | ||
|
|
e37a7f72be | ||
|
|
9befe122dd | ||
|
|
e6d6227ff1 | ||
|
|
d854a6efe7 | ||
|
|
a97af94f8a | ||
|
|
e2ea97e99a | ||
|
|
215f6dd8ff | ||
|
|
687aa9c3ba | ||
|
|
523cf78640 | ||
|
|
90e50964b6 | ||
|
|
a83823ea13 | ||
|
|
727aa6f1bc | ||
|
|
072d929298 | ||
|
|
992c5a1378 | ||
|
|
f8937c1af3 | ||
|
|
af5c78e2e9 | ||
|
|
4a26dfdfff | ||
|
|
a82ef5dbae | ||
|
|
6adc995fa5 | ||
|
|
f534efd3df | ||
|
|
f41e64141a | ||
|
|
94036e3fbb | ||
|
|
9142609c61 | ||
|
|
f9d7b893ee | ||
|
|
4e2ae7441d | ||
|
|
87dbef980f | ||
|
|
921f8c287b | ||
|
|
637c6e3cc3 | ||
|
|
ba90ff9f3a | ||
|
|
34e84b2942 | ||
|
|
31eb7f421a | ||
|
|
85d4656005 | ||
|
|
006b8873a5 | ||
|
|
3246036f88 | ||
|
|
6d114532e2 | ||
|
|
2edb1d58d5 | ||
|
|
8dc3c5d3d8 | ||
|
|
2ec8c97e28 | ||
|
|
c51161c3d1 | ||
|
|
bd645a97c7 | ||
|
|
f7cbfa56bb | ||
|
|
07fd16813f | ||
|
|
2fe971c79f | ||
|
|
e4082c6235 | ||
|
|
960d5ba11a | ||
|
|
066539793d | ||
|
|
5b312494fb | ||
|
|
e628b10247 | ||
|
|
61c063ed72 | ||
|
|
11d3f601c9 | ||
|
|
3b8d0f63d4 | ||
|
|
b8b30c6a78 | ||
|
|
b007f68a88 | ||
|
|
6d8a67ef2e | ||
|
|
ab66e9e285 | ||
|
|
b3f7add5a1 | ||
|
|
800be43d24 | ||
|
|
70f77e17e2 | ||
|
|
caf46ba421 | ||
|
|
686ed80230 | ||
|
|
56689a10c4 | ||
|
|
065d077752 | ||
|
|
c8f817e830 | ||
|
|
1432241319 | ||
|
|
0e9f60f8a6 | ||
|
|
74de62385f | ||
|
|
d2f69eb5d5 | ||
|
|
c3655d59ca | ||
|
|
aca07bbf59 | ||
|
|
3edd3c3e7b | ||
|
|
61ba096c6e | ||
|
|
47fd71c4b9 | ||
|
|
e1d0bed52d | ||
|
|
acb88cbefc | ||
|
|
f1e7cabf6a | ||
|
|
21ec27ffd4 | ||
|
|
5567e6417d | ||
|
|
af352a480c | ||
|
|
92069dc638 | ||
|
|
76e9421858 | ||
|
|
70558bf444 | ||
|
|
b60dfdcc28 | ||
|
|
b976439669 | ||
|
|
6de50509ed | ||
|
|
4d9c38d3c2 | ||
|
|
90ecb63be4 | ||
|
|
bd49f8e8fa | ||
|
|
21c0315e60 | ||
|
|
fc97fa6d5c | ||
|
|
2c3bf3c642 | ||
|
|
a9c725d32a | ||
|
|
f936c5b0fb | ||
|
|
53344afa49 | ||
|
|
d5addfa2fd | ||
|
|
6d8375a9f3 | ||
|
|
7bc03ac798 | ||
|
|
05d62a5343 | ||
|
|
31115f9245 | ||
|
|
26ee692208 | ||
|
|
dd43d25f76 | ||
|
|
fffd15d7ea | ||
|
|
7c2700c8ea | ||
|
|
94518c4f25 | ||
|
|
531b965b22 | ||
|
|
658b637716 | ||
|
|
44f5feacfb | ||
|
|
52451a3eba | ||
|
|
7123f7dd6f | ||
|
|
08a0f9b5fc | ||
|
|
74ac96a67e | ||
|
|
9eed0340e9 | ||
|
|
73b90c0291 | ||
|
|
c33a6e6b05 | ||
|
|
d77cc15586 | ||
|
|
21483f7227 | ||
|
|
6c0df42fe7 | ||
|
|
c3a90a8914 | ||
|
|
e7f66d293a | ||
|
|
e49b3a6be0 | ||
|
|
ae72efdc00 | ||
|
|
bc935e213a | ||
|
|
a8e0eabbd8 | ||
|
|
81b84a8133 | ||
|
|
a973b8c926 | ||
|
|
08ccc659ca | ||
|
|
fb610de27a | ||
|
|
29d2e3734b | ||
|
|
48cf17c7b7 | ||
|
|
ac61c2bb68 | ||
|
|
a12d2a688b | ||
|
|
52027eac46 | ||
|
|
a1d4fba728 | ||
|
|
69872b922c | ||
|
|
7bd1a1acfc | ||
|
|
80e5a22f0d | ||
|
|
3cd4188bd8 | ||
|
|
21d16dbe90 | ||
|
|
5ce7875851 | ||
|
|
35be14a168 | ||
|
|
930940c7fd | ||
|
|
f001f19a47 | ||
|
|
fd7382fb56 | ||
|
|
c69e940d2a | ||
|
|
31dcd8e6ff | ||
|
|
0bd85c10a8 | ||
|
|
b075c22261 | ||
|
|
87b3e04fa1 | ||
|
|
630f09e644 | ||
|
|
a0463fc85b | ||
|
|
de7d8079d9 | ||
|
|
4aad0ec913 | ||
|
|
c379b45cb9 | ||
|
|
82825d1b16 | ||
|
|
11b2d5643e | ||
|
|
06dc2add8f | ||
|
|
ab7198bb8f | ||
|
|
d854733ffa | ||
|
|
a2cc6bcdd3 | ||
|
|
c9accda3f8 | ||
|
|
8e55d1e6f4 | ||
|
|
9b8eb547fc | ||
|
|
62b3c9264e | ||
|
|
370be379f0 | ||
|
|
1addfe14fc | ||
|
|
e510fb027e | ||
|
|
86b807805f | ||
|
|
0ace02ee75 | ||
|
|
38ad74af68 | ||
|
|
6c70a60cdb | ||
|
|
80ee0ca9b9 | ||
|
|
8b143a0c1b | ||
|
|
9fb86da341 | ||
|
|
5c703122ec | ||
|
|
75f89beab1 | ||
|
|
fc9d184f20 | ||
|
|
6c411e054a | ||
|
|
dbef4719d9 | ||
|
|
da6b4c25f2 | ||
|
|
23004e3953 | ||
|
|
4a15c2a7d5 | ||
|
|
84dad2ec43 | ||
|
|
5ac38fc327 | ||
|
|
35e0ada643 | ||
|
|
a9533364ec | ||
|
|
4a03186ce6 | ||
|
|
a0271e2957 | ||
|
|
11491c6383 | ||
|
|
24dccc73f0 | ||
|
|
8e3a88776a | ||
|
|
28141ce9d1 | ||
|
|
ffaa3bf82a | ||
|
|
d0d05d6c3b | ||
|
|
6d74a58181 | ||
|
|
de85fd42f7 | ||
|
|
c4aebd40df | ||
|
|
81cb631491 | ||
|
|
35aa5d2143 | ||
|
|
a8b1489233 | ||
|
|
ffb179c9a1 | ||
|
|
6d8d7ab66f | ||
|
|
a128083ce8 | ||
|
|
9f78ec0177 | ||
|
|
d941810825 | ||
|
|
ba1975342c | ||
|
|
27cfac45e4 | ||
|
|
64a4eb2bb2 | ||
|
|
371f995fda | ||
|
|
816bbdfd66 | ||
|
|
cdd6df8a57 | ||
|
|
5d4489bb28 | ||
|
|
a9944cd255 | ||
|
|
c284b2a6c6 | ||
|
|
15dde72f14 | ||
|
|
ff0f22565c | ||
|
|
33813b4047 | ||
|
|
ae3accca27 | ||
|
|
d998467f7a | ||
|
|
29fddbce8e | ||
|
|
a4e1db32e0 | ||
|
|
81aea65555 | ||
|
|
9005f9db4c | ||
|
|
7de040d8db | ||
|
|
9c53cf236e | ||
|
|
2e6ac07020 | ||
|
|
3febac62a8 | ||
|
|
c4ea6ca5fd | ||
|
|
75f9fb2d38 | ||
|
|
e4f83c52ca | ||
|
|
eb54731ae9 | ||
|
|
eb24bcb2ac | ||
|
|
ffa533e5fd | ||
|
|
bd76066905 | ||
|
|
eb17af9252 | ||
|
|
4471b1f980 | ||
|
|
9cfd88c098 | ||
|
|
c1cf8995ea | ||
|
|
55995be7de | ||
|
|
869686f363 | ||
|
|
f45a05ddb6 | ||
|
|
434ff0de74 | ||
|
|
d0ece28197 | ||
|
|
cd1db0a462 | ||
|
|
075c5cb7c2 | ||
|
|
b8740ca1c7 | ||
|
|
3db3e28595 | ||
|
|
b610d49f6b | ||
|
|
35afca430a | ||
|
|
1499037e19 | ||
|
|
1aaa4102a5 | ||
|
|
049c9af0e4 | ||
|
|
482b6b67eb | ||
|
|
cdb752df6a | ||
|
|
0412355001 | ||
|
|
0dc049aedb | ||
|
|
832387dea0 | ||
|
|
94bd4bf236 | ||
|
|
493e76df30 | ||
|
|
44b6e752f6 | ||
|
|
5d6f2c91c1 | ||
|
|
04ae49f944 | ||
|
|
020606fea1 | ||
|
|
711698620e | ||
|
|
968687bb82 | ||
|
|
07ab6d137b | ||
|
|
d51ac5d6f5 | ||
|
|
478d2e8f17 | ||
|
|
67a1dcee90 | ||
|
|
af834b1e40 | ||
|
|
ae535e2518 | ||
|
|
96d36ae71a | ||
|
|
480b7239e5 | ||
|
|
2666164c5b | ||
|
|
6ef8d1b215 | ||
|
|
654619e7e2 | ||
|
|
4ea869a764 | ||
|
|
837df18cb0 | ||
|
|
248f1c022b | ||
|
|
4fabf9e65c | ||
|
|
b7c318f520 | ||
|
|
89a15e1b16 | ||
|
|
5b41097abc | ||
|
|
a672b6dbdf | ||
|
|
e4d5d43efa | ||
|
|
cc572857e0 | ||
|
|
2f52ae31c0 | ||
|
|
3ddf801925 | ||
|
|
182695b0af | ||
|
|
656e67cc57 | ||
|
|
34215ce0ee | ||
|
|
c706aed271 | ||
|
|
e5f8e5bba4 | ||
|
|
11d8fae876 | ||
|
|
4a14e5fc86 | ||
|
|
7548ce6ae0 | ||
|
|
e113bbfb1c | ||
|
|
d1ccdfd21f | ||
|
|
68e8f49e9f | ||
|
|
49a0328268 | ||
|
|
25ea3fcaad | ||
|
|
a5378ca419 | ||
|
|
e0b733b60d | ||
|
|
33b2b10bf3 | ||
|
|
c468c26208 | ||
|
|
9d29f888b3 | ||
|
|
d1e8a77489 | ||
|
|
ef66e73fa4 | ||
|
|
7f128587c0 | ||
|
|
53a7a60dbc | ||
|
|
71a61ff166 | ||
|
|
9c051e6c3b | ||
|
|
f0d89498dc | ||
|
|
abb370a852 | ||
|
|
4b9054d1b4 | ||
|
|
2d0db171a8 | ||
|
|
7f67465767 | ||
|
|
6801d5e01d | ||
|
|
b01914c24e | ||
|
|
dd41f99288 | ||
|
|
37db56e6b3 | ||
|
|
f0a08f7647 | ||
|
|
2593f742c9 | ||
|
|
6ac299c198 | ||
|
|
3eda289349 | ||
|
|
95a7bdd3a9 | ||
|
|
84257e7388 | ||
|
|
465bffd896 | ||
|
|
eabfd1bef3 | ||
|
|
8d6676617c | ||
|
|
c47b620f67 | ||
|
|
df94cc439e | ||
|
|
08032778bd | ||
|
|
52deec3fd8 | ||
|
|
5b443d4363 | ||
|
|
4170cfd9a6 | ||
|
|
ae4735df04 | ||
|
|
6041036787 | ||
|
|
d451265621 | ||
|
|
677f213337 | ||
|
|
8537702028 | ||
|
|
6d3d4d1ae6 | ||
|
|
1f42c188fa | ||
|
|
9346985718 | ||
|
|
4585afde50 | ||
|
|
bee6cb9ba6 | ||
|
|
581b627a3e | ||
|
|
4436001494 | ||
|
|
6116a19986 | ||
|
|
99fd4ea0e5 | ||
|
|
a613b842f2 | ||
|
|
6462c5c366 | ||
|
|
8c4a8cd2da | ||
|
|
7a0ea3ce96 | ||
|
|
f14fe9d3aa | ||
|
|
36add28269 | ||
|
|
87b4171dd4 | ||
|
|
951acf61b4 | ||
|
|
8674b54753 | ||
|
|
b7e5bf0468 | ||
|
|
0f12c127b6 | ||
|
|
50c51dc993 | ||
|
|
65bf03a613 | ||
|
|
0bb8421f98 | ||
|
|
108e603e63 | ||
|
|
1868ed842e | ||
|
|
6c505a6170 | ||
|
|
72d508b0bf | ||
|
|
d6f2faf170 | ||
|
|
92cbff7db9 | ||
|
|
4bb2d50921 | ||
|
|
c3d8bc4fd0 | ||
|
|
37ae6cbdbb | ||
|
|
b953daa3c2 | ||
|
|
463910cd54 | ||
|
|
95bfdf907f | ||
|
|
85550aeaf6 | ||
|
|
5b20926f2c | ||
|
|
c915aceb85 | ||
|
|
36d56b867c | ||
|
|
e1cec84075 | ||
|
|
ba3676d73f | ||
|
|
80f50b298f | ||
|
|
9120504249 | ||
|
|
55c7ca9c10 | ||
|
|
704ea89d72 | ||
|
|
8eecd0aa7d | ||
|
|
c53f99d01c | ||
|
|
438a1265f2 | ||
|
|
86766223cb | ||
|
|
1fa94de1d9 | ||
|
|
56d1cf19ef | ||
|
|
701c096ed4 | ||
|
|
aab3e1c601 | ||
|
|
8d040a4926 | ||
|
|
4453cbb143 | ||
|
|
0c173f8110 | ||
|
|
a14b39eb4c | ||
|
|
c9cb51f8c4 | ||
|
|
dbe6c6105c | ||
|
|
04231eecfe | ||
|
|
a55a4c93a5 | ||
|
|
dcd4f0f6a5 | ||
|
|
792ab02195 | ||
|
|
7a87310403 | ||
|
|
7e070528a1 | ||
|
|
4f3af1395f | ||
|
|
1fc4f3d70b | ||
|
|
12ee3dae5e | ||
|
|
cf28bc26f0 | ||
|
|
bd41796231 | ||
|
|
f21f039b3a | ||
|
|
7263f4120c | ||
|
|
22e0e8da66 | ||
|
|
7173bf0803 | ||
|
|
7246cdf853 | ||
|
|
c60b296bc9 | ||
|
|
a8a86533ad | ||
|
|
d1c5847a58 | ||
|
|
68e0d70fcb | ||
|
|
74b28f7ead | ||
|
|
acda805c3c | ||
|
|
a37fbbbd51 | ||
|
|
2cdb6036ea | ||
|
|
77afdc0208 | ||
|
|
7e0e68f66f | ||
|
|
bbec6fcd5f | ||
|
|
631fe6c9c9 | ||
|
|
a86755ad98 | ||
|
|
42d2b00007 | ||
|
|
ad10cad0b0 | ||
|
|
71d3589ebc | ||
|
|
84ed1827be | ||
|
|
ce29a6923e | ||
|
|
d96d194b2b | ||
|
|
5cb3bccf45 | ||
|
|
e6639323b7 | ||
|
|
f94e0eaf32 | ||
|
|
37bcb1284b | ||
|
|
295bd2e1ab | ||
|
|
45b4a8d8bf | ||
|
|
cdb60423fe | ||
|
|
50f913843b | ||
|
|
581d6f6657 | ||
|
|
e03f65332a | ||
|
|
3e9abec817 | ||
|
|
0d8f84ba23 | ||
|
|
c646419336 | ||
|
|
622a4eb44b | ||
|
|
d4fbc73b41 | ||
|
|
391f469a99 | ||
|
|
a0ca55d7f6 | ||
|
|
a4bbe27771 | ||
|
|
a5e2d1eb45 | ||
|
|
7a89d03339 | ||
|
|
ae638fd0a1 | ||
|
|
26a59b373a | ||
|
|
479c0b7d95 | ||
|
|
52a0bb6e0e | ||
|
|
f2f333c807 | ||
|
|
3f2f2a33d3 | ||
|
|
ba9272822b | ||
|
|
9575044262 | ||
|
|
7306e81a30 | ||
|
|
19f9132109 | ||
|
|
f340ba50da | ||
|
|
6e90c7ed7b | ||
|
|
0a81bc7c6b | ||
|
|
f5dd6b90fc | ||
|
|
e1a9438595 | ||
|
|
97a72380e6 | ||
|
|
a6a3a4e240 | ||
|
|
b6b1e6ecdc | ||
|
|
85cf21a32c | ||
|
|
918ed4a23e | ||
|
|
84d6106a30 | ||
|
|
6761cae9c1 | ||
|
|
e330ccbe94 | ||
|
|
da7059e978 | ||
|
|
893345dc33 | ||
|
|
9fcc6fe68a | ||
|
|
0c02f17d67 | ||
|
|
11c8805f4c | ||
|
|
cf065fa706 | ||
|
|
3c94c9d308 | ||
|
|
831bea725f | ||
|
|
b748283484 | ||
|
|
28af7e1722 | ||
|
|
1673da5a4b | ||
|
|
c97c0e822d | ||
|
|
ce24ac70d9 | ||
|
|
9ab4739710 | ||
|
|
685084e711 | ||
|
|
dd049ac297 | ||
|
|
516f7464b7 | ||
|
|
46be37e034 | ||
|
|
693f0aa774 | ||
|
|
646693ca3e | ||
|
|
22534986d3 | ||
|
|
18b183585a | ||
|
|
5862ba627e | ||
|
|
c38f4ab400 | ||
|
|
f5c9fcf029 | ||
|
|
9e206d2215 | ||
|
|
b1b2451fa6 | ||
|
|
91f2f84c10 | ||
|
|
16ba74c98e | ||
|
|
0cc3b81580 | ||
|
|
c769900332 | ||
|
|
a84e6ab385 | ||
|
|
af163c27e0 | ||
|
|
016452ec89 | ||
|
|
b584779a13 | ||
|
|
01d97ed770 | ||
|
|
607ef27fe1 | ||
|
|
448a9cfaef | ||
|
|
88fb6069fc | ||
|
|
cd5fd2cab4 | ||
|
|
a21fcf7e77 | ||
|
|
627a8dbff5 | ||
|
|
dd1207f11e | ||
|
|
49aec452ca | ||
|
|
e033f71ece | ||
|
|
62b097f3d5 | ||
|
|
3098c1983f | ||
|
|
37626680f9 | ||
|
|
d99fe607da | ||
|
|
c80f22cdd3 | ||
|
|
0b6402ca8a | ||
|
|
26a7633337 | ||
|
|
3ee7614441 | ||
|
|
718ae6ac83 | ||
|
|
e0686eada2 | ||
|
|
9f1fd42889 | ||
|
|
a088a34c89 | ||
|
|
14cdc10ee3 | ||
|
|
8667643e7c | ||
|
|
e6d123a17d | ||
|
|
ae28b714b3 | ||
|
|
33cd1642f8 | ||
|
|
63ec69f9f2 | ||
|
|
20ea9a00ed | ||
|
|
779222b66d | ||
|
|
afb2b9fe29 | ||
|
|
20052e1922 | ||
|
|
e03f3f40da | ||
|
|
00f6656d7d | ||
|
|
dd2c1a48b5 | ||
|
|
a37588a8f7 | ||
|
|
fc99805a85 | ||
|
|
d73b1732d3 | ||
|
|
043fb289bf | ||
|
|
a0332f27be | ||
|
|
99285763d3 | ||
|
|
26467d8f35 | ||
|
|
930ba5bb19 | ||
|
|
fb552c823a | ||
|
|
bfc0c4f3ef | ||
|
|
216cb27f03 | ||
|
|
21a5ded593 | ||
|
|
ff07987a02 | ||
|
|
bd6afdafb8 | ||
|
|
fd7c5ac867 | ||
|
|
87eb84b5fa | ||
|
|
784cb711d8 | ||
|
|
54a00a934b | ||
|
|
c638ac8457 | ||
|
|
b710a4cdc7 | ||
|
|
16c8c6b445 | ||
|
|
5cee35149f | ||
|
|
de201c7263 | ||
|
|
222a4f4828 | ||
|
|
7d6af47f60 | ||
|
|
1c05d58d1a | ||
|
|
8152b51353 | ||
|
|
d387eafff2 | ||
|
|
fe5605ea50 | ||
|
|
7f97decb8a | ||
|
|
cfd28dd1ff | ||
|
|
2c43eab432 | ||
|
|
fda597ddae | ||
|
|
7502c0f2fb | ||
|
|
eaeeda6911 | ||
|
|
8850c1a62b | ||
|
|
0205ec4ccb | ||
|
|
2600bf7be5 | ||
|
|
012ff40f0f | ||
|
|
0df9e39931 | ||
|
|
97fcc3af33 | ||
|
|
be40433377 | ||
|
|
a1f29cb034 | ||
|
|
b2b584d832 | ||
|
|
415cd6597e | ||
|
|
d1d5d61b87 | ||
|
|
2c11ecc5c8 | ||
|
|
0ac66425f8 | ||
|
|
367d3e4435 | ||
|
|
05b7147e64 | ||
|
|
200c877418 | ||
|
|
84323c1608 | ||
|
|
3ba2edef2d | ||
|
|
e5cc1cccf2 | ||
|
|
c50ffc40dc | ||
|
|
1f8106c1f3 | ||
|
|
d9ca72571e | ||
|
|
ecb0620929 | ||
|
|
c6b381e61a | ||
|
|
faf352bf80 | ||
|
|
269b7d5bd1 | ||
|
|
439d617364 | ||
|
|
d0c85feda5 | ||
|
|
25ebc603e7 | ||
|
|
1683d950c3 | ||
|
|
961bb28ecd | ||
|
|
bbb3db31a8 | ||
|
|
c917c5da3d | ||
|
|
edc2056e75 | ||
|
|
84b7cbcda2 | ||
|
|
44484670f2 | ||
|
|
0b442422ab | ||
|
|
d0448af52e | ||
|
|
e82585ecc7 | ||
|
|
ff36bd30c5 | ||
|
|
12b2117c77 | ||
|
|
34ec532eed | ||
|
|
2fa23ce9fd | ||
|
|
8399061dc9 | ||
|
|
86ab2806fa | ||
|
|
6f77504ca9 | ||
|
|
a259297092 | ||
|
|
2c662b6f33 | ||
|
|
548d6a5a58 | ||
|
|
f3d2513d32 | ||
|
|
8b20756095 | ||
|
|
8f093769ce | ||
|
|
f6dafecfa1 | ||
|
|
98f95a7da8 | ||
|
|
f3d373c8ca | ||
|
|
536ff35d66 | ||
|
|
6d31c5fb94 | ||
|
|
5730d3583a | ||
|
|
da64336967 | ||
|
|
480311c442 | ||
|
|
8b44e3d4b6 | ||
|
|
9049625ec2 | ||
|
|
d8c70ceae2 | ||
|
|
95bb8a0c7f | ||
|
|
9b1a64616b | ||
|
|
8a6894fa28 | ||
|
|
7c4e819c93 | ||
|
|
9bedeb55a0 | ||
|
|
6c92d45d97 | ||
|
|
c7c029c706 | ||
|
|
6fec02f79e | ||
|
|
fc3e8bb8ff | ||
|
|
3f52734da2 | ||
|
|
cde8a739fb | ||
|
|
dc5837badb | ||
|
|
43a2d5cd67 | ||
|
|
2c0a1d1046 | ||
|
|
64aaaf6daa | ||
|
|
dd2a076b6f | ||
|
|
cf7f84c886 | ||
|
|
98a5a120c1 | ||
|
|
77d35d88c7 | ||
|
|
f25ed9efbb | ||
|
|
de7e4803a3 | ||
|
|
1516b100d2 | ||
|
|
7ff2976dfe | ||
|
|
f4426d0532 | ||
|
|
f4fbbf0d34 | ||
|
|
57cf738df5 | ||
|
|
edb09d1a7e | ||
|
|
84c5e245e6 | ||
|
|
95cece7e9c | ||
|
|
ea345b059d | ||
|
|
6ca6d47066 | ||
|
|
fea04ed16c | ||
|
|
84b3b6d61e | ||
|
|
4f0be16f0b | ||
|
|
f8fc1a2881 | ||
|
|
f9471377bb | ||
|
|
152088de87 | ||
|
|
82702ea958 | ||
|
|
3432a786d5 | ||
|
|
4fd8972f6a | ||
|
|
e4847653c6 | ||
|
|
6e73c7400a | ||
|
|
5c40f4073a | ||
|
|
da3777a0ca | ||
|
|
dd636bb55f | ||
|
|
6fcfdaabf3 | ||
|
|
e26eb9d9cc | ||
|
|
732d40f5c8 | ||
|
|
814cf2931c | ||
|
|
5e4f041509 | ||
|
|
8862ec985f | ||
|
|
c887697d61 | ||
|
|
30115980af | ||
|
|
be057e296f | ||
|
|
a5d42e07c9 | ||
|
|
6484f588e4 | ||
|
|
83a5c28d71 | ||
|
|
96a129a70f | ||
|
|
51e6892a5e | ||
|
|
47ad5c1e1f | ||
|
|
bdb90941d3 | ||
|
|
a2e9d29cf6 | ||
|
|
b43bec4126 | ||
|
|
5992f835fb | ||
|
|
263c840f30 | ||
|
|
7786b1b5a9 | ||
|
|
b1ce5f8956 | ||
|
|
5e6ab494b9 | ||
|
|
b99560acca | ||
|
|
b146552e39 | ||
|
|
8468a502bb | ||
|
|
1b96617c78 | ||
|
|
7ac179e068 | ||
|
|
f29f3f973a | ||
|
|
e775bd451d | ||
|
|
bef71a49b6 | ||
|
|
e5ab3e1d0c | ||
|
|
bb06ffdaea | ||
|
|
5ce7aa5c48 | ||
|
|
85450360de | ||
|
|
ec6873f95f | ||
|
|
e4d5b61ef6 | ||
|
|
644bd369e4 | ||
|
|
dede2376c3 | ||
|
|
2bd727bec2 | ||
|
|
50c85d4835 | ||
|
|
7103630e55 | ||
|
|
a31d58bca3 | ||
|
|
6ae424d3ff | ||
|
|
3b703da1f3 | ||
|
|
6695f23079 | ||
|
|
5d4d8e6239 | ||
|
|
b14590c112 | ||
|
|
e11e09f935 | ||
|
|
4e0aa707b9 | ||
|
|
0845deb095 | ||
|
|
2719705a1a | ||
|
|
346da2cdee | ||
|
|
db39aaf4ff | ||
|
|
22ea1d4a15 | ||
|
|
4365e852fe | ||
|
|
6a474eb0a0 | ||
|
|
020d8d9e5b | ||
|
|
220ca33cc9 | ||
|
|
2cee4cca06 | ||
|
|
a31ace8032 | ||
|
|
6d0495eab8 | ||
|
|
6d6457a32f | ||
|
|
befe0e5254 | ||
|
|
2c41230b74 | ||
|
|
0e1e92750c | ||
|
|
b27854b8a5 | ||
|
|
2c504ae67e | ||
|
|
24d02895ef | ||
|
|
01887e37b4 | ||
|
|
628f76c20a | ||
|
|
f31e7b1860 | ||
|
|
073d52a17c | ||
|
|
eac3531f31 | ||
|
|
7873e25779 | ||
|
|
f468611b01 | ||
|
|
d3aea54b6c | ||
|
|
1d5afe8cd6 | ||
|
|
91d6aacc74 | ||
|
|
0036ba94d9 | ||
|
|
3711663a12 | ||
|
|
7e2eb531ba | ||
|
|
39cca07432 | ||
|
|
001cdd34c7 | ||
|
|
4cb0201970 | ||
|
|
56da4a2850 | ||
|
|
f613fea791 | ||
|
|
ccd25b0c93 | ||
|
|
60c14c2cef | ||
|
|
895274ad24 | ||
|
|
bf13b81837 | ||
|
|
adeb9f26c3 | ||
|
|
c3631f6ac7 | ||
|
|
1301fc3dc4 | ||
|
|
d76fa989d1 | ||
|
|
53dd0073f1 | ||
|
|
b6b0b0a8c5 | ||
|
|
c0573d76fd | ||
|
|
44b803a529 | ||
|
|
c6705a82db | ||
|
|
66813584f5 | ||
|
|
e61829052e | ||
|
|
701d358ea6 | ||
|
|
15d434fce2 | ||
|
|
c801729215 | ||
|
|
2e192380f0 | ||
|
|
4c4355a910 | ||
|
|
7c17a2dcd0 | ||
|
|
186a97042b | ||
|
|
d2f6d2d6b8 | ||
|
|
0c1bbd0c96 | ||
|
|
f5f9a7d303 | ||
|
|
224bd11821 | ||
|
|
6d6cac850b | ||
|
|
d81cc0bd4a | ||
|
|
73459f2b83 | ||
|
|
aa8c96de7b | ||
|
|
61a7701e78 | ||
|
|
337086b90b | ||
|
|
20003aa49d | ||
|
|
e1d5a68a90 | ||
|
|
ac5f94a6ac | ||
|
|
d85e3b977e | ||
|
|
fead675aae | ||
|
|
c33267750d | ||
|
|
9c5badc2bf | ||
|
|
b65713f902 | ||
|
|
8ad18383cc | ||
|
|
6e1892dd4e | ||
|
|
f593295d06 | ||
|
|
7eb142e598 | ||
|
|
4d322a8fae | ||
|
|
ccea7827ce | ||
|
|
ed2bb78657 | ||
|
|
8871352b2c | ||
|
|
04632728bc | ||
|
|
d92475b980 | ||
|
|
89c4b68b9f | ||
|
|
6e97d98118 | ||
|
|
e326b81b3f | ||
|
|
a7ced3d78a | ||
|
|
c78ff37f56 | ||
|
|
560abad128 | ||
|
|
1adba9193a | ||
|
|
a6d492d970 | ||
|
|
56a7f271ff | ||
|
|
3fffd22996 | ||
|
|
d11d4c5263 | ||
|
|
ed5260f035 | ||
|
|
5df1608d74 | ||
|
|
773b2600c5 | ||
|
|
d0fddf2da6 | ||
|
|
8ccc3dc129 | ||
|
|
2b001f003b | ||
|
|
dd88bef85a | ||
|
|
2a6e92e586 | ||
|
|
102b23434b | ||
|
|
7ea7c8497c | ||
|
|
2faafdd9f3 | ||
|
|
a09c84258f | ||
|
|
8a3ce58d4e | ||
|
|
599a89ee6a | ||
|
|
5b0b91eb46 | ||
|
|
cddfd8b835 | ||
|
|
770c9fa167 | ||
|
|
ecf4b10238 | ||
|
|
4c64b406df | ||
|
|
031b9052d1 | ||
|
|
f276b836c7 | ||
|
|
e63b05ff16 | ||
|
|
0113d07a63 | ||
|
|
c0b6e918ad | ||
|
|
92d3c7c8f0 | ||
|
|
543c741502 | ||
|
|
018f87767d | ||
|
|
238884ad53 | ||
|
|
cd83136278 | ||
|
|
6759803ccd | ||
|
|
b5f6a447b9 | ||
|
|
b26b124cfe | ||
|
|
e58df9ac97 | ||
|
|
11f7c6f115 | ||
|
|
662b808ba9 | ||
|
|
dbeba818f7 | ||
|
|
666c3b4143 | ||
|
|
e2dba246b2 | ||
|
|
4e57d27a57 | ||
|
|
4a58c43af9 | ||
|
|
1d2006761d | ||
|
|
23bc94451e | ||
|
|
a1f3349da0 | ||
|
|
f99889d5e8 | ||
|
|
137138a8ab | ||
|
|
640b0eac0e | ||
|
|
73b78d6335 | ||
|
|
7558c998df | ||
|
|
387aad83b6 | ||
|
|
43b07b6d6a | ||
|
|
b6abcc41cf | ||
|
|
a307c128fa | ||
|
|
16b78523e5 | ||
|
|
8084761154 | ||
|
|
d3dd5a86a8 | ||
|
|
69510094d3 | ||
|
|
b0ca83f760 | ||
|
|
2c707a74dd | ||
|
|
dfbbed0709 | ||
|
|
842b2d2d55 | ||
|
|
af22795cd5 | ||
|
|
cd71351181 | ||
|
|
86b3f49e6b | ||
|
|
7e53863d15 | ||
|
|
a5832e8d02 | ||
|
|
fc68c4574a | ||
|
|
f4a7a8657e | ||
|
|
943bf1f36c | ||
|
|
2482416ea5 | ||
|
|
431369ed42 | ||
|
|
314ff73280 | ||
|
|
ce6df518a2 | ||
|
|
99049da5c6 | ||
|
|
1d73c51712 | ||
|
|
dead6872d4 | ||
|
|
7a93a494ec | ||
|
|
93cfc97d1d | ||
|
|
bcd16b7840 | ||
|
|
be9f626c85 | ||
|
|
1133f5cc3a | ||
|
|
b37ae23af7 | ||
|
|
e9574d66df | ||
|
|
55f6b882df | ||
|
|
8692665724 | ||
|
|
93f483e42c | ||
|
|
05e3415059 | ||
|
|
e6b66636b9 | ||
|
|
13c6a1fd77 | ||
|
|
9b6c6da639 | ||
|
|
7b596c1110 | ||
|
|
23e0977218 | ||
|
|
7fbcb054ad | ||
|
|
40a2af2b3d | ||
|
|
0b8180a2cf | ||
|
|
33f3aa8dd2 | ||
|
|
6682a3117b | ||
|
|
38ea209a40 | ||
|
|
295868b923 | ||
|
|
fc8e96cc9e | ||
|
|
58387605e6 | ||
|
|
1d5e5d3722 | ||
|
|
4aa9c1bf34 | ||
|
|
d347523942 | ||
|
|
a181c36ccb | ||
|
|
28a2b5e926 | ||
|
|
65a7538452 | ||
|
|
bb3a86298e | ||
|
|
d01ae7004a | ||
|
|
31f3384c8e | ||
|
|
97823bc12b | ||
|
|
7f2514c177 | ||
|
|
e3b487205d | ||
|
|
b5dd8d4565 | ||
|
|
7341598cc3 | ||
|
|
04dd608930 | ||
|
|
8b64b415c4 | ||
|
|
0da8d430d9 | ||
|
|
38570c26c7 | ||
|
|
78c6b3e5cd | ||
|
|
7550554c3e | ||
|
|
68bb6f6fcf | ||
|
|
bf01b1a7de | ||
|
|
c53cbfe156 | ||
|
|
a1f839d732 | ||
|
|
71de6900ee | ||
|
|
11665834b5 | ||
|
|
36eed1bc43 | ||
|
|
b39d6a33b7 | ||
|
|
9c554375aa | ||
|
|
7c6c82e0ac | ||
|
|
ceccc5baab | ||
|
|
379d6ac634 | ||
|
|
53c75ce01c | ||
|
|
08044e5c0d | ||
|
|
63b1d7ac72 | ||
|
|
63450c65e1 | ||
|
|
e9d206bf9b | ||
|
|
3913028800 | ||
|
|
b8879d6b75 | ||
|
|
7df74c2bbb | ||
|
|
1782a32674 | ||
|
|
20574c7e94 | ||
|
|
a78eb07c77 | ||
|
|
a8bdcde4bf | ||
|
|
523aa75588 | ||
|
|
2b36871281 | ||
|
|
0cff71b9d6 | ||
|
|
e3d358e4e0 | ||
|
|
afacc475b4 | ||
|
|
8051ef7c9f | ||
|
|
d0a13b63ff | ||
|
|
c5734f96b8 | ||
|
|
adefbcfcf8 | ||
|
|
eb9e3ba9fe | ||
|
|
6e3055e753 | ||
|
|
6c3a133ccd | ||
|
|
75af89464d | ||
|
|
b40676518c | ||
|
|
86b86b50f9 | ||
|
|
5fd455b981 | ||
|
|
58a8ca411c | ||
|
|
d2ff6ba5d2 | ||
|
|
cb3f7e1644 | ||
|
|
e2c6d4be99 | ||
|
|
20802c8a6b | ||
|
|
2243edb175 | ||
|
|
80c4f4cb56 | ||
|
|
b43d0e4b79 | ||
|
|
3c95a6a533 | ||
|
|
d3d0865a00 | ||
|
|
41e2f5ed75 | ||
|
|
8653b1520f | ||
|
|
a67dd3d7b0 | ||
|
|
bdeb2a80f7 | ||
|
|
0eb543a726 | ||
|
|
9c9a2a22f5 | ||
|
|
8aeb05a22d | ||
|
|
11670b30ba | ||
|
|
ff0a9a7335 | ||
|
|
33272aaa22 | ||
|
|
75fc53f93a | ||
|
|
890f416eae | ||
|
|
3a35e4d2d0 | ||
|
|
81ef198d00 | ||
|
|
d7f149e990 | ||
|
|
6e86f51164 | ||
|
|
a086de264c | ||
|
|
dc28197c7b | ||
|
|
de8443298e | ||
|
|
eee92b4ebb | ||
|
|
10a07fe4bf | ||
|
|
a8c10bb017 | ||
|
|
ecfa75c235 | ||
|
|
21bd4b951d | ||
|
|
ff6950b2e2 | ||
|
|
f9a39897a2 | ||
|
|
eeac5f2b9a | ||
|
|
98ea6ba721 | ||
|
|
2ca954f048 | ||
|
|
fa7cf95ee2 | ||
|
|
5680027b72 | ||
|
|
8c6c6991c2 | ||
|
|
addc024e49 | ||
|
|
335bfb02c2 | ||
|
|
fb94a3f3f1 | ||
|
|
9ea9cf4c68 | ||
|
|
e977587fae | ||
|
|
0c02cd98e0 | ||
|
|
c67e19e0bf | ||
|
|
4e4360ec62 | ||
|
|
e786090aeb | ||
|
|
03f2657a6e | ||
|
|
16be4cbbe5 | ||
|
|
53c8b69f1e | ||
|
|
28238b18ff | ||
|
|
f4c06014dd | ||
|
|
fb8ab400b7 | ||
|
|
f2d74defca | ||
|
|
c1c18a5a87 | ||
|
|
54e952748f | ||
|
|
30470c8f6a | ||
|
|
4da7db4305 | ||
|
|
23a00fb15a | ||
|
|
951cc73e46 | ||
|
|
53452ca410 | ||
|
|
01ba441a63 | ||
|
|
582c1a6e7f | ||
|
|
77d1037a90 | ||
|
|
52587ef69b | ||
|
|
ea66ae350b | ||
|
|
ad3a16f423 | ||
|
|
4cf37d449e | ||
|
|
2c00752e23 | ||
|
|
05e15487e4 | ||
|
|
99236e82ad | ||
|
|
b9f5686a3c | ||
|
|
b99a7fe494 | ||
|
|
f028bc9b6c | ||
|
|
bd1bfbfaf9 | ||
|
|
f61696fb3f | ||
|
|
f47f859de0 | ||
|
|
6a18f3509b | ||
|
|
02734791cd | ||
|
|
6194f3d9e7 | ||
|
|
197c6dde81 | ||
|
|
ea87916f4b | ||
|
|
b710bdaafd | ||
|
|
7b2d6a91fb | ||
|
|
c7a542fd17 | ||
|
|
fa2b3c9511 | ||
|
|
d6258ab74d | ||
|
|
f633ef8137 | ||
|
|
a4c6fd9ff7 | ||
|
|
0812d13003 | ||
|
|
c97407ae56 | ||
|
|
b2b56e6366 | ||
|
|
78e3689062 | ||
|
|
9f77f3a60d | ||
|
|
db85c2c4b3 | ||
|
|
dc26cef572 | ||
|
|
bc149a2deb | ||
|
|
1e46c97bbd | ||
|
|
790744c9e1 | ||
|
|
033c38fc91 | ||
|
|
825a2070c5 | ||
|
|
5128dc6743 | ||
|
|
8828e1fc28 | ||
|
|
a43949d123 | ||
|
|
61bc732810 | ||
|
|
555872bdef | ||
|
|
c0d776f64c | ||
|
|
a2dd11326f | ||
|
|
0904101b7d | ||
|
|
6fc9aa6dfc | ||
|
|
3b72126f5f | ||
|
|
80fb72928e | ||
|
|
8ee9fc36ab | ||
|
|
89e731031c | ||
|
|
619bc8a6f9 | ||
|
|
a2523f1a1e | ||
|
|
3499548a2f | ||
|
|
4460ee00cf | ||
|
|
89290bf7a4 | ||
|
|
a07b36b61f | ||
|
|
6f305d6254 | ||
|
|
7e356b733e | ||
|
|
f2c8ae6a0a | ||
|
|
b1ab540c11 | ||
|
|
9ca0bfc5d8 | ||
|
|
7011250353 | ||
|
|
744400b161 | ||
|
|
d0b81c1c7b | ||
|
|
adfaf141d3 | ||
|
|
a8047ba0a9 | ||
|
|
b142654dfc | ||
|
|
56d4688f2c | ||
|
|
df26e74145 | ||
|
|
8dd9154982 | ||
|
|
23e4f9468d | ||
|
|
aa966de4bc | ||
|
|
a711083e90 | ||
|
|
99bafb052b | ||
|
|
61b5cd8e43 | ||
|
|
1466ff2422 | ||
|
|
af8a979984 | ||
|
|
1d562d1fe4 | ||
|
|
d437654320 | ||
|
|
1eb5eb2d54 | ||
|
|
dbc90cfce5 | ||
|
|
cf5c0fd68c | ||
|
|
b02f40318c | ||
|
|
bc6d65de26 | ||
|
|
09f2fc4d4b | ||
|
|
5c06b32a30 | ||
|
|
125c55e1e3 | ||
|
|
841fe6e396 | ||
|
|
f245310927 | ||
|
|
5e31182bc8 | ||
|
|
0ca4d20720 | ||
|
|
2ddce1acd5 | ||
|
|
dc88a00ea4 | ||
|
|
df61e88714 | ||
|
|
36efc7366e | ||
|
|
a829d01e7c | ||
|
|
1459ad8611 | ||
|
|
2e78b153d5 | ||
|
|
467d79120e | ||
|
|
9080349615 | ||
|
|
2085dda0a3 | ||
|
|
52e69abb88 | ||
|
|
06fa73666f | ||
|
|
d7940213ab | ||
|
|
da5ec5b357 | ||
|
|
605fc0dbcf | ||
|
|
9da07fd160 | ||
|
|
913f8dc256 | ||
|
|
f8cb9e9364 | ||
|
|
7ec234a052 | ||
|
|
bb12670ef3 | ||
|
|
120a82c82b | ||
|
|
bd9128044a | ||
|
|
9e54b8d82b | ||
|
|
1f3f09d713 | ||
|
|
17c9a26c8a | ||
|
|
5755d462cc | ||
|
|
f4de32550c | ||
|
|
2ae9c679e1 | ||
|
|
9dc4de0f07 | ||
|
|
a64a415f59 | ||
|
|
5e02fdc2ae | ||
|
|
6d75c4b464 | ||
|
|
ff05648b04 | ||
|
|
0a114cd313 | ||
|
|
db3b17ed5f | ||
|
|
14231fdd0a | ||
|
|
76565e959a | ||
|
|
70e67f7960 | ||
|
|
9f244b9c01 | ||
|
|
6a1dccd270 | ||
|
|
b146954afd | ||
|
|
58cc24e9c4 | ||
|
|
c97de461a8 | ||
|
|
e759240175 | ||
|
|
0eb3abd44a | ||
|
|
e699910675 | ||
|
|
c92acf2b3b | ||
|
|
b439fa8bf0 | ||
|
|
7ac8d1f1aa | ||
|
|
991b928edb | ||
|
|
fb815c0453 | ||
|
|
74ddae0fd9 | ||
|
|
4dcc9ec510 | ||
|
|
2245167580 | ||
|
|
97fe1bbcf6 | ||
|
|
93fc626332 | ||
|
|
16f19e6b4a | ||
|
|
66ed3478cd | ||
|
|
982fd32a06 | ||
|
|
3be15436a8 | ||
|
|
443a543bb5 | ||
|
|
e28773850f | ||
|
|
52e740cf58 | ||
|
|
bdd8921328 | ||
|
|
3f2596c247 | ||
|
|
73305fe0df | ||
|
|
5ca13c71b3 | ||
|
|
06730f3f7b | ||
|
|
464a7a3ee3 | ||
|
|
bd52738e4c | ||
|
|
3b6a4b85a9 | ||
|
|
03d030feab | ||
|
|
1082dc5417 | ||
|
|
afb9f38ab4 | ||
|
|
9754747785 | ||
|
|
8ea2aca735 | ||
|
|
f7dcce698b | ||
|
|
b94779f7d4 | ||
|
|
b24db52b3d | ||
|
|
19571e3b2b | ||
|
|
3ae3d6c677 | ||
|
|
6924828c8d | ||
|
|
5aa8f2b25c | ||
|
|
f0b14e680e | ||
|
|
fc3f1c6588 | ||
|
|
e62c771a3f | ||
|
|
72ada92aa4 | ||
|
|
18aa2776b0 | ||
|
|
3a30d2c5ea | ||
|
|
e859aa23bf | ||
|
|
ca3a453447 | ||
|
|
7132d16053 | ||
|
|
fcf8dc2cde | ||
|
|
3aebb20ec2 | ||
|
|
db1d6d9e0c | ||
|
|
0501e98b13 | ||
|
|
0609d8bfae | ||
|
|
89c6d45786 | ||
|
|
48065e5d83 | ||
|
|
5c3a8931ed | ||
|
|
f994c67cc5 | ||
|
|
466e706f1c | ||
|
|
de01752a8b | ||
|
|
e2a3b48481 | ||
|
|
162dcf5fbd | ||
|
|
7ebc993891 | ||
|
|
c750ebc4d5 | ||
|
|
4c56c27b3b | ||
|
|
ab6c023903 | ||
|
|
7a30473ce2 | ||
|
|
0b117007dc | ||
|
|
dbba0d5cb2 | ||
|
|
37bb0b8e45 | ||
|
|
3063baeb20 | ||
|
|
d2d2584dc9 | ||
|
|
edad05c2d7 | ||
|
|
344824294d | ||
|
|
7d3c1c1e2b | ||
|
|
b1f65c9c4f | ||
|
|
b3126d3996 | ||
|
|
f26bc481d4 | ||
|
|
c65ce60f71 | ||
|
|
7077c85ada | ||
|
|
1dbfed1be2 | ||
|
|
f74f1a3561 | ||
|
|
bd3807f168 | ||
|
|
d161e21940 | ||
|
|
da800759ca | ||
|
|
f52947446a | ||
|
|
da79260189 | ||
|
|
0e4e3ab00a | ||
|
|
5dd2d3297c | ||
|
|
2ee505706c | ||
|
|
b938e15712 | ||
|
|
93d11a4b8d | ||
|
|
011e52dbb9 | ||
|
|
ad63699c5b | ||
|
|
e4d198b72b | ||
|
|
63c7edcecc | ||
|
|
125487003e | ||
|
|
935a0b2413 | ||
|
|
cce2f18d0c | ||
|
|
bccb7eed85 | ||
|
|
48917b4234 | ||
|
|
18226e2fe1 | ||
|
|
b57094dc5d | ||
|
|
91a7ce01a3 | ||
|
|
c26c9be76f | ||
|
|
a020de9f99 | ||
|
|
9f270e2b91 | ||
|
|
45ecbf8ede | ||
|
|
d5d2bae774 | ||
|
|
10e198c7ba | ||
|
|
d3034dc8df | ||
|
|
872644cbe6 | ||
|
|
1bd9c9667d | ||
|
|
64759be173 | ||
|
|
213f790f0e | ||
|
|
73a4d83eda | ||
|
|
6533f1a3c6 | ||
|
|
2a4a09f562 | ||
|
|
b77b5ccc1b | ||
|
|
66fb5d7bab | ||
|
|
6d6f273787 | ||
|
|
9eee629c38 | ||
|
|
c8a695c735 | ||
|
|
acd86e3902 | ||
|
|
0d1dd7ab5c | ||
|
|
ef7ba42f9a | ||
|
|
536a759a7f | ||
|
|
6207a2fdf7 | ||
|
|
9bc70b79e6 | ||
|
|
2cd6f53f76 | ||
|
|
4fb60c0a9c | ||
|
|
1d3067dfec | ||
|
|
92cc03cf6e | ||
|
|
3724695d23 | ||
|
|
6c0020fc4f | ||
|
|
583dd45610 | ||
|
|
87c30b3239 | ||
|
|
0053a29c64 | ||
|
|
c9a9e2e2d6 | ||
|
|
867f3fdb49 | ||
|
|
72ed9fcb4a | ||
|
|
fec9ec0a04 | ||
|
|
78ed49a45f | ||
|
|
aa88aacfe9 | ||
|
|
8e9a734299 | ||
|
|
1bfa1bc62b | ||
|
|
a757d97a40 | ||
|
|
711c9e3ad4 | ||
|
|
f139e6ea94 | ||
|
|
5ff77100c5 | ||
|
|
16478cdd5a | ||
|
|
192ca9b444 | ||
|
|
6a12c4d52f | ||
|
|
3e73f6c708 | ||
|
|
878aa0b1a6 | ||
|
|
b9fc710a87 | ||
|
|
617a76a3d1 | ||
|
|
2c8e87e85b | ||
|
|
e16694a798 | ||
|
|
0971c3c76b | ||
|
|
f6ac55beb6 | ||
|
|
d10c357036 | ||
|
|
f2c061080f | ||
|
|
dd75be8efe | ||
|
|
09828fc9c8 | ||
|
|
b1a1d7c6bc | ||
|
|
40835a969b | ||
|
|
a1c1bbd2d5 | ||
|
|
41ba08d2f6 | ||
|
|
03b93bb9d7 | ||
|
|
14c4e30576 | ||
|
|
52ae3d1ec0 | ||
|
|
0ca5326261 | ||
|
|
69b6fcc17b | ||
|
|
fb7abb7bee | ||
|
|
824e33abcd | ||
|
|
d6c7064254 | ||
|
|
f324c284ff | ||
|
|
35dbb1967b | ||
|
|
25427e17aa | ||
|
|
7e18176ffc | ||
|
|
7bc0be1788 | ||
|
|
16f9081a80 | ||
|
|
35e10c0a8b | ||
|
|
237c004e20 | ||
|
|
0e1d97915c | ||
|
|
ba50bff441 | ||
|
|
d97c4607a1 | ||
|
|
c7e716eaa0 | ||
|
|
c8f51ac64b | ||
|
|
9693cc4c35 | ||
|
|
9c53660299 | ||
|
|
97dabcfceb | ||
|
|
64666069d4 | ||
|
|
e1ff7e9739 | ||
|
|
715173f513 | ||
|
|
158b4b7553 | ||
|
|
e2d6614fe7 | ||
|
|
8a7423d27f | ||
|
|
494e3fbaaa | ||
|
|
b9f8d4e314 | ||
|
|
5ea0a3d140 | ||
|
|
a518de60b5 | ||
|
|
d9d61a04a8 | ||
|
|
fc4ff3d2de | ||
|
|
4559314798 | ||
|
|
2192b4fccd | ||
|
|
8b8dce8ba9 | ||
|
|
b1333ad5c2 | ||
|
|
e6e51fc9fd | ||
|
|
3222b0cedd | ||
|
|
bc2cea6187 | ||
|
|
eba91eaf65 | ||
|
|
d8daf768a9 | ||
|
|
670995ba3b | ||
|
|
591bcc663b | ||
|
|
aed2c5743f | ||
|
|
99aef392fe | ||
|
|
9345f6b875 | ||
|
|
e9bf516bb9 | ||
|
|
5faa05abf6 | ||
|
|
6252203b85 | ||
|
|
95dad358af | ||
|
|
3e719d7671 | ||
|
|
a62e02a6ad | ||
|
|
094afe8819 | ||
|
|
115cb44948 | ||
|
|
becca6e157 | ||
|
|
40bf0dab66 | ||
|
|
3611ccc16c | ||
|
|
477c0562a2 | ||
|
|
904385e502 | ||
|
|
72f8da76e5 | ||
|
|
97e789846c | ||
|
|
672ff9038b | ||
|
|
dddf955fae | ||
|
|
9562794e24 | ||
|
|
1e5f10888b | ||
|
|
2c94c90748 | ||
|
|
bb925dda04 | ||
|
|
c387e708e1 | ||
|
|
9b6657edb3 | ||
|
|
09b05199d1 | ||
|
|
61e35f0b53 | ||
|
|
78c6831226 | ||
|
|
43cf842721 | ||
|
|
760a5cbc9c | ||
|
|
b60c83bfd5 | ||
|
|
53fe026cfe | ||
|
|
f8bfc49ea8 | ||
|
|
3f43e5b929 | ||
|
|
fc049f53e0 | ||
|
|
34608575c7 | ||
|
|
868742f9d9 | ||
|
|
bb5e5166f6 | ||
|
|
212d076a50 | ||
|
|
1bc524db2d | ||
|
|
48df9f2023 | ||
|
|
bb7a4f3ea4 | ||
|
|
c0a1996589 | ||
|
|
555c675209 | ||
|
|
081bf75ba0 | ||
|
|
797dc6e420 | ||
|
|
adfc7494d1 | ||
|
|
d708e91725 | ||
|
|
c46d911cc4 | ||
|
|
f33a5de8b3 | ||
|
|
8428110d67 | ||
|
|
c294446082 | ||
|
|
98feb81475 | ||
|
|
f917e3955c | ||
|
|
2e17e3bef4 | ||
|
|
ecf22fea39 | ||
|
|
a78f8e94ee | ||
|
|
630945570d | ||
|
|
18b29a3f4e | ||
|
|
c1ae854548 | ||
|
|
e9f899ab57 | ||
|
|
fc7ec6b89a | ||
|
|
2a93c9191a | ||
|
|
312179570b | ||
|
|
5b9d7b422f | ||
|
|
d3f073a630 | ||
|
|
8506ed9b5b | ||
|
|
49bbd95880 | ||
|
|
4d1326c1bb | ||
|
|
d6684663bb | ||
|
|
4c93dc7097 | ||
|
|
14f3d71f70 | ||
|
|
5ee4a2e572 | ||
|
|
5f4504ccf2 | ||
|
|
5ab1779b4c | ||
|
|
6eb7597d8b | ||
|
|
0cc7aa54c7 | ||
|
|
badfc73fcb | ||
|
|
cbbdc20601 | ||
|
|
0fa4dfda3f | ||
|
|
88fe2477eb | ||
|
|
c2afc72d6c | ||
|
|
fc5123ff1f | ||
|
|
cb27cb64b6 | ||
|
|
a3ff446a3d | ||
|
|
1a3a6ec1e0 | ||
|
|
886e37b168 | ||
|
|
9f13145b2c | ||
|
|
af241ca42c | ||
|
|
0b9b066c18 | ||
|
|
fd1a1e357b | ||
|
|
0795225cc7 | ||
|
|
738d520938 | ||
|
|
dd0571d4bd | ||
|
|
3e4193a6d5 | ||
|
|
07dee591ab | ||
|
|
e7e183b296 | ||
|
|
eb04b3b7e4 | ||
|
|
2078e5923f | ||
|
|
b6751fddf4 | ||
|
|
4dd1488fec | ||
|
|
c1ecaf668e | ||
|
|
26f9ef0290 | ||
|
|
2d6a67ff18 | ||
|
|
32db6e2036 | ||
|
|
d3cb8e6be5 | ||
|
|
6cfc27cb87 | ||
|
|
3ee144475c | ||
|
|
c5b538c724 | ||
|
|
5b587a8608 | ||
|
|
5ce7c00ac3 | ||
|
|
c0d283b9c2 | ||
|
|
0c100d5917 | ||
|
|
1c0d7f93f7 | ||
|
|
f99810a1ca | ||
|
|
2ee5d71821 | ||
|
|
6bcf0f5499 | ||
|
|
5d00f16003 | ||
|
|
86dee0081d | ||
|
|
623158bb01 | ||
|
|
e3217dfed6 | ||
|
|
7aa451a3c1 | ||
|
|
f02b854343 | ||
|
|
fad21498d2 | ||
|
|
e1c27f8841 | ||
|
|
e380560cb3 | ||
|
|
03ebc65f6b | ||
|
|
b1c2fe6885 | ||
|
|
3c20a4c247 | ||
|
|
e4d81f0dff | ||
|
|
2191498ef6 | ||
|
|
8ef5dcc1b3 | ||
|
|
9fb72efa15 | ||
|
|
773f83bb06 | ||
|
|
e2b632c99a | ||
|
|
586ddce59f | ||
|
|
6695b9a846 | ||
|
|
6ffdf768bb | ||
|
|
1e38646026 | ||
|
|
ef6ceaf8b0 | ||
|
|
0c0534ea74 | ||
|
|
8ba5d2c423 | ||
|
|
ddf82749af | ||
|
|
3f6793b301 | ||
|
|
67d4eb46ee | ||
|
|
ff9db222b3 | ||
|
|
166a7795d6 | ||
|
|
dd261dec96 | ||
|
|
018bd04305 | ||
|
|
7c6817bc4f | ||
|
|
f8d976f42e | ||
|
|
d237ac849c | ||
|
|
e1f9de264f | ||
|
|
1d55f4778d | ||
|
|
20b5b8fb95 | ||
|
|
eb63b8bae5 | ||
|
|
5922d027b7 | ||
|
|
a46edf092d | ||
|
|
aa5706f372 | ||
|
|
75999010f0 | ||
|
|
a906d8f26b | ||
|
|
67d9eb92f4 | ||
|
|
59a19a7510 | ||
|
|
8894b87212 | ||
|
|
682b3ba325 | ||
|
|
3c67c4bf13 | ||
|
|
c8aec09a0e | ||
|
|
ec13618224 | ||
|
|
dfc68fd0ed | ||
|
|
12a7caa667 | ||
|
|
80a131b555 | ||
|
|
10993a4fe2 | ||
|
|
00d9c42e57 | ||
|
|
66520e236c | ||
|
|
edd089237e | ||
|
|
1d8c10b168 | ||
|
|
ca9ea7ef99 | ||
|
|
ea29473239 | ||
|
|
8175361275 | ||
|
|
4959b6eb4f | ||
|
|
bd3fb5dfe1 | ||
|
|
88bf48ce44 | ||
|
|
b9998abc48 | ||
|
|
b0cbb7da0d | ||
|
|
8e58e90e84 | ||
|
|
75b1cc23b5 | ||
|
|
3ba65f922b | ||
|
|
38a9c7db05 | ||
|
|
2e905841e2 | ||
|
|
7c262e71fa | ||
|
|
b948591389 | ||
|
|
0822212bcb | ||
|
|
22b1bca6cd | ||
|
|
f9a1fef55d | ||
|
|
df016a5e36 | ||
|
|
df4aabc517 | ||
|
|
2b12dc7054 | ||
|
|
9819e0b214 | ||
|
|
17cd3f3d04 | ||
|
|
1041fc44ec | ||
|
|
c555942bf4 | ||
|
|
34a4ad26da | ||
|
|
c2b6082345 | ||
|
|
f74e4bd252 | ||
|
|
10a6554c81 | ||
|
|
f82e534cb5 | ||
|
|
9572c25c0b | ||
|
|
ea4cf245ac | ||
|
|
39d23c8c98 | ||
|
|
2b9d4b4ebd | ||
|
|
eb51c671f5 | ||
|
|
6965a04403 | ||
|
|
48b8730571 | ||
|
|
005ccaded7 | ||
|
|
28c4c56806 | ||
|
|
030a0e7134 | ||
|
|
cb116af143 | ||
|
|
d9ae30cfe3 | ||
|
|
4b8392bb22 | ||
|
|
0ed828ec3b | ||
|
|
3048148b2a | ||
|
|
1930df68d1 | ||
|
|
c680f3bb64 | ||
|
|
96ff0dec5f | ||
|
|
4fee9b3011 | ||
|
|
865d1d9c69 | ||
|
|
56172edf6e | ||
|
|
e934417ba9 | ||
|
|
3dde6aff8f | ||
|
|
ec75736717 | ||
|
|
f08b922a80 | ||
|
|
dff03364d7 | ||
|
|
e3fb6d2a1c | ||
|
|
0e2885b6ca | ||
|
|
5948fd1109 | ||
|
|
ba8e7d7908 | ||
|
|
f96e4af3d2 | ||
|
|
96d0167538 | ||
|
|
8b1da6f6ec | ||
|
|
0479e418b2 | ||
|
|
88f1b9c44d | ||
|
|
ef98363abb | ||
|
|
1342e87c14 | ||
|
|
03c19c10a3 | ||
|
|
eeedfdee87 | ||
|
|
0e95125464 | ||
|
|
fd46963301 | ||
|
|
210a6a5589 | ||
|
|
b6b3c9425c | ||
|
|
7b35682ffd | ||
|
|
48e042064d | ||
|
|
e7a70a8301 | ||
|
|
ffde5bfdb5 | ||
|
|
967993cef2 | ||
|
|
c84f9f2895 | ||
|
|
37cdec2f27 | ||
|
|
6e68624f2a | ||
|
|
0cb2053be5 | ||
|
|
ea82a094f9 | ||
|
|
69a436af98 | ||
|
|
6c0a6594ff | ||
|
|
feeba370ed | ||
|
|
e433339f6b | ||
|
|
07f19f5f70 | ||
|
|
3ea533f5e6 | ||
|
|
56fe8dd657 | ||
|
|
04314d2b63 | ||
|
|
10fb77f00f | ||
|
|
a33f39dfec | ||
|
|
9d4f587e23 | ||
|
|
0be96953af | ||
|
|
fb474c8c45 | ||
|
|
b99f8afbe9 | ||
|
|
95297b58e0 | ||
|
|
10a7cf8aa7 | ||
|
|
b2a7986b8f | ||
|
|
2593044309 | ||
|
|
c3ff444b30 | ||
|
|
a1ea9d0f11 | ||
|
|
c74460bb56 | ||
|
|
50dff16eef | ||
|
|
d85b4b73a6 | ||
|
|
83dc85d801 | ||
|
|
18bf6445e0 | ||
|
|
19af3ea7de | ||
|
|
680bcc4280 | ||
|
|
30e076def7 | ||
|
|
1e0e2dde90 | ||
|
|
d33533f536 | ||
|
|
6d117363ed | ||
|
|
fb6d4eee01 | ||
|
|
9f668e2653 | ||
|
|
13fbf31f2c | ||
|
|
a42dccd9bf | ||
|
|
0453ecbc44 | ||
|
|
c5ce9c4cea | ||
|
|
fd11526da8 | ||
|
|
28901d293f | ||
|
|
a52949c2e6 | ||
|
|
e217a0b653 | ||
|
|
dda8acb21b | ||
|
|
e9f933a7f7 | ||
|
|
a2607ffa54 | ||
|
|
c6cafa87f2 | ||
|
|
f6d086e0dd | ||
|
|
9112346f41 | ||
|
|
34dc2e14b2 | ||
|
|
7b951d7f4d | ||
|
|
d33decd8f5 | ||
|
|
d652b4a9fe | ||
|
|
a160d28f27 | ||
|
|
7cb67982dd | ||
|
|
f772059654 | ||
|
|
4d13e477a5 | ||
|
|
5658967a8b | ||
|
|
3dd46ceee3 | ||
|
|
414fafc1e5 | ||
|
|
ae2b33ec7b | ||
|
|
cf2ae9b126 | ||
|
|
59ceff0af1 | ||
|
|
425f372968 | ||
|
|
fd8a7ce69b | ||
|
|
78d68892f7 | ||
|
|
2ee0ada0d1 | ||
|
|
9da3746b9c | ||
|
|
0f895205f6 | ||
|
|
81ec048517 | ||
|
|
72024b2b8e | ||
|
|
af38ed0878 | ||
|
|
0af9c874b1 | ||
|
|
b36278a7c8 | ||
|
|
ebb8608577 | ||
|
|
5b4e0e041b | ||
|
|
50bbb80633 | ||
|
|
0dcd5805fc | ||
|
|
769a5b44b5 | ||
|
|
3eb9755cce | ||
|
|
c53cbe8257 | ||
|
|
1af3b4ff92 | ||
|
|
1d1fc33093 | ||
|
|
6c9e84dc7f | ||
|
|
e2e6f74d42 | ||
|
|
8cc21d19ec | ||
|
|
738da1af0e | ||
|
|
285459758d | ||
|
|
b72ce6ecf3 | ||
|
|
9d8027ab7c | ||
|
|
3301b96390 | ||
|
|
9c43667b44 | ||
|
|
b9cf7e2a64 | ||
|
|
9a25c9d6f7 | ||
|
|
16049cc09b | ||
|
|
7c1a723a6d | ||
|
|
ee63036c6d | ||
|
|
a5a1322f28 | ||
|
|
07496ad0c3 | ||
|
|
0080310062 | ||
|
|
2beb2df77f | ||
|
|
0fa697b418 | ||
|
|
12383b6342 | ||
|
|
f91111de90 | ||
|
|
3a1447abea | ||
|
|
4108d5c1d1 | ||
|
|
b24cbbc954 | ||
|
|
8b0e0c8de5 | ||
|
|
b85c265fdd | ||
|
|
078950b2e3 | ||
|
|
34a12c48c1 | ||
|
|
84a7414981 | ||
|
|
64ddc71886 | ||
|
|
08cd7b0822 | ||
|
|
f1990600da | ||
|
|
a98770a18b | ||
|
|
5afd8ca3e2 | ||
|
|
91073658cc | ||
|
|
c27ffc52b2 | ||
|
|
5f9369176c | ||
|
|
31b2c75bed | ||
|
|
e8f75249da | ||
|
|
0a01ab7438 | ||
|
|
2278110d32 | ||
|
|
067a5fd244 | ||
|
|
5c52a1f43b | ||
|
|
cca3f362e6 | ||
|
|
175b4728d6 | ||
|
|
475efc8f04 | ||
|
|
338b288b38 | ||
|
|
0a2585808f | ||
|
|
d5ec157654 | ||
|
|
2fb09e6a2b | ||
|
|
793c04f262 | ||
|
|
af89ede8b4 | ||
|
|
d729386685 | ||
|
|
b7621c6555 | ||
|
|
bb14697397 | ||
|
|
cdf412660f | ||
|
|
50efa8f52d | ||
|
|
eae93edc1f | ||
|
|
0ee67a26ae | ||
|
|
e2901081f7 | ||
|
|
6925559f5f | ||
|
|
723489c230 | ||
|
|
0bf11d6ea3 | ||
|
|
bda69750a3 | ||
|
|
6eb39b6d46 | ||
|
|
d3b9dd0cde | ||
|
|
8a3b445241 | ||
|
|
1ca7036594 | ||
|
|
a819037b79 | ||
|
|
78fa57a63c | ||
|
|
7ad85b8beb | ||
|
|
fc5b7cb3b7 | ||
|
|
af63a3e770 | ||
|
|
3699991d6d | ||
|
|
915159a6d9 | ||
|
|
7b2aaee4ea | ||
|
|
2bd6435e72 | ||
|
|
dae4acd884 | ||
|
|
323fefa333 | ||
|
|
9dbe543dc9 | ||
|
|
e260b32014 | ||
|
|
d0db30fa1d | ||
|
|
c9297dd0c4 | ||
|
|
5264a15e68 | ||
|
|
7b4a3333e7 | ||
|
|
22359b0f4d | ||
|
|
6ab50b6eaa | ||
|
|
c9ef6cbdfe | ||
|
|
d40ff43a2f | ||
|
|
b35ca970f3 | ||
|
|
2ea7280f66 | ||
|
|
c9d7092f3a | ||
|
|
a353a54374 | ||
|
|
31c5696cd1 | ||
|
|
ebfe57d410 | ||
|
|
7c2fac6b7a | ||
|
|
de226a7e67 | ||
|
|
08cadaea10 | ||
|
|
35c662a616 | ||
|
|
4f5df9609e | ||
|
|
6cb97595b8 | ||
|
|
cd470000b0 | ||
|
|
6c8840eabe | ||
|
|
c70ec078ea | ||
|
|
4c27747416 | ||
|
|
6910c84225 | ||
|
|
7b21c9fbdd | ||
|
|
d0a2487674 | ||
|
|
13570e3f99 | ||
|
|
f069ae7897 | ||
|
|
9184ba9e0f | ||
|
|
a02108c95d | ||
|
|
3976266f70 | ||
|
|
df963ca78c | ||
|
|
3e3a13c096 | ||
|
|
c21c3a6041 | ||
|
|
5cae380257 | ||
|
|
710aaa32e6 | ||
|
|
8c46f82c26 | ||
|
|
123308bebd | ||
|
|
cbdb7649e1 | ||
|
|
29c718d7bd | ||
|
|
50af1fa781 | ||
|
|
b0bb9402da | ||
|
|
f2ed71a0a3 | ||
|
|
7177285f99 | ||
|
|
b2bb03921c | ||
|
|
2932c9b436 | ||
|
|
8021836a04 | ||
|
|
c2bc561688 | ||
|
|
d487b265f6 | ||
|
|
971db85948 | ||
|
|
7e29d4163d | ||
|
|
76e986117b | ||
|
|
8cb0b88c92 | ||
|
|
ad59e2cf45 | ||
|
|
d9101f315a | ||
|
|
59e24831b8 | ||
|
|
599a72d2fc | ||
|
|
882966cc0c | ||
|
|
f326301f38 | ||
|
|
f06ca006d0 | ||
|
|
1d077bda3f | ||
|
|
c68712a577 | ||
|
|
4d06041688 | ||
|
|
a9c87f4ecf | ||
|
|
ad529eb9ef | ||
|
|
afdf3e5205 | ||
|
|
34535be0a7 | ||
|
|
a0c551c46e | ||
|
|
54bee0ad0f | ||
|
|
158e0d5a5a | ||
|
|
58f3d1c268 | ||
|
|
599c9051f0 | ||
|
|
d2a6faa225 | ||
|
|
c759a9e769 | ||
|
|
b396a08828 | ||
|
|
15c21e8a6c | ||
|
|
cdf7db07b2 | ||
|
|
0055978a57 | ||
|
|
1578a9f724 | ||
|
|
c18822294f | ||
|
|
5bb53b83ae | ||
|
|
d1fb0d0d3c | ||
|
|
0c822bc0a0 | ||
|
|
4a4a9e0327 | ||
|
|
ade0458f1e | ||
|
|
c57470e955 | ||
|
|
f09a76fa61 | ||
|
|
64658387a5 | ||
|
|
69a1186978 | ||
|
|
af2dd1d063 | ||
|
|
6cd3aed4d6 | ||
|
|
2a7e1d2c19 | ||
|
|
2590ec564f | ||
|
|
fbebe48fec | ||
|
|
b86f637cee | ||
|
|
aba534a95d | ||
|
|
c1465af849 | ||
|
|
e7496f0e3a | ||
|
|
445e26158f | ||
|
|
cac74157b9 | ||
|
|
88dc400edd | ||
|
|
fd646e9924 | ||
|
|
c991f3cd3a | ||
|
|
12a5208ab2 | ||
|
|
008fdd6ea3 | ||
|
|
5175d40d6f | ||
|
|
7cefc329bc | ||
|
|
d55337e909 | ||
|
|
7585eafa77 | ||
|
|
a42a349e3c | ||
|
|
b0faa2ce21 | ||
|
|
28a23e1257 | ||
|
|
2861bb5f1e | ||
|
|
3f73e28a48 | ||
|
|
3459960c0f | ||
|
|
12ac620d71 | ||
|
|
402db7ff6e | ||
|
|
33d79d503e | ||
|
|
30aedd3bd7 | ||
|
|
b3ffa0767a | ||
|
|
a486d62d20 | ||
|
|
2e2389390e | ||
|
|
5e6b6a9b56 | ||
|
|
209a0a4a9e | ||
|
|
7cb05e38a3 | ||
|
|
894d25a938 | ||
|
|
79bd13f615 | ||
|
|
9a66915b37 | ||
|
|
d2fb987a1b | ||
|
|
b369d6aa35 | ||
|
|
4a13a03dd4 | ||
|
|
d4d3226803 | ||
|
|
c27fd27f5b | ||
|
|
a0b68ec7fe | ||
|
|
f4b9431603 | ||
|
|
545d7c5e8d | ||
|
|
e5bd062c26 | ||
|
|
90cdcb3d9e | ||
|
|
1df24b34af | ||
|
|
c0ee0cc702 | ||
|
|
135c969a0d | ||
|
|
b73093584f | ||
|
|
a7b71b94fd | ||
|
|
e315a11506 | ||
|
|
5caf276ec7 | ||
|
|
697f51d5b7 | ||
|
|
d5c1f484e6 | ||
|
|
a91345d626 | ||
|
|
8b0a4336de | ||
|
|
36e26b0592 | ||
|
|
0303b7eeb0 | ||
|
|
9f23d13c9c | ||
|
|
e27908e70a | ||
|
|
693c3d07ce | ||
|
|
986531d238 | ||
|
|
09dabc37ff | ||
|
|
0234c16117 | ||
|
|
7ec4dce57b | ||
|
|
c93b5d96fe | ||
|
|
a0948ff4f5 | ||
|
|
4cb37739d7 | ||
|
|
fbea3657ac | ||
|
|
9252a9d31d | ||
|
|
afd6b0cace | ||
|
|
23428e8f93 | ||
|
|
d226f4791f | ||
|
|
dda3c591b6 | ||
|
|
fc8efd457e | ||
|
|
49c9ea9837 | ||
|
|
befdeb193c | ||
|
|
f4a3b6e18d | ||
|
|
2fe661fbb7 | ||
|
|
b600199f64 | ||
|
|
48bbf4f2da | ||
|
|
6328c147d9 | ||
|
|
1dc801a6e0 | ||
|
|
632a551a08 | ||
|
|
1da9653ebb | ||
|
|
868d4317d9 | ||
|
|
0c2fe0b487 | ||
|
|
3fb3775ce8 | ||
|
|
b29d8d6b22 | ||
|
|
609cf53048 | ||
|
|
0ff0526d86 | ||
|
|
d0bcf638d7 | ||
|
|
20cca252b2 | ||
|
|
e1d087733f | ||
|
|
fe6e0263b8 | ||
|
|
71fff8511b | ||
|
|
74c38d2431 | ||
|
|
188c92332f | ||
|
|
5363fccbfe | ||
|
|
7e4e2d7844 | ||
|
|
1d3598ed8a | ||
|
|
dfd943f797 | ||
|
|
24c349c2e0 | ||
|
|
8cddb23c69 | ||
|
|
d6d61cd04f | ||
|
|
df26d2752a | ||
|
|
eb44773c87 | ||
|
|
1669c06703 | ||
|
|
c92effa01b | ||
|
|
0118bea9e2 | ||
|
|
fac2536d2c | ||
|
|
ffbb28a8bd | ||
|
|
1d73d2ffdb | ||
|
|
3ad076bd34 | ||
|
|
df29f57374 | ||
|
|
a0a831c2d4 | ||
|
|
67d3bbdca4 | ||
|
|
0bb83f62b7 | ||
|
|
b7480a5d3f | ||
|
|
9a0e244141 | ||
|
|
9f807bc9ca | ||
|
|
1bff056a49 | ||
|
|
e33f854402 | ||
|
|
e25bb1e3a0 | ||
|
|
9d226f9fe1 | ||
|
|
0aa3204dfb | ||
|
|
85c4e638dd | ||
|
|
3a23924721 | ||
|
|
7435d15fdb | ||
|
|
93a9c4a6c6 | ||
|
|
d2e26165bc | ||
|
|
fa784d58f9 | ||
|
|
3e61731df4 | ||
|
|
ebd77e5c52 | ||
|
|
3c77a68e61 | ||
|
|
54409dd083 | ||
|
|
37bc54feb9 | ||
|
|
c1929d60a1 | ||
|
|
1c6eba12cf | ||
|
|
c743d463c4 | ||
|
|
c73dfd9461 | ||
|
|
a2f1817f30 | ||
|
|
7a5730c720 | ||
|
|
137685cef5 | ||
|
|
c319d25fa3 | ||
|
|
d2b860ceb6 | ||
|
|
7d3c5445e2 | ||
|
|
d79d5aec98 | ||
|
|
681ddd0ad9 | ||
|
|
a14c97d335 | ||
|
|
23f93bde24 | ||
|
|
cbc7c4b64b | ||
|
|
cbf167d2a4 | ||
|
|
f12564b22f | ||
|
|
1654915282 | ||
|
|
ac3dc698bb | ||
|
|
206e8c87da | ||
|
|
232f0b1b24 | ||
|
|
ccdc926f22 | ||
|
|
406a9022ee | ||
|
|
4b71c0b6e3 | ||
|
|
f169fb53c7 | ||
|
|
46c67f87ca | ||
|
|
1328d3781b | ||
|
|
eeeabff8f7 | ||
|
|
0035f3a0f3 | ||
|
|
3da4281093 | ||
|
|
883938994b | ||
|
|
a2e1a04409 | ||
|
|
7feceb67ce | ||
|
|
164c63602b | ||
|
|
bbf7e7ddbb | ||
|
|
bc9a71adf6 | ||
|
|
361bd6fd24 | ||
|
|
aaaae4ddd8 | ||
|
|
0ce1323df1 | ||
|
|
e6e1876f44 | ||
|
|
1f17b9c9a0 | ||
|
|
ca07695481 | ||
|
|
ec956c4115 | ||
|
|
487b6067e9 | ||
|
|
a4093f9621 | ||
|
|
20197bacd9 | ||
|
|
26d15c6f34 | ||
|
|
2642a35009 | ||
|
|
328b36cb84 | ||
|
|
0107ce7d4d | ||
|
|
56e91c1f73 | ||
|
|
1b6f46a5b7 | ||
|
|
d334a6fa93 | ||
|
|
d8e7e345d0 | ||
|
|
bc6e7cafd8 | ||
|
|
dbbdca7497 | ||
|
|
e141ddfb7c | ||
|
|
47fab653c9 | ||
|
|
557c2ea601 | ||
|
|
69fc4b67a0 | ||
|
|
e985e15761 | ||
|
|
7c7ab004d2 | ||
|
|
3c4b8ff401 | ||
|
|
9689747063 | ||
|
|
7ba1f58788 | ||
|
|
6b3992e238 | ||
|
|
79688dc14d | ||
|
|
0c314fd644 | ||
|
|
a8a6519f01 | ||
|
|
ec88777733 | ||
|
|
7fbf4130d3 | ||
|
|
eacbd91d82 | ||
|
|
57e1181c48 | ||
|
|
00d15bee59 | ||
|
|
54f843ec06 | ||
|
|
e9f82c3343 | ||
|
|
020cef0b99 | ||
|
|
f36e59e9c1 | ||
|
|
3d675e5a25 | ||
|
|
594057738d | ||
|
|
7c5ee9b44a | ||
|
|
4d04fabe9c | ||
|
|
a1a7ea4d40 | ||
|
|
22d2ad4564 | ||
|
|
39368ce2ac | ||
|
|
3ee2597bc3 | ||
|
|
ba5027ad4d | ||
|
|
b66dc2a928 | ||
|
|
c1995507f1 | ||
|
|
e094793c17 | ||
|
|
8f21412374 | ||
|
|
869ed37137 | ||
|
|
ab1a22cf22 | ||
|
|
c1409b85c6 | ||
|
|
67c14a2ead | ||
|
|
c238cd5790 | ||
|
|
20e1c4c98f | ||
|
|
f59f1d0b37 | ||
|
|
e0f933c357 | ||
|
|
3a90dbaefb | ||
|
|
a2ec61dc8d | ||
|
|
c74519c9ef | ||
|
|
4878ce41c3 | ||
|
|
59badd392c | ||
|
|
f97b54328f | ||
|
|
50921e0435 | ||
|
|
5f0a706f2c | ||
|
|
aaf970d77c | ||
|
|
252d220caa | ||
|
|
7e81930a56 | ||
|
|
908da5744b | ||
|
|
f3fb857b89 | ||
|
|
9494920eef | ||
|
|
a37a14aa58 | ||
|
|
9901796331 | ||
|
|
b1c55ced18 | ||
|
|
f9395fd178 | ||
|
|
fe1ab04627 | ||
|
|
0af184921d | ||
|
|
977c07fa27 | ||
|
|
1e84465478 | ||
|
|
2dd2f9ed81 | ||
|
|
bfccd4838e | ||
|
|
f519641e6e | ||
|
|
b3d0918aab | ||
|
|
84a9d753a4 | ||
|
|
555f8a7ae6 | ||
|
|
6a6007441f | ||
|
|
8c982a6770 | ||
|
|
672fc984a6 | ||
|
|
031d9de356 | ||
|
|
f087d7dda9 | ||
|
|
cac7410d7e | ||
|
|
f779bad0de | ||
|
|
0027730789 | ||
|
|
cdb589966f | ||
|
|
029e70aa0b | ||
|
|
4b9bc818d7 | ||
|
|
54a35ca562 | ||
|
|
933072df5c | ||
|
|
11811226b4 | ||
|
|
ca41ce4123 | ||
|
|
311e4ad417 | ||
|
|
0c479fa579 | ||
|
|
62fff21f59 | ||
|
|
b53a3741bd | ||
|
|
f579ef6e08 | ||
|
|
9f557cc10a | ||
|
|
a83563961c | ||
|
|
10b9050e57 | ||
|
|
da4a09b3d5 | ||
|
|
de9a79ede8 | ||
|
|
bfc9c8b45c | ||
|
|
24abb202a7 | ||
|
|
4c3c624cc2 | ||
|
|
eba9f75c91 | ||
|
|
fbfcb9a5f6 | ||
|
|
836c02de53 | ||
|
|
7a2ff524d7 | ||
|
|
49a5536c51 | ||
|
|
1b543780a8 | ||
|
|
3405e0bda1 | ||
|
|
10029b41b1 | ||
|
|
ac16bfaeb3 | ||
|
|
b192fd0ad1 | ||
|
|
561fa5e319 | ||
|
|
e9fbb19d67 | ||
|
|
45a25394f5 | ||
|
|
cdce62f2aa | ||
|
|
45bc88d9bf | ||
|
|
a0e2db3925 | ||
|
|
4718a2c2de | ||
|
|
7c7946cc51 | ||
|
|
18804a52cf | ||
|
|
31ac92a06d | ||
|
|
c054449328 | ||
|
|
93e93dee92 | ||
|
|
b4feb0153d | ||
|
|
4d1be812e8 | ||
|
|
15a79ee0ca | ||
|
|
a2c558d864 | ||
|
|
dc77754c1a | ||
|
|
4a3640cc33 | ||
|
|
517082f4d1 | ||
|
|
72e9054b1f | ||
|
|
5012f64156 | ||
|
|
b945001851 | ||
|
|
3b0b37920f | ||
|
|
6f3d4bc3af | ||
|
|
ea013468e1 | ||
|
|
816237116d | ||
|
|
b8357c409c | ||
|
|
b717b0b2a2 | ||
|
|
2d4cda3ff9 | ||
|
|
5f9ef422dd | ||
|
|
6ba2fdc776 | ||
|
|
c732f52bb7 | ||
|
|
a941dae620 | ||
|
|
879f4bc062 | ||
|
|
e74af328cf | ||
|
|
73a2708fa1 | ||
|
|
bb439de0e4 | ||
|
|
f0e1ec7c41 | ||
|
|
f0b139085e | ||
|
|
5f8703059e | ||
|
|
d5f67b1244 | ||
|
|
45e336b61e | ||
|
|
666a9c958c | ||
|
|
aaceecef99 | ||
|
|
716679b012 | ||
|
|
d389d2b12e | ||
|
|
17b080c932 | ||
|
|
31182e18fc | ||
|
|
333444371c | ||
|
|
6de5f07ffc | ||
|
|
7669cac324 | ||
|
|
afadca7586 | ||
|
|
1897506613 | ||
|
|
8d2f9198c2 | ||
|
|
f531a6293b | ||
|
|
c8d25aa07c | ||
|
|
264c5473f0 | ||
|
|
d7902a9c1e | ||
|
|
96aeab1ddf | ||
|
|
7db7ce7337 | ||
|
|
8d95a8bab2 | ||
|
|
68b2a816fc | ||
|
|
8998bb482b | ||
|
|
aca86f43e3 | ||
|
|
7a03715ecc | ||
|
|
94d09def21 | ||
|
|
53143266c2 | ||
|
|
f1676589fd | ||
|
|
9944ffb7ac | ||
|
|
720e6f088e | ||
|
|
dff90a10df | ||
|
|
7b008f6b26 | ||
|
|
ce68248694 | ||
|
|
a26a303325 | ||
|
|
85713a3455 | ||
|
|
d4e1012328 | ||
|
|
e9d313f4ab | ||
|
|
2c4524f2f8 | ||
|
|
0ae8f31ae8 | ||
|
|
9ce631c5c0 | ||
|
|
3cbf9e5668 | ||
|
|
5c29744bb0 | ||
|
|
851acb95e2 | ||
|
|
0e42d1bb1b | ||
|
|
939a8b050a | ||
|
|
a96c5aa800 | ||
|
|
c5e973cc0c | ||
|
|
dbb3583045 | ||
|
|
3c0276e63a | ||
|
|
f01e8d8354 | ||
|
|
4f11380296 | ||
|
|
77eb5d41cb | ||
|
|
400b81720f | ||
|
|
d4cf0242e0 | ||
|
|
b5210fa2ba | ||
|
|
7ff6a8bbb7 | ||
|
|
86895fa405 | ||
|
|
20299506b8 | ||
|
|
9647f98de5 | ||
|
|
def9a64aa9 | ||
|
|
90edc483e2 | ||
|
|
defa942a29 | ||
|
|
6f6513a1eb | ||
|
|
c7b9d60500 | ||
|
|
f88ec2b9aa | ||
|
|
9cd8e20cd9 | ||
|
|
27c9bede73 | ||
|
|
63d3d235f2 | ||
|
|
0066929558 | ||
|
|
c2fabce099 | ||
|
|
fbf6a48020 | ||
|
|
38f4a4fd78 | ||
|
|
a925afb5d8 | ||
|
|
9804aaf4d6 | ||
|
|
86960b2b10 | ||
|
|
0d5099dbe4 | ||
|
|
48972cb5f2 | ||
|
|
b5ed99cc1f | ||
|
|
a2a3a01cde | ||
|
|
2d9738f13d | ||
|
|
03f930cf82 | ||
|
|
282af12792 | ||
|
|
0d71e3afe8 | ||
|
|
abf3476eea | ||
|
|
60763b8156 | ||
|
|
5c6ba3e62d | ||
|
|
95321588e4 | ||
|
|
ef88db8c1a | ||
|
|
60e7ca6858 | ||
|
|
109dd7f565 | ||
|
|
726c836587 | ||
|
|
4a75cc1526 | ||
|
|
0e1ae124bf | ||
|
|
9532e4de5c | ||
|
|
5ac11e19fc | ||
|
|
2397fdaaea | ||
|
|
0e97cd7d99 | ||
|
|
b25769109a | ||
|
|
000dc270ad | ||
|
|
47fdb9d83e | ||
|
|
158d396734 | ||
|
|
0f4b852a4b | ||
|
|
7fdca5e7b6 | ||
|
|
337aadc1fe | ||
|
|
5da0f20593 | ||
|
|
dcfcd112e1 | ||
|
|
14f9b5b82b | ||
|
|
955eb1a2d3 | ||
|
|
67f72497ea | ||
|
|
667b784dca | ||
|
|
e7e986f8d5 | ||
|
|
105efcf395 | ||
|
|
4d6cc75b9b | ||
|
|
cf6366dab4 | ||
|
|
69ecfb0837 | ||
|
|
933c7b43a4 | ||
|
|
b90bd847d2 | ||
|
|
23400f9557 | ||
|
|
1e247571ec | ||
|
|
777efc59fd | ||
|
|
a7ebdb78ab | ||
|
|
ada3f5ed2a | ||
|
|
4ec21b145f | ||
|
|
9228a8c262 | ||
|
|
23a1e6ef21 | ||
|
|
cc6ccff2b4 | ||
|
|
a4104d09b5 | ||
|
|
ac72c19676 | ||
|
|
10ec0e7bb0 | ||
|
|
c8c9520dd5 | ||
|
|
269e3d44b9 | ||
|
|
420b5e3331 | ||
|
|
2698a61231 | ||
|
|
59d6d121c4 | ||
|
|
8025ba06e4 | ||
|
|
b4e58a41c4 | ||
|
|
a81ac5312c | ||
|
|
ff1bd22193 | ||
|
|
1a7883e56c | ||
|
|
35fa48da02 | ||
|
|
6c3b243d12 | ||
|
|
cd70d20b46 | ||
|
|
e6f228c091 | ||
|
|
60651cb15b | ||
|
|
868e120ffc | ||
|
|
33838ba887 | ||
|
|
c8e7d4fbfa | ||
|
|
ed64da557e | ||
|
|
4992c952c9 | ||
|
|
ed4e366922 | ||
|
|
44b4385847 | ||
|
|
f234cd2e78 | ||
|
|
c38bfcf1da | ||
|
|
29ad115d6e | ||
|
|
753f1f34ab | ||
|
|
54c0177b15 | ||
|
|
e46942ccde | ||
|
|
59ca64719e | ||
|
|
67c1d1808a | ||
|
|
a69100ba52 | ||
|
|
46645c5b93 | ||
|
|
c7b7b393e3 | ||
|
|
dcec57c09a | ||
|
|
f8d5d9fb07 | ||
|
|
cbcfd32464 | ||
|
|
f4d05d4f24 | ||
|
|
dbe5f6a98d | ||
|
|
cff4dca5e6 | ||
|
|
adab8a6a59 | ||
|
|
831c6778d8 | ||
|
|
5f97e5b0e2 | ||
|
|
5b3d8c4377 | ||
|
|
8b99a042fc | ||
|
|
739cc681d5 | ||
|
|
b9a8a648d9 | ||
|
|
05411ee451 | ||
|
|
5e1ce3c45a | ||
|
|
a4127aee18 | ||
|
|
3b3e951100 | ||
|
|
40e9d6014a | ||
|
|
e4a5adceb6 | ||
|
|
a968f73db5 | ||
|
|
a1a49effad | ||
|
|
64f00bdb97 | ||
|
|
90a1375603 | ||
|
|
abda36cd92 | ||
|
|
9d2025afed | ||
|
|
741bc126d2 | ||
|
|
dffb4d3168 | ||
|
|
ab6684c1ad | ||
|
|
0763d16d3e | ||
|
|
2978f9ece8 | ||
|
|
7e80dacd92 | ||
|
|
881c55026f | ||
|
|
45d5728ae2 | ||
|
|
c8362433e2 | ||
|
|
012d12fb52 | ||
|
|
055c051807 | ||
|
|
26733f2651 | ||
|
|
c8b7c4412e | ||
|
|
469410cb94 | ||
|
|
3375632f2c | ||
|
|
66c1d8ffcf | ||
|
|
b8e3f33646 | ||
|
|
88c38d5d85 | ||
|
|
8676649e0b | ||
|
|
0b0703457c | ||
|
|
a8bdc69cea | ||
|
|
38e1e33cb2 | ||
|
|
2099de432a | ||
|
|
0bbe27b9e9 | ||
|
|
08bed37eba | ||
|
|
448eeeee46 | ||
|
|
eb9b8aebb7 | ||
|
|
4c20a843c7 | ||
|
|
e745962ff4 | ||
|
|
01e34ca0eb | ||
|
|
2a95105837 | ||
|
|
7d35b642fd | ||
|
|
a6ec8fd1d8 | ||
|
|
936a2409b4 | ||
|
|
e288691d1c | ||
|
|
1a5c8b02d0 | ||
|
|
de66ca06ae | ||
|
|
a56f42982c | ||
|
|
cf99d82e30 | ||
|
|
1c42040885 | ||
|
|
c79cb4e450 | ||
|
|
9a84b747e5 | ||
|
|
e53e2bfbe5 | ||
|
|
ee48decec5 | ||
|
|
777a07a019 | ||
|
|
5fe41d9b82 | ||
|
|
9689a627d0 | ||
|
|
87adf8f4e2 | ||
|
|
2b0d053c54 | ||
|
|
4976611375 | ||
|
|
66b683a6bc | ||
|
|
b249a05720 | ||
|
|
8038df7921 | ||
|
|
25eee7d314 | ||
|
|
6403ca3bff | ||
|
|
7a81dec11f | ||
|
|
4c2bcb32da | ||
|
|
ebf506a71e | ||
|
|
8f260451bd | ||
|
|
203523a770 | ||
|
|
a6fb0fcc7f | ||
|
|
5f5bde42d9 | ||
|
|
8a9ed58585 | ||
|
|
b4c90dd02a | ||
|
|
021ef30647 | ||
|
|
7590dd8003 | ||
|
|
c7d85985a7 | ||
|
|
42bd0b221d | ||
|
|
779d59885e | ||
|
|
bbb417acc3 | ||
|
|
65975828e3 | ||
|
|
3e5de53984 | ||
|
|
61db6d248d | ||
|
|
4d823ec7e2 | ||
|
|
52e3178300 | ||
|
|
2dadedecec | ||
|
|
9de8a5f7e2 | ||
|
|
f165c8b0f9 | ||
|
|
0229fab8b4 | ||
|
|
4afdc269e7 | ||
|
|
10fa02be11 | ||
|
|
e0d83ce545 | ||
|
|
640b13e074 | ||
|
|
b8da3c9722 | ||
|
|
befb6e0144 | ||
|
|
3975a37302 | ||
|
|
28bf3a35b8 | ||
|
|
3c72dee8e8 | ||
|
|
198c5d9ffc | ||
|
|
7a3e99db9d | ||
|
|
e9fe09d545 | ||
|
|
2674aa2ee2 | ||
|
|
4b938998a3 | ||
|
|
202532cbd2 | ||
|
|
720377d476 | ||
|
|
e167efa4d9 | ||
|
|
f97025a15e | ||
|
|
0f8b601db6 | ||
|
|
df25a2c768 | ||
|
|
29ada606c5 | ||
|
|
57c2e72b07 | ||
|
|
cd44d68a8d | ||
|
|
fe3f42a869 | ||
|
|
3fb8cba23e | ||
|
|
5a224ddb63 | ||
|
|
ee172fa0af | ||
|
|
d3a0626e02 | ||
|
|
8188c0013b | ||
|
|
4a294f35a6 | ||
|
|
ea8a072fd2 | ||
|
|
42d3ae121e | ||
|
|
6105518324 | ||
|
|
6093c2eb02 | ||
|
|
25a380c218 | ||
|
|
479bf1b50e | ||
|
|
a2024be25f | ||
|
|
b097545d91 | ||
|
|
b7828d7c23 | ||
|
|
326ece76e4 | ||
|
|
4df5027fda | ||
|
|
cbb48d2406 | ||
|
|
45abef86d7 | ||
|
|
c4fc2afd14 | ||
|
|
7ba0979f1e | ||
|
|
d01a3c1187 | ||
|
|
882bad50d7 | ||
|
|
3023dc035c | ||
|
|
5e0a036814 | ||
|
|
3488d35c1f | ||
|
|
188d3420f6 | ||
|
|
d423100e6b | ||
|
|
5ecf4a0ac3 | ||
|
|
d1849d807c | ||
|
|
5c324fc5b6 | ||
|
|
37aab6a4f5 | ||
|
|
fd43c98b96 | ||
|
|
cd697382c2 | ||
|
|
1ad765875e | ||
|
|
83255da41e | ||
|
|
e211958904 | ||
|
|
f2de6f6c02 | ||
|
|
d2f6d11202 | ||
|
|
32935e507d | ||
|
|
eea19a0f5d | ||
|
|
0387bd9e7e | ||
|
|
047eb9c37e | ||
|
|
15eaaa7e11 | ||
|
|
0f113df946 | ||
|
|
59e05c53de | ||
|
|
84e8ca85f3 | ||
|
|
7f689568ac | ||
|
|
b6512cef24 | ||
|
|
e86e69c233 | ||
|
|
f7bc9d01fb | ||
|
|
f49f81a90d | ||
|
|
426ce6ac8e | ||
|
|
787ac24f78 | ||
|
|
949d1e73ea | ||
|
|
2e6e807a3a | ||
|
|
dec7d6cbdc | ||
|
|
82935667da | ||
|
|
df44b2c41d | ||
|
|
75e09e20c6 | ||
|
|
ea794c39c2 | ||
|
|
06997cd5d8 | ||
|
|
3ed1730819 | ||
|
|
ca742585e5 | ||
|
|
e846a19ea7 | ||
|
|
ded6f59c79 | ||
|
|
2ac2f8a1eb | ||
|
|
737aedd12f | ||
|
|
cc48cab5c6 | ||
|
|
fd246c77c7 | ||
|
|
407ce79e2c | ||
|
|
723bdc39fa | ||
|
|
8df46811b5 | ||
|
|
f803564866 | ||
|
|
4306286128 | ||
|
|
397a181952 | ||
|
|
882948b07f | ||
|
|
92d028e6b1 | ||
|
|
ee32a4266e | ||
|
|
f346437bc1 | ||
|
|
509609ef96 | ||
|
|
5eb1f1f3c0 | ||
|
|
52570b5059 | ||
|
|
66b7de9668 | ||
|
|
ae827a21bc | ||
|
|
cdbee1e2b3 | ||
|
|
139976c1d1 | ||
|
|
be5202e225 | ||
|
|
e1adbcf128 | ||
|
|
b72c29db9a | ||
|
|
1bb7c46e9c | ||
|
|
6083764346 | ||
|
|
2f7ad102a9 | ||
|
|
33d057606e | ||
|
|
2b06e36276 | ||
|
|
5be7b3e788 | ||
|
|
95f2dfc170 | ||
|
|
0216509724 | ||
|
|
2c9b27f8e1 | ||
|
|
06eaf89459 | ||
|
|
33171cfd25 | ||
|
|
053b38603e | ||
|
|
777ecee10f | ||
|
|
fd4f68d226 | ||
|
|
cf9ad72984 | ||
|
|
934ad4a584 | ||
|
|
4e002d15e0 | ||
|
|
c5019a1eea | ||
|
|
9062806a33 | ||
|
|
63792a0a97 | ||
|
|
65623b2f52 | ||
|
|
7a57de78c4 | ||
|
|
238a554aaa | ||
|
|
1433576b9d | ||
|
|
4007f82ce6 | ||
|
|
f86d7d39b8 | ||
|
|
3c4e91c11d | ||
|
|
46ce5e9b75 | ||
|
|
8d39ad037e | ||
|
|
79c95cc346 | ||
|
|
bc24d615dc | ||
|
|
2b549c5a4d | ||
|
|
70f004182a | ||
|
|
93df1092fd | ||
|
|
fbbdd87d7f | ||
|
|
025cdb1e37 | ||
|
|
4a2d876351 | ||
|
|
86113bc53a | ||
|
|
35c3838220 | ||
|
|
30b5e56bcb | ||
|
|
ef059f375d | ||
|
|
9100fd3bfc | ||
|
|
bedff7f97a | ||
|
|
96b96a885f | ||
|
|
e1fdc4738b | ||
|
|
0d06766201 | ||
|
|
02fd536873 | ||
|
|
e2a9529189 | ||
|
|
a660b8e023 | ||
|
|
baea2e9469 | ||
|
|
db731e5296 | ||
|
|
890fd39d33 | ||
|
|
9c47698824 | ||
|
|
b678d70724 | ||
|
|
e199ea6ca6 | ||
|
|
0af88089c5 | ||
|
|
be37084d40 | ||
|
|
a62a1eb914 | ||
|
|
a03625b2dc | ||
|
|
09fd06cd52 | ||
|
|
3f5b82fb3d | ||
|
|
dca5a90682 | ||
|
|
9c2dbde065 | ||
|
|
9a8aed291e | ||
|
|
22c0834cd7 | ||
|
|
cb05247a59 | ||
|
|
fee343ccb3 | ||
|
|
c246c33c9a | ||
|
|
552cdcff1d | ||
|
|
30006698ad | ||
|
|
124eb4da85 | ||
|
|
b220f4db1c | ||
|
|
295728eafc | ||
|
|
6717fbfd89 | ||
|
|
e5cafa496d | ||
|
|
ca4abf9692 | ||
|
|
c945b0d4fe | ||
|
|
019772b278 | ||
|
|
6e6055a77b | ||
|
|
832ebdc218 | ||
|
|
358454be51 | ||
|
|
fcad70a350 | ||
|
|
4403181e38 | ||
|
|
279b3105ee | ||
|
|
769d5a77c2 | ||
|
|
2f62b2aa3f | ||
|
|
b1cea64b84 | ||
|
|
42879bdc34 | ||
|
|
2d8ae6238c | ||
|
|
7a8c16847c | ||
|
|
4d3579dc66 | ||
|
|
316b9c15db | ||
|
|
08c45f44e4 | ||
|
|
905ae4c299 | ||
|
|
6effcdcddc | ||
|
|
445b74bcb8 | ||
|
|
29bdb0cf35 | ||
|
|
a021ac65ed | ||
|
|
becba63ce4 | ||
|
|
2c3372c3b2 | ||
|
|
398f965726 | ||
|
|
9694cfa883 | ||
|
|
08e4942276 | ||
|
|
aa88e96b76 | ||
|
|
85f28d1054 | ||
|
|
d50e6d084b | ||
|
|
f7bf2f7d0a | ||
|
|
ff5e27a89c | ||
|
|
a93eeec5eb | ||
|
|
6fbf3bc282 | ||
|
|
83d923300d | ||
|
|
9397c5e1f7 | ||
|
|
c7cc2a3e0f | ||
|
|
2cd4be0db0 | ||
|
|
7b44ef106e | ||
|
|
39580268ac | ||
|
|
389eb8969c | ||
|
|
32857a9dad | ||
|
|
b5fa47838e | ||
|
|
2974899ed5 | ||
|
|
04737b3e85 | ||
|
|
67698baf11 | ||
|
|
5be511916b | ||
|
|
a999544859 | ||
|
|
2779e15961 | ||
|
|
c386df4e48 | ||
|
|
56fbe15dc9 | ||
|
|
95124c0638 | ||
|
|
c27c24d1b2 | ||
|
|
b4844fe1fe | ||
|
|
870fce58c4 | ||
|
|
1b4fc022ff | ||
|
|
d909ffb1f1 | ||
|
|
76c74006f0 | ||
|
|
b5aa116349 | ||
|
|
c48c5dd35a | ||
|
|
5c49248700 | ||
|
|
913a0f6520 | ||
|
|
dde61749d8 | ||
|
|
723298016d | ||
|
|
d0f8687520 | ||
|
|
556c1a677c | ||
|
|
28ef1d2aa9 | ||
|
|
ffb2d183e7 | ||
|
|
028c5e6ed2 | ||
|
|
101ef13956 | ||
|
|
3e98844d33 | ||
|
|
6fcd6199f3 | ||
|
|
c818eb30b5 | ||
|
|
3f06b86ef0 | ||
|
|
1435ce963c | ||
|
|
78e100cb9a | ||
|
|
eb8a5b2c68 | ||
|
|
4335900aa5 | ||
|
|
bc79a9af38 | ||
|
|
b8aa3e9a48 | ||
|
|
5aaf15c8b4 | ||
|
|
8d3ff6d319 | ||
|
|
b13fc666e4 | ||
|
|
00de815e65 | ||
|
|
5b1e8622d8 | ||
|
|
84f1f6f6d1 | ||
|
|
f099f9c1e2 | ||
|
|
5e66aabb97 | ||
|
|
fa10cd36d1 | ||
|
|
a0d8f3dbc4 | ||
|
|
ae1d045674 | ||
|
|
85b2c4b344 | ||
|
|
968a5cca70 | ||
|
|
ce11390484 | ||
|
|
2565a719cc | ||
|
|
e05c7a0d90 | ||
|
|
2dfa8b761b | ||
|
|
5726d6a2d2 | ||
|
|
a2a0ff0bfd | ||
|
|
8627bee253 | ||
|
|
33d2a77c07 | ||
|
|
a7a08b44ce | ||
|
|
64795c4921 | ||
|
|
98b45e147d | ||
|
|
3f196cd135 | ||
|
|
ea6efdf8ff | ||
|
|
7f038be6e3 | ||
|
|
1194be8652 | ||
|
|
eefad6628a | ||
|
|
f83193dd64 | ||
|
|
2762c3353f | ||
|
|
a893bdff92 | ||
|
|
a97b2d347e | ||
|
|
17aca1bb71 | ||
|
|
01c836f236 | ||
|
|
49f78457ee | ||
|
|
d43b90642f | ||
|
|
61c3af67e1 | ||
|
|
5179a2cd23 | ||
|
|
f1d4f2f8bb | ||
|
|
be990a00a2 | ||
|
|
de9d49c0fc | ||
|
|
a38621d66f | ||
|
|
bfd1f8907e | ||
|
|
b3ce28bc99 | ||
|
|
eb29c0b78f | ||
|
|
7d651a53d1 | ||
|
|
1ee5c36690 | ||
|
|
d283290cbf | ||
|
|
8f4cdfe24a | ||
|
|
0870e2056f | ||
|
|
308f9ffe6b | ||
|
|
f2db0cbc01 | ||
|
|
5a88e7fcf4 | ||
|
|
c80fe74729 | ||
|
|
7e9c337fb0 | ||
|
|
a5f6770589 | ||
|
|
58402ea6e5 | ||
|
|
ad1ce3bbb0 | ||
|
|
615b2f54b4 | ||
|
|
2d2805f1b8 | ||
|
|
0783a74b59 | ||
|
|
04d77dd214 | ||
|
|
0b9ea4bebb | ||
|
|
e3ab18589b | ||
|
|
957ff3edf4 | ||
|
|
cea3773e4f | ||
|
|
1a2392a8c8 | ||
|
|
ac3b288f3b | ||
|
|
611e6cecf2 | ||
|
|
7b97439fcd | ||
|
|
409729e55a | ||
|
|
8fb7f048b5 | ||
|
|
9186c2fae9 | ||
|
|
b87a076c86 | ||
|
|
db0d8ae339 | ||
|
|
d0f6b53fd5 | ||
|
|
3a3d57add4 | ||
|
|
e56cca5bd9 | ||
|
|
367f2a96ec | ||
|
|
34c580dbe8 | ||
|
|
231dd15abb | ||
|
|
05192b2c88 | ||
|
|
cd1e94dbf7 | ||
|
|
4043d7d301 | ||
|
|
b231abc036 | ||
|
|
576016ccaf | ||
|
|
254652c748 | ||
|
|
6cfa71c3f0 | ||
|
|
cc1359abf7 | ||
|
|
f3ba815757 | ||
|
|
c3fd518817 | ||
|
|
6e2bc4b8c4 | ||
|
|
0da6b59eff | ||
|
|
78f3b5e47a | ||
|
|
b888b846e4 | ||
|
|
d3b9315e91 | ||
|
|
252b127c15 | ||
|
|
def6b3991f | ||
|
|
aac7b5912f | ||
|
|
f1e66f247e | ||
|
|
ffee4aa495 | ||
|
|
2943b0964f | ||
|
|
f03bb3fcf0 | ||
|
|
ffa62c8cfa | ||
|
|
6c261914d8 | ||
|
|
f32f830004 | ||
|
|
8381dba465 | ||
|
|
f29f86d4f4 | ||
|
|
f3a22d1d37 | ||
|
|
e86874d124 | ||
|
|
4bd3b09c4f | ||
|
|
9e16cdf4ef | ||
|
|
612442a674 | ||
|
|
283e5c6f31 | ||
|
|
0d041e1188 | ||
|
|
01e6ce63b6 | ||
|
|
31187c9e13 | ||
|
|
b71f018e19 | ||
|
|
7002f5806b | ||
|
|
6d3cae9e6a | ||
|
|
13d19f927e | ||
|
|
5048dfe2a5 | ||
|
|
a172ffb106 | ||
|
|
de8f21aa92 | ||
|
|
63f0b16177 | ||
|
|
3e779975c3 | ||
|
|
a2c67e8594 | ||
|
|
3661966a9b | ||
|
|
109551d5bd | ||
|
|
c0030a619b | ||
|
|
7fd0ccba45 | ||
|
|
f1b3bc021e | ||
|
|
99ea6d5064 | ||
|
|
fecdc3f114 | ||
|
|
586ddb6084 | ||
|
|
66f7765c5c | ||
|
|
f68e2a0752 | ||
|
|
2be25aa2f2 | ||
|
|
173bbe8fa6 | ||
|
|
2bd644cd66 | ||
|
|
5630357ec1 | ||
|
|
9ef259c882 | ||
|
|
ab7dd7523f | ||
|
|
b58ca0f4f2 | ||
|
|
8ca3490c47 | ||
|
|
4d54cf419d | ||
|
|
bccc32bd2a | ||
|
|
e1529c21a6 | ||
|
|
85aec35ea9 | ||
|
|
4828a63e22 | ||
|
|
bd33668954 | ||
|
|
54ab68c7ab | ||
|
|
b34d01691f | ||
|
|
8b17846c4d | ||
|
|
d521cfdcf0 | ||
|
|
5dfb2242b3 | ||
|
|
bac2092d5a | ||
|
|
bd197a10c2 | ||
|
|
a2a3efce52 | ||
|
|
7d3e1ccc95 | ||
|
|
3e49134cf6 | ||
|
|
9a6bd88d00 | ||
|
|
71b107fc61 | ||
|
|
157b0555e4 | ||
|
|
04eaab1acf | ||
|
|
12430fdbdc | ||
|
|
771246c9de | ||
|
|
ae68c7997b | ||
|
|
0a97a55467 | ||
|
|
a04954294b | ||
|
|
5aa49e6744 | ||
|
|
4ec0daa137 | ||
|
|
84f9969a6c | ||
|
|
f2ff2c1206 | ||
|
|
e1fa2d698e | ||
|
|
0f809f36a9 | ||
|
|
27d06c9cba | ||
|
|
a6c551ee1b | ||
|
|
535451cfa0 | ||
|
|
f7d6c70eaf | ||
|
|
fb000ad24e | ||
|
|
5e09e9d718 | ||
|
|
2db8e070b4 | ||
|
|
04e9a27c06 | ||
|
|
e77a6c6fdc | ||
|
|
37156e8a6c | ||
|
|
45b07bdc8d | ||
|
|
620f512d02 | ||
|
|
8010d50ab8 | ||
|
|
83bceb8154 | ||
|
|
5d57fdbe6c | ||
|
|
2b03ad1b39 | ||
|
|
f743185771 | ||
|
|
f6b51d2b92 | ||
|
|
02bc402792 | ||
|
|
7b34b7e5c2 | ||
|
|
d8224d129e | ||
|
|
05dca7d974 | ||
|
|
e1796b2538 | ||
|
|
cc110de643 | ||
|
|
0739aa47db | ||
|
|
35fe2e983a | ||
|
|
acc89951df | ||
|
|
ef0815e916 | ||
|
|
f15d61e15a | ||
|
|
9699e79f46 | ||
|
|
892eb3a01f | ||
|
|
c98ccd7fbe | ||
|
|
5cf59c2fcb | ||
|
|
bb38ecde61 | ||
|
|
ea588f60ca | ||
|
|
7c5d100121 | ||
|
|
12d7199cf6 | ||
|
|
e82ae3e1f6 | ||
|
|
33480c3ce4 | ||
|
|
99880b7c97 | ||
|
|
7b363dfa58 | ||
|
|
97b0468e8f | ||
|
|
890bd99062 | ||
|
|
889bfb481f | ||
|
|
9cf3da6383 | ||
|
|
2eb5e6401d | ||
|
|
86d796e08a | ||
|
|
bfb211e7cc | ||
|
|
1dde324b51 | ||
|
|
51db9d2f48 | ||
|
|
11e8126cfd | ||
|
|
ab0d8279c7 | ||
|
|
03845d08c0 | ||
|
|
f2a617ba4e | ||
|
|
02bc486ff7 | ||
|
|
a6c79e4057 | ||
|
|
e18f52086b | ||
|
|
e4f35d883f | ||
|
|
c5105b1580 | ||
|
|
286da2be9d | ||
|
|
f73e73e807 | ||
|
|
beb33d82df | ||
|
|
65931d9785 | ||
|
|
2e0412d2d2 | ||
|
|
f6dcac829c | ||
|
|
72f6a858f6 | ||
|
|
271ed9a7f1 | ||
|
|
4b89d917b4 | ||
|
|
efb6365b90 | ||
|
|
abea5c17d7 | ||
|
|
9a45ad3641 | ||
|
|
fd2dddcd30 | ||
|
|
56a60471d2 | ||
|
|
b90ddd29a9 | ||
|
|
360acae48a | ||
|
|
489ffa5449 | ||
|
|
d009816831 | ||
|
|
ede19292d3 | ||
|
|
62d41b429c | ||
|
|
912ed6ba90 | ||
|
|
3545477f1f | ||
|
|
8e9eeaddf9 | ||
|
|
34f695bb60 | ||
|
|
c492b73e99 | ||
|
|
e70af2aeca | ||
|
|
6cf077afec | ||
|
|
fb8c9c13be | ||
|
|
f059007bbb | ||
|
|
4d38a0e099 | ||
|
|
9641842678 | ||
|
|
8c51c4ca04 | ||
|
|
29617e878a | ||
|
|
c82b20143d | ||
|
|
fce47d650e | ||
|
|
64376cf174 | ||
|
|
012e0ea34f | ||
|
|
2d3d334d9e | ||
|
|
5989d1d06b | ||
|
|
b47f7599e3 | ||
|
|
82ed9fb43f | ||
|
|
5c281f6ade | ||
|
|
a19331e311 | ||
|
|
cef419b574 | ||
|
|
fc61de611f | ||
|
|
9c939c039b | ||
|
|
98b630e8f8 | ||
|
|
0b4787ef3a | ||
|
|
2cbdea1f8b | ||
|
|
b28f2e8577 | ||
|
|
b046d16405 | ||
|
|
d56f5bf0eb | ||
|
|
11032d751b | ||
|
|
7239e4c43e | ||
|
|
3c96870db9 | ||
|
|
2ed5e5e6bb | ||
|
|
90490606e5 | ||
|
|
3acf2ec358 | ||
|
|
0cd624a9da | ||
|
|
6f118f0a1d | ||
|
|
b3dc95c66c | ||
|
|
945d3e16fe | ||
|
|
31c26ac49d | ||
|
|
e643315532 | ||
|
|
20e96aa056 | ||
|
|
4ad6d02e4d | ||
|
|
2aa0b5160e | ||
|
|
1344c895f3 | ||
|
|
bf5e95f4c0 | ||
|
|
993bd416a2 | ||
|
|
85f3aaf4f0 | ||
|
|
30e7c4aeaf | ||
|
|
40dc033848 | ||
|
|
0322060c28 | ||
|
|
18b99f1d36 | ||
|
|
312a15029f | ||
|
|
8c6e1e00c0 | ||
|
|
2d8b3b2b26 | ||
|
|
40739b87d2 | ||
|
|
6980f0b990 | ||
|
|
d9d776e8d2 | ||
|
|
6beb0bf2d3 | ||
|
|
53b75670b4 | ||
|
|
ae06a9b706 | ||
|
|
91b20b571b | ||
|
|
e3f027df84 | ||
|
|
aa9d3066e9 | ||
|
|
5cd9ea69c4 | ||
|
|
d55b35acb6 | ||
|
|
d10c32466b | ||
|
|
8b981aa938 | ||
|
|
dfe896f4cd | ||
|
|
07a32dc934 | ||
|
|
1159fd0038 | ||
|
|
6c22129327 | ||
|
|
715de7549a | ||
|
|
16679fa064 | ||
|
|
d5e09a7dd7 | ||
|
|
15b3a02edb | ||
|
|
4d36f24d37 | ||
|
|
4e2b14b566 | ||
|
|
6c53f49e5e | ||
|
|
ee69ddfd74 | ||
|
|
666cf0aee8 | ||
|
|
a5ec069da4 | ||
|
|
f233ab66f6 | ||
|
|
22005b454e | ||
|
|
a3ab3e0133 | ||
|
|
b48e12719e | ||
|
|
39782362d0 | ||
|
|
1a9f91cd04 | ||
|
|
2d47a0aff6 | ||
|
|
a55dee9a98 | ||
|
|
1f5c3bbddd | ||
|
|
e3a680d592 | ||
|
|
e2a718be1a | ||
|
|
da8bd87700 | ||
|
|
0030aebc05 | ||
|
|
a2ea915aa5 | ||
|
|
b6df554777 | ||
|
|
83c4416c46 | ||
|
|
1a5e1893ed | ||
|
|
ec100bcf85 | ||
|
|
f39c9d49f8 | ||
|
|
86fcfc033f | ||
|
|
e5d64f770f | ||
|
|
35ea240ab3 | ||
|
|
f6b637739f | ||
|
|
8018402a80 | ||
|
|
d5a596fdce | ||
|
|
12c887766a | ||
|
|
183cc57f31 | ||
|
|
9b4a5c95bd | ||
|
|
a692157363 | ||
|
|
d2ec906381 | ||
|
|
d8328985f7 | ||
|
|
24c76ea808 | ||
|
|
e0691d729f | ||
|
|
f2e63f3057 | ||
|
|
cd2029912e | ||
|
|
7adc1b4d54 | ||
|
|
09c684d744 | ||
|
|
56668c08e7 | ||
|
|
06ab7f9e06 | ||
|
|
a2fdb3b775 | ||
|
|
78c9f801a6 | ||
|
|
6da54ba6c4 | ||
|
|
b3a32ae240 | ||
|
|
c243b1db3e | ||
|
|
732d6603d8 | ||
|
|
fc71ae0848 | ||
|
|
cfc3e91b61 | ||
|
|
c672c6f5f1 | ||
|
|
c6a8f531df | ||
|
|
64b4ea1444 | ||
|
|
177d176ab4 | ||
|
|
aa39d72a8d | ||
|
|
40e4a5824c | ||
|
|
8223686f65 | ||
|
|
23221bc141 | ||
|
|
115abbafc2 | ||
|
|
e5a5cafde7 | ||
|
|
9dc9dadf9d | ||
|
|
a40534b7be | ||
|
|
4bb3ed111d | ||
|
|
4850f4fd7c | ||
|
|
1da7f6ec15 | ||
|
|
e2f2b5ed8c | ||
|
|
3fb308b051 | ||
|
|
6fc5c791bb | ||
|
|
8c93009b51 | ||
|
|
afdb1a7fb2 | ||
|
|
d8c1ef93e0 | ||
|
|
18fd2682bd | ||
|
|
317a5ac9a2 | ||
|
|
21645fa911 | ||
|
|
a5a7ddf5d4 | ||
|
|
f9d4b443b5 | ||
|
|
d9e8fba7b0 | ||
|
|
2c8b22c767 | ||
|
|
95b9dde621 | ||
|
|
01f3189aa7 | ||
|
|
24c0304aa5 | ||
|
|
5972a4fdbb | ||
|
|
a4b151190f | ||
|
|
39ec2405c6 | ||
|
|
e453ade9cb | ||
|
|
0bac261ecb | ||
|
|
68fee12d33 | ||
|
|
ef109b070c | ||
|
|
4ca49070e3 | ||
|
|
4382451fa6 | ||
|
|
3ae9b0f86b | ||
|
|
99482f9bda | ||
|
|
4ce18ca831 | ||
|
|
26ceb442f5 | ||
|
|
9e378b7dfa | ||
|
|
b156fa4914 | ||
|
|
2f65e2ccce | ||
|
|
362d3e7959 | ||
|
|
3acb60251f | ||
|
|
49fb2552e1 | ||
|
|
18c0429868 | ||
|
|
74f8161e18 | ||
|
|
239dc88d86 | ||
|
|
1b4a1241c8 | ||
|
|
84dc04bb15 | ||
|
|
1078279c97 | ||
|
|
5066ac07e0 | ||
|
|
9cda10e255 | ||
|
|
7ed6a982b7 | ||
|
|
ad5e46e024 | ||
|
|
15ac85a7fb | ||
|
|
8912f4ee18 | ||
|
|
a3424f816b | ||
|
|
d95cde81d9 | ||
|
|
9275858d68 | ||
|
|
dd8beff697 | ||
|
|
1630f0a6f7 | ||
|
|
1a9fcb137c | ||
|
|
60110721c8 | ||
|
|
0170ed5d0e | ||
|
|
927e709d78 | ||
|
|
e7e71cfd0a | ||
|
|
55e29802c6 | ||
|
|
06ae034a0c | ||
|
|
06fc6e7992 | ||
|
|
b6fd122b22 | ||
|
|
65388be613 | ||
|
|
b27152e12b | ||
|
|
9c0dca6d35 | ||
|
|
641c98cb7c | ||
|
|
d62e4469c0 | ||
|
|
9694b1c919 | ||
|
|
87da41ab84 | ||
|
|
5202e8ef1e | ||
|
|
44af744fc8 | ||
|
|
2c5335764e | ||
|
|
dd75218db8 | ||
|
|
556b8acdcd | ||
|
|
2a889d576d | ||
|
|
161f0f2b54 | ||
|
|
f7875f3619 | ||
|
|
c04e1e9ff5 | ||
|
|
6f149521c3 | ||
|
|
622f4d7dd1 | ||
|
|
e6ca5da2c9 | ||
|
|
b44be902d7 | ||
|
|
635216e93e | ||
|
|
433317dad2 | ||
|
|
36bfea9cdb | ||
|
|
1f003ea591 | ||
|
|
bbdf2fb003 | ||
|
|
973cabd18d | ||
|
|
dd770ff29c | ||
|
|
c404f9dca8 | ||
|
|
ce0b25c1bc | ||
|
|
826f0f6a6b | ||
|
|
afd08fcd8b | ||
|
|
9ac132f445 | ||
|
|
724796832e | ||
|
|
dddb771d79 | ||
|
|
ad328e8a9d | ||
|
|
a299d6d44a | ||
|
|
1c0c7cbbe3 | ||
|
|
a2062c687f | ||
|
|
e00a280528 | ||
|
|
354976e85a | ||
|
|
a490aa7c7a | ||
|
|
ca5036792f | ||
|
|
ce6e135dfa | ||
|
|
526bb36dcc | ||
|
|
d5080b1e3d | ||
|
|
a35f861a19 | ||
|
|
1d2a00feab | ||
|
|
f4da821ab8 | ||
|
|
d6c2b40ad7 | ||
|
|
611076bf6d | ||
|
|
006821724e | ||
|
|
808c0b7b49 | ||
|
|
118360102d | ||
|
|
0eed84d3ff | ||
|
|
15c2ab636f | ||
|
|
d2feac0c66 | ||
|
|
7f4bc5c36e | ||
|
|
db7777b161 | ||
|
|
c5da1af470 | ||
|
|
ba4c0b99a5 | ||
|
|
e085077e82 | ||
|
|
5afac3ce5b | ||
|
|
33c33b602f | ||
|
|
273ca6bcd7 | ||
|
|
20cea788b6 | ||
|
|
3e897e7758 | ||
|
|
3464134aa8 | ||
|
|
8a44ac2a65 | ||
|
|
178c207c19 | ||
|
|
fec9241932 | ||
|
|
569e3c2324 | ||
|
|
d93aff207c | ||
|
|
2bf6335073 | ||
|
|
a77cfe595b | ||
|
|
54f2e43363 | ||
|
|
e41bb51453 | ||
|
|
4b241e7d79 | ||
|
|
6983e6d49f | ||
|
|
0f2d44152a | ||
|
|
977580c5fa | ||
|
|
9397b4f064 | ||
|
|
6bdfb870cd | ||
|
|
bfecebbc80 | ||
|
|
576bb310a1 | ||
|
|
3e6ae39419 | ||
|
|
3eaf5a60a8 | ||
|
|
0094e3be97 | ||
|
|
2c103e03cb | ||
|
|
4d3054ad8e | ||
|
|
a452163152 | ||
|
|
97f7eb1564 | ||
|
|
9c1a0d09a1 | ||
|
|
9a6ad62771 | ||
|
|
387fbd2276 | ||
|
|
62ad41c4ed | ||
|
|
9c6395b759 | ||
|
|
aebd8c89e5 | ||
|
|
b36f41335a | ||
|
|
f5a627e008 | ||
|
|
4976d3fd9f | ||
|
|
fe195f7808 | ||
|
|
dbd9e745e5 | ||
|
|
e8a5dc2be7 | ||
|
|
31502261f0 | ||
|
|
51efff5a39 | ||
|
|
441bc58dfb | ||
|
|
22d9fc6eac | ||
|
|
3f93ec2b42 | ||
|
|
44f457a740 | ||
|
|
cd0178030c | ||
|
|
1d99fc11d7 | ||
|
|
a58d6cc530 | ||
|
|
a7d5f35565 | ||
|
|
76b1ab3dac | ||
|
|
e8043dd9a1 | ||
|
|
10aadfb907 | ||
|
|
06aebc1707 | ||
|
|
7b98e41c9b | ||
|
|
2327ada979 | ||
|
|
ac7ab2264d | ||
|
|
24dc3b5fed | ||
|
|
042904676b | ||
|
|
52687894ab | ||
|
|
3c9d92d13d | ||
|
|
d9279b8142 | ||
|
|
228b94592e | ||
|
|
537cf41403 | ||
|
|
ee4544fdfc | ||
|
|
1670cdd02d | ||
|
|
b070359f1c | ||
|
|
bbde21ff7d | ||
|
|
d457e5f843 | ||
|
|
15111c6e13 | ||
|
|
32fd9dd892 | ||
|
|
e66bb6def7 | ||
|
|
3896d0406e | ||
|
|
20feaef131 | ||
|
|
7f22c4a74a | ||
|
|
f6831b07b5 | ||
|
|
7a175ba9d5 | ||
|
|
7ca8a5eb3b | ||
|
|
cb585e84c1 | ||
|
|
b52d493e2a | ||
|
|
97a65c90d6 | ||
|
|
d617f24eb6 | ||
|
|
12b123d0d5 | ||
|
|
5cd4f23011 | ||
|
|
8cfa65d01b | ||
|
|
ace3396de3 | ||
|
|
82ca30f9eb | ||
|
|
ab1d3bb70c | ||
|
|
de06b8831e | ||
|
|
55d2c77a0c | ||
|
|
8ba295c526 | ||
|
|
908c15468e | ||
|
|
3f8669f723 | ||
|
|
036d2f3cdb | ||
|
|
46e4e2cbab | ||
|
|
eb7612fb9f | ||
|
|
beb8fc6e09 | ||
|
|
05c744e676 | ||
|
|
8cf5f1b401 | ||
|
|
21fa480594 | ||
|
|
c4e10b6e89 | ||
|
|
1b14537f45 | ||
|
|
6868fd4e5f | ||
|
|
0d1165259d | ||
|
|
3a1b40e503 | ||
|
|
046a216df3 | ||
|
|
acce4b191e | ||
|
|
70d415b4d8 | ||
|
|
48e480f175 | ||
|
|
8b26e0e78b | ||
|
|
9ccea3e19e | ||
|
|
a455c31dc7 | ||
|
|
d4fdf736bf | ||
|
|
e8dba4e565 | ||
|
|
28cef36ce8 | ||
|
|
118371b819 | ||
|
|
48a3955147 | ||
|
|
66aa4ed2c1 | ||
|
|
6f4ab86abf | ||
|
|
69acd90c8b | ||
|
|
fea4cef885 | ||
|
|
0ea0765557 | ||
|
|
d66c4c3cee |
517 changed files with 161311 additions and 45516 deletions
9
.gitignore
vendored
9
.gitignore
vendored
|
|
@ -15,6 +15,13 @@
|
|||
|
||||
# usually perl -pi.back -e edits.
|
||||
*.back
|
||||
*.bak
|
||||
|
||||
# pycharm project specific settings files
|
||||
.idea
|
||||
|
||||
# vscode project specific settings file
|
||||
.vscode
|
||||
|
||||
cleanup.sh
|
||||
FanFictionDownLoader.zip
|
||||
|
|
@ -26,3 +33,5 @@ build
|
|||
dist
|
||||
FanFicFare.egg-info
|
||||
personal.ini
|
||||
appcfg_oauth2_tokens
|
||||
venv/
|
||||
|
|
|
|||
|
|
@ -1 +1,3 @@
|
|||
include DESCRIPTION.rst
|
||||
include README.md
|
||||
include LICENSE
|
||||
|
|
|
|||
78
README.md
78
README.md
|
|
@ -1,19 +1,71 @@
|
|||
FanFicFare
|
||||
[FanFicFare](https://github.com/JimmXinu/FanFicFare)
|
||||
==========
|
||||
|
||||
This is the repository for the FanFicFare project.
|
||||
FanFicFare makes reading stories from various websites much easier by helping
|
||||
you download them to EBook files.
|
||||
|
||||
FanFicFare is the rename and move of the previous FanFictionDownLoader (AKA
|
||||
FFDL, AKA fanficdownloader) project.
|
||||
FanFicFare was previously known as FanFictionDownLoader (AKA
|
||||
FFDL, AKA fanficdownloader).
|
||||
|
||||
This program is available as a [calibre
|
||||
plugin](http://www.mobileread.com/forums/showthread.php?p=3084025), a
|
||||
[command-line interface](https://pypi.python.org/pypi/FanFicFare) (via
|
||||
pip), and a [web service](http://fanficfare.appspot.com/).
|
||||
Main features:
|
||||
|
||||
There's additional info in the project
|
||||
[wiki](https://github.com/JimmXinu/FanFicFare/wiki) pages.
|
||||
- Download FanFiction stories from over [100 different sites](https://github.com/JimmXinu/FanFicFare/wiki/SupportedSites). into ebooks.
|
||||
|
||||
There's also a [FanFicFare
|
||||
maillist](https://groups.google.com/group/fanfic-downloader) for
|
||||
discussion and announcements.
|
||||
- Update previously downloaded EPUB format ebooks, downloading only new chapters.
|
||||
|
||||
- Get Story URLs from Web Pages.
|
||||
|
||||
- Support for downloading images in the story text. (EPUB and HTML
|
||||
only -- download EPUB and convert to AZW3 for Kindle) More details on
|
||||
configuring images in stories and cover images can be found in the
|
||||
[FAQs] or [this post in the old FFDL thread].
|
||||
|
||||
- Support for cover image. (EPUB only)
|
||||
|
||||
- Optionally keep an Update Log of past updates (EPUB only).
|
||||
|
||||
There's additional info in the project [wiki] pages.
|
||||
|
||||
There's also a [FanFicFare maillist] for discussion and announcements and a [discussion thread] for the Calibre plugin.
|
||||
|
||||
Getting FanFicFare
|
||||
==================
|
||||
|
||||
### Official Releases
|
||||
|
||||
This program is available as:
|
||||
|
||||
- A Calibre plugin from within Calibre or directly from the plugin [discussion thread], or;
|
||||
- A Command Line Interface (CLI) [Python
|
||||
package](https://pypi.python.org/pypi/FanFicFare) that you can
|
||||
install with:
|
||||
```
|
||||
pip install FanFicFare
|
||||
```
|
||||
- _As of late November 2019, the web service version is shutdown. See the [Wiki Home](https://github.com/JimmXinu/FanFicFare/wiki#web-service-version) page for details._
|
||||
|
||||
### Test Versions
|
||||
|
||||
FanFicFare is released roughly every month, but new test versions are posted more frequently as changes are made.
|
||||
|
||||
Test versions are available at:
|
||||
|
||||
- The [test plugin] is posted at MobileRead.
|
||||
- The test version of CLI for pip install is uploaded to the testpypi repository and can be installed with:
|
||||
```
|
||||
pip install --extra-index-url https://test.pypi.org/simple/ --upgrade FanFicFare
|
||||
```
|
||||
|
||||
### Other Releases
|
||||
|
||||
Other versions may be available depending on your OS. I(JimmXinu) don't directly support these:
|
||||
|
||||
- **Arch Linux**: The latest CLI release can be obtained from the [fanficfare](https://aur.archlinux.org/packages/fanficfare) AUR package. It will install the calibre plugin, if calibre is installed.
|
||||
|
||||
|
||||
[this post in the old FFDL thread]: https://www.mobileread.com/forums/showthread.php?p=1982785#post1982785
|
||||
[FAQs]: https://github.com/JimmXinu/FanFicFare/wiki/FAQs#can-fanficfare-download-a-story-containing-images
|
||||
[FanFicFare maillist]: https://groups.google.com/group/fanfic-downloader
|
||||
[wiki]: https://github.com/JimmXinu/FanFicFare/wiki
|
||||
[discussion thread]: https://www.mobileread.com/forums/showthread.php?t=259221
|
||||
[test plugin]: https://www.mobileread.com/forums/showthread.php?p=3084025&postcount=2
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
[main]
|
||||
host = https://www.transifex.com
|
||||
|
||||
[calibre-plugins.fanfictiondownloader]
|
||||
[o:calibre:p:calibre-plugins:r:fanfictiondownloader]
|
||||
file_filter = translations/<lang>.po
|
||||
source_file = translations/en.po
|
||||
source_lang = en
|
||||
type = PO
|
||||
type = PO
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2016, Jim Miller'
|
||||
__copyright__ = '2019, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os
|
||||
|
|
@ -32,6 +32,9 @@ except NameError:
|
|||
# The class that all Interface Action plugin wrappers must inherit from
|
||||
from calibre.customize import InterfaceActionBase
|
||||
|
||||
# pulled out from FanFicFareBase for saving in prefs.py
|
||||
__version__ = (4, 57, 7)
|
||||
|
||||
## Apparently the name for this class doesn't matter--it was still
|
||||
## 'demo' for the first few versions.
|
||||
class FanFicFareBase(InterfaceActionBase):
|
||||
|
|
@ -48,8 +51,8 @@ class FanFicFareBase(InterfaceActionBase):
|
|||
description = _('UI plugin to download FanFiction stories from various sites.')
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
author = 'Jim Miller'
|
||||
version = (2, 8, 0)
|
||||
minimum_calibre_version = (1, 48, 0)
|
||||
version = __version__
|
||||
minimum_calibre_version = (2, 85, 1)
|
||||
|
||||
#: This field defines the GUI plugin class that contains all the code
|
||||
#: that actually does something. Its format is module_path:class_name
|
||||
|
|
@ -102,8 +105,19 @@ class FanFicFareBase(InterfaceActionBase):
|
|||
ac.apply_settings()
|
||||
|
||||
def load_actual_plugin(self, gui):
|
||||
with self: # so the sys.path was modified while loading the
|
||||
# plug impl.
|
||||
# so the sys.path was modified while loading the plug impl.
|
||||
with self:
|
||||
|
||||
# Make sure the fanficfare module is available globally
|
||||
# under its simple name, -- This is the only reason other
|
||||
# plugin files can import fanficfare instead of
|
||||
# calibre_plugins.fanficfare_plugin.fanficfare.
|
||||
#
|
||||
# Added specifically for the benefit of
|
||||
# eli-schwartz/eschwartz's Arch Linux distro that wants to
|
||||
# package FFF plugin outside Calibre.
|
||||
import fanficfare
|
||||
|
||||
return InterfaceActionBase.load_actual_plugin(self,gui)
|
||||
|
||||
def cli_main(self,argv):
|
||||
|
|
@ -111,11 +125,10 @@ class FanFicFareBase(InterfaceActionBase):
|
|||
with self: # so the sys.path was modified appropriately
|
||||
# I believe there's no performance hit loading these here when
|
||||
# CLI--it would load everytime anyway.
|
||||
from StringIO import StringIO
|
||||
from calibre.library import db
|
||||
from calibre_plugins.fanficfare_plugin.fanficfare.cli import main as fff_main
|
||||
from fanficfare.cli import main as fff_main
|
||||
from calibre_plugins.fanficfare_plugin.prefs import PrefsFacade
|
||||
from calibre.utils.config import prefs as calibre_prefs
|
||||
from fanficfare.six import ensure_text
|
||||
from optparse import OptionParser
|
||||
|
||||
parser = OptionParser('%prog --run-plugin '+self.name+' -- [options] <storyurl>')
|
||||
|
|
@ -127,12 +140,11 @@ class FanFicFareBase(InterfaceActionBase):
|
|||
pargs = [x for x in argv if x.startswith('--with-library') or x.startswith('--library-path')
|
||||
or not x.startswith('-')]
|
||||
opts, args = parser.parse_args(pargs)
|
||||
|
||||
fff_prefs = PrefsFacade(db(path=opts.library_path,
|
||||
read_only=True))
|
||||
read_only=True))
|
||||
|
||||
fff_main(argv[1:],
|
||||
parser=parser,
|
||||
passed_defaultsini=StringIO(get_resources("fanficfare/defaults.ini")),
|
||||
passed_personalini=StringIO(fff_prefs["personal.ini"]),
|
||||
passed_defaultsini=ensure_text(get_resources("fanficfare/defaults.ini")),
|
||||
passed_personalini=ensure_text(fff_prefs["personal.ini"]),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
<hr />
|
||||
|
||||
<p>Plugin created by Jim Miller, borrowing heavily from Grant Drake's
|
||||
<p>Plugin created by Jim Miller, originally borrowing heavily from Grant Drake's
|
||||
'<a href="http://www.mobileread.com/forums/showthread.php?t=134856">Reading List</a>',
|
||||
'<a href="http://www.mobileread.com/forums/showthread.php?t=126727">Extract ISBN</a>' and
|
||||
'<a href="http://www.mobileread.com/forums/showthread.php?t=134000">Count Pages</a>'
|
||||
|
|
@ -8,12 +8,12 @@
|
|||
|
||||
<p>
|
||||
Calibre officially distributes plugins from the mobileread.com forum site.
|
||||
The official distro channel for this plugin is there: <a href="http://www.mobileread.com/forums/showthread.php?t=259221">FanFicFare</a>
|
||||
The official distro channel and discussion thread for this plugin is there: <a href="http://www.mobileread.com/forums/showthread.php?t=259221">FanFicFare</a>
|
||||
</p>
|
||||
|
||||
<p> I also monitor the
|
||||
<a href="http://groups.google.com/group/fanfic-downloader">general users
|
||||
group</a> for the downloader. That covers the web application and CLI, too.
|
||||
group</a> for the downloader CLI, too.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
|
|
|||
20
calibre-plugin/action_chains.py
Normal file
20
calibre-plugin/action_chains.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2024, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
## References:
|
||||
## https://www.mobileread.com/forums/showthread.php?p=4435205&postcount=65
|
||||
## https://www.mobileread.com/forums/showthread.php?p=4102834&postcount=389
|
||||
|
||||
from calibre_plugins.action_chains.events import ChainEvent
|
||||
|
||||
class FanFicFareDownloadFinished(ChainEvent):
|
||||
|
||||
# replace with the name of your event
|
||||
name = 'FanFicFare Download Finished'
|
||||
|
||||
def get_event_signal(self):
|
||||
return self.gui.iactions['FanFicFare'].download_finished_signal
|
||||
|
|
@ -1,64 +1,62 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
|
||||
try:
|
||||
from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
|
||||
except ImportError as e:
|
||||
from PyQt4.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
|
||||
|
||||
class BasicIniHighlighter(QSyntaxHighlighter):
|
||||
'''
|
||||
QSyntaxHighlighter class for use with QTextEdit for highlighting
|
||||
ini config files.
|
||||
|
||||
I looked high and low to find a high lighter for basic ini config
|
||||
format, so I'm leaving this in the project even though I'm not
|
||||
using.
|
||||
'''
|
||||
|
||||
def __init__( self, parent, theme ):
|
||||
QSyntaxHighlighter.__init__( self, parent )
|
||||
self.parent = parent
|
||||
|
||||
self.highlightingRules = []
|
||||
|
||||
# keyword
|
||||
self.highlightingRules.append( HighlightingRule( r"^[^:=\s][^:=]*[:=]",
|
||||
Qt.blue,
|
||||
Qt.SolidPattern ) )
|
||||
|
||||
# section
|
||||
self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\]",
|
||||
Qt.darkBlue,
|
||||
Qt.SolidPattern ) )
|
||||
|
||||
# comment
|
||||
self.highlightingRules.append( HighlightingRule( r"#[^\n]*" ,
|
||||
Qt.darkYellow,
|
||||
Qt.SolidPattern ) )
|
||||
|
||||
def highlightBlock( self, text ):
|
||||
for rule in self.highlightingRules:
|
||||
for match in rule.pattern.finditer(text):
|
||||
self.setFormat( match.start(), match.end()-match.start(), rule.highlight )
|
||||
self.setCurrentBlockState( 0 )
|
||||
|
||||
class HighlightingRule():
|
||||
def __init__( self, pattern, color, style ):
|
||||
if isinstance(pattern,basestring):
|
||||
self.pattern = re.compile(pattern)
|
||||
else:
|
||||
self.pattern=pattern
|
||||
charfmt = QTextCharFormat()
|
||||
brush = QBrush(color, style)
|
||||
charfmt.setForeground(brush)
|
||||
self.highlight = charfmt
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (absolute_import, unicode_literals, division,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
|
||||
from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush)
|
||||
|
||||
from fanficfare.six import string_types
|
||||
|
||||
class BasicIniHighlighter(QSyntaxHighlighter):
|
||||
'''
|
||||
QSyntaxHighlighter class for use with QTextEdit for highlighting
|
||||
ini config files.
|
||||
|
||||
I looked high and low to find a high lighter for basic ini config
|
||||
format, so I'm leaving this in the project even though I'm not
|
||||
using.
|
||||
'''
|
||||
|
||||
def __init__( self, parent, theme ):
|
||||
QSyntaxHighlighter.__init__( self, parent )
|
||||
self.parent = parent
|
||||
|
||||
self.highlightingRules = []
|
||||
|
||||
# keyword
|
||||
self.highlightingRules.append( HighlightingRule( r"^[^:=\s][^:=]*[:=]",
|
||||
Qt.blue,
|
||||
Qt.SolidPattern ) )
|
||||
|
||||
# section
|
||||
self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\]",
|
||||
Qt.darkBlue,
|
||||
Qt.SolidPattern ) )
|
||||
|
||||
# comment
|
||||
self.highlightingRules.append( HighlightingRule( r"#[^\n]*" ,
|
||||
Qt.darkYellow,
|
||||
Qt.SolidPattern ) )
|
||||
|
||||
def highlightBlock( self, text ):
|
||||
for rule in self.highlightingRules:
|
||||
for match in rule.pattern.finditer(text):
|
||||
self.setFormat( match.start(), match.end()-match.start(), rule.highlight )
|
||||
self.setCurrentBlockState( 0 )
|
||||
|
||||
class HighlightingRule():
|
||||
def __init__( self, pattern, color, style ):
|
||||
if isinstance(pattern, string_types):
|
||||
self.pattern = re.compile(pattern)
|
||||
else:
|
||||
self.pattern=pattern
|
||||
charfmt = QTextCharFormat()
|
||||
brush = QBrush(color, style)
|
||||
charfmt.setForeground(brush)
|
||||
self.highlight = charfmt
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,49 +1,116 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from StringIO import StringIO
|
||||
from ConfigParser import ParsingError
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from calibre_plugins.fanficfare_plugin.fanficfare import adapters, exceptions
|
||||
from calibre_plugins.fanficfare_plugin.fanficfare.configurable import Configuration
|
||||
from calibre_plugins.fanficfare_plugin.prefs import prefs
|
||||
|
||||
def get_fff_personalini():
|
||||
return prefs['personal.ini']
|
||||
|
||||
def get_fff_config(url,fileform="epub",personalini=None):
|
||||
if not personalini:
|
||||
personalini = get_fff_personalini()
|
||||
sections=['unknown']
|
||||
try:
|
||||
sections = adapters.getConfigSectionsFor(url)
|
||||
except Exception as e:
|
||||
logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
|
||||
configuration = Configuration(sections,fileform)
|
||||
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
|
||||
configuration.readfp(StringIO(personalini))
|
||||
|
||||
return configuration
|
||||
|
||||
def get_fff_adapter(url,fileform="epub",personalini=None):
|
||||
return adapters.getAdapter(get_fff_config(url,fileform,personalini),url)
|
||||
|
||||
def test_config(initext):
|
||||
try:
|
||||
configini = get_fff_config("test1.com?sid=555",
|
||||
personalini=initext)
|
||||
errors = configini.test_config()
|
||||
except ParsingError as pe:
|
||||
errors = pe.errors
|
||||
|
||||
return errors
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2020, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from functools import reduce
|
||||
|
||||
from io import StringIO
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from fanficfare import adapters
|
||||
from fanficfare.configurable import Configuration
|
||||
from calibre_plugins.fanficfare_plugin.prefs import prefs
|
||||
from fanficfare.six import ensure_text
|
||||
from fanficfare.six.moves import configparser
|
||||
from fanficfare.six.moves import collections_abc
|
||||
|
||||
def get_fff_personalini():
|
||||
return prefs['personal.ini']
|
||||
|
||||
def get_fff_config(url,fileform="epub",personalini=None):
|
||||
if not personalini:
|
||||
personalini = get_fff_personalini()
|
||||
sections=['unknown']
|
||||
try:
|
||||
sections = adapters.getConfigSectionsFor(url)
|
||||
except Exception as e:
|
||||
logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
|
||||
configuration = Configuration(sections,fileform)
|
||||
configuration.read_file(StringIO(ensure_text(get_resources("plugin-defaults.ini"))))
|
||||
configuration.read_file(StringIO(ensure_text(personalini)))
|
||||
|
||||
return configuration
|
||||
|
||||
def get_fff_adapter(url,fileform="epub",personalini=None):
|
||||
return adapters.getAdapter(get_fff_config(url,fileform,personalini),url)
|
||||
|
||||
def test_config(initext):
|
||||
try:
|
||||
configini = get_fff_config("test1.com?sid=555",
|
||||
personalini=initext)
|
||||
errors = configini.test_config()
|
||||
except configparser.ParsingError as pe:
|
||||
errors = pe.errors
|
||||
|
||||
return errors
|
||||
|
||||
|
||||
class OrderedSet(collections_abc.MutableSet):
|
||||
|
||||
def __init__(self, iterable=None):
|
||||
self.end = end = []
|
||||
end += [None, end, end] # sentinel node for doubly linked list
|
||||
self.map = {} # key --> [key, prev, next]
|
||||
if iterable is not None:
|
||||
self |= iterable
|
||||
|
||||
def __len__(self):
|
||||
return len(self.map)
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.map
|
||||
|
||||
def add(self, key):
|
||||
if key not in self.map:
|
||||
end = self.end
|
||||
curr = end[1]
|
||||
curr[2] = end[1] = self.map[key] = [key, curr, end]
|
||||
|
||||
def discard(self, key):
|
||||
if key in self.map:
|
||||
key, prev, next = self.map.pop(key)
|
||||
prev[2] = next
|
||||
next[1] = prev
|
||||
|
||||
def __iter__(self):
|
||||
end = self.end
|
||||
curr = end[2]
|
||||
while curr is not end:
|
||||
yield curr[0]
|
||||
curr = curr[2]
|
||||
|
||||
def __reversed__(self):
|
||||
end = self.end
|
||||
curr = end[1]
|
||||
while curr is not end:
|
||||
yield curr[0]
|
||||
curr = curr[1]
|
||||
|
||||
def pop(self, last=True):
|
||||
if not self:
|
||||
raise KeyError('set is empty')
|
||||
key = self.end[1][0] if last else self.end[2][0]
|
||||
self.discard(key)
|
||||
return key
|
||||
|
||||
def __repr__(self):
|
||||
if not self:
|
||||
return '%s()' % (self.__class__.__name__,)
|
||||
return '%s(%r)' % (self.__class__.__name__, list(self))
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, OrderedSet):
|
||||
return len(self) == len(other) and list(self) == list(other)
|
||||
return set(self) == set(other)
|
||||
|
||||
def get_common_elements(ll):
|
||||
## returns a list of elements common to all lists in ll
|
||||
## https://www.tutorialspoint.com/find-common-elements-in-list-of-lists-in-python
|
||||
return list(reduce(lambda i, j: i & j, (OrderedSet(n) for n in ll)))
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 24 KiB |
|
|
@ -1,124 +1,159 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2016, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
|
||||
try:
|
||||
from PyQt5.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont)
|
||||
except ImportError as e:
|
||||
from PyQt4.Qt import (Qt, QSyntaxHighlighter, QTextCharFormat, QBrush, QFont)
|
||||
|
||||
# r'add_to_+key
|
||||
|
||||
|
||||
|
||||
class IniHighlighter(QSyntaxHighlighter):
    '''
    QSyntaxHighlighter class for use with QTextEdit for highlighting
    ini config files.
    '''

    def __init__( self, parent, sections=[], keywords=[], entries=[], entry_keywords=[] ):
        # parent: the QTextDocument/QTextEdit being highlighted.
        # sections/keywords/entries/entry_keywords: lists of *known* names;
        # entry_keywords items are format strings with one %s slot filled
        # with an entry-name pattern below.
        QSyntaxHighlighter.__init__( self, parent )
        self.parent = parent

        # NOTE: rule ORDER matters -- highlightBlock applies rules in list
        # order and later setFormat calls override earlier ones, so the
        # broad "error" rules come before the narrower "known" rules.
        self.highlightingRules = []

        if entries:
            # *known* entries
            reentries = r'('+(r'|'.join(entries))+r')'
            self.highlightingRules.append( HighlightingRule( r"\b"+reentries+r"\b", Qt.darkGreen ) )

        # true/false -- just to be nice.
        self.highlightingRules.append( HighlightingRule( r"\b(true|false)\b", Qt.darkGreen ) )

        # *all* keywords -- change known later.
        self.errorRule = HighlightingRule( r"^[^:=\s][^:=]*[:=]", Qt.red )
        self.highlightingRules.append( self.errorRule )

        # *all* entry keywords -- change known later.
        reentrykeywords = r'('+(r'|'.join([ e % r'[a-zA-Z0-9_]+' for e in entry_keywords ]))+r')'
        self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"\s*[:=]", Qt.darkMagenta ) )

        if entries: # separate from known entries so entry named keyword won't be masked.
            # *known* entry keywords
            reentrykeywords = r'('+(r'|'.join([ e % reentries for e in entry_keywords ]))+r')'
            self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"\s*[:=]", Qt.blue ) )

        # *known* keywords
        rekeywords = r'('+(r'|'.join(keywords))+r')'
        self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"\s*[:=]", Qt.blue ) )

        # *all* sections -- change known later.
        # blocknum=1 marks an unknown-section block state.
        self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", Qt.red, QFont.Bold, blocknum=1 ) )

        if sections:
            # *known* sections (blocknum=2)
            resections = r'('+(r'|'.join(sections))+r')'
            resections = resections.replace('.','\.') #escape dots.
            self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", Qt.darkBlue, QFont.Bold, blocknum=2 ) )

        # test story sections (blocknum=3)
        self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", Qt.darkCyan, blocknum=3 )
        self.highlightingRules.append( self.teststoryRule )

        # storyUrl sections (blocknum=4)
        self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", Qt.darkMagenta, blocknum=4 )
        self.highlightingRules.append( self.storyUrlRule )

        # NOT comments -- but can be custom columns, so don't flag.
        #self.highlightingRules.append( HighlightingRule( r"(?<!^)#[^\n]*" , Qt.red ) )

        # comments -- comments must start from column 0.
        self.commentRule = HighlightingRule( r"^#[^\n]*" , Qt.darkYellow )
        self.highlightingRules.append( self.commentRule )

    def highlightBlock( self, text ):
        # Called by Qt once per text block (line).  Block state (the
        # blocknum of the enclosing section header) is propagated line to
        # line via previousBlockState/setCurrentBlockState.
        is_comment = False
        blocknum = self.previousBlockState()
        for rule in self.highlightingRules:
            for match in rule.pattern.finditer(text):
                self.setFormat( match.start(), match.end()-match.start(), rule.highlight )
                if rule == self.commentRule:
                    is_comment = True
                if rule.blocknum > 0:
                    blocknum = rule.blocknum

        if not is_comment:
            # unknown section, error all:
            # (state 1 inherited from the previous line means we are
            # *inside* an unknown section, not on its header line)
            if blocknum == 1 and blocknum == self.previousBlockState():
                self.setFormat( 0, len(text), self.errorRule.highlight )

            # teststory section rules:
            if blocknum == 3:
                self.setFormat( 0, len(text), self.teststoryRule.highlight )

            # storyUrl section rules:
            if blocknum == 4:
                self.setFormat( 0, len(text), self.storyUrlRule.highlight )

        self.setCurrentBlockState( blocknum )
||||
|
||||
class HighlightingRule():
    '''
    One syntax-highlighting rule: a compiled regex plus the character
    format to apply to its matches and the block-state number (blocknum)
    it assigns to the containing line, if any.
    '''
    def __init__( self, pattern, color,
                  weight=QFont.Normal,
                  style=Qt.SolidPattern,
                  blocknum=0):
        # Accept either a regex string or an already-compiled pattern.
        if isinstance(pattern, basestring):
            pattern = re.compile(pattern)
        self.pattern = pattern
        # Build the Qt character format once, up front.
        fmt = QTextCharFormat()
        fmt.setForeground(QBrush(color, style))
        fmt.setFontWeight(weight)
        self.highlight = fmt
        self.blocknum = blocknum
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (absolute_import, unicode_literals, division,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2020, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from PyQt5.Qt import (QApplication, Qt, QColor, QSyntaxHighlighter,
|
||||
QTextCharFormat, QBrush, QFont)
|
||||
|
||||
try:
|
||||
# qt6 Calibre v6+
|
||||
QFontNormal = QFont.Weight.Normal
|
||||
QFontBold = QFont.Weight.Bold
|
||||
except:
|
||||
# qt5 Calibre v2-5
|
||||
QFontNormal = QFont.Normal
|
||||
QFontBold = QFont.Bold
|
||||
|
||||
from fanficfare.six import string_types
|
||||
|
||||
class IniHighlighter(QSyntaxHighlighter):
    '''
    QSyntaxHighlighter class for use with QTextEdit for highlighting
    ini config files.
    '''

    def __init__( self, parent, sections=[], keywords=[], entries=[], entry_keywords=[] ):
        # parent: the QTextDocument/QTextEdit being highlighted.
        # sections/keywords/entries/entry_keywords: lists of *known* names;
        # entry_keywords items are format strings with one %s slot filled
        # with an entry-name pattern below.
        QSyntaxHighlighter.__init__( self, parent )
        self.parent = parent

        # NOTE: rule ORDER matters -- highlightBlock applies rules in list
        # order and later setFormat calls override earlier ones, so the
        # broad "error" rules come before the narrower "known" rules.
        self.highlightingRules = []

        # Default (light theme) palette.
        colors = {
            'knownentries':Qt.darkGreen,
            'errors':Qt.red,
            'allkeywords':Qt.darkMagenta,
            'knownkeywords':Qt.blue,
            'knownsections':Qt.darkBlue,
            'teststories':Qt.darkCyan,
            'storyUrls':Qt.darkMagenta,
            'comments':Qt.darkYellow
            }
        try:
            # calibre exposes is_dark_theme on its QApplication; guarded
            # with hasattr/try since older versions may not have it.
            if( hasattr(QApplication.instance(),'is_dark_theme')
                and QApplication.instance().is_dark_theme ):
                # Brighter palette for dark backgrounds.
                colors = {
                    'knownentries':Qt.green,
                    'errors':Qt.red,
                    'allkeywords':Qt.magenta,
                    'knownkeywords':QColor(Qt.blue).lighter(150),
                    'knownsections':Qt.darkCyan,
                    'teststories':Qt.cyan,
                    'storyUrls':QColor(Qt.magenta).lighter(150),
                    'comments':Qt.yellow
                    }
        except Exception as e:
            # Best-effort: fall back to the light palette on any failure.
            logger.error("Failed to set dark theme highlight colors: %s"%e)

        if entries:
            # *known* entries
            reentries = r'('+(r'|'.join(entries))+r')'
            self.highlightingRules.append( HighlightingRule( r"\b"+reentries+r"\b", colors['knownentries'] ) )

        # true/false -- just to be nice.
        self.highlightingRules.append( HighlightingRule( r"\b(true|false)\b", colors['knownentries'] ) )

        # *all* keywords -- change known later.
        self.errorRule = HighlightingRule( r"^[^:=\s][^:=]*[:=]", colors['errors'] )
        self.highlightingRules.append( self.errorRule )

        # *all* entry keywords -- change known later.
        reentrykeywords = r'('+(r'|'.join([ e % r'[a-zA-Z0-9_]+' for e in entry_keywords ]))+r')'
        self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"(_filelist)?\s*[:=]", colors['allkeywords'] ) )

        if entries: # separate from known entries so entry named keyword won't be masked.
            # *known* entry keywords
            reentrykeywords = r'('+(r'|'.join([ e % reentries for e in entry_keywords ]))+r')'
            self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+reentrykeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) )

        # *known* keywords
        rekeywords = r'('+(r'|'.join(keywords))+r')'
        self.highlightingRules.append( HighlightingRule( r"^(add_to_)?"+rekeywords+r"(_filelist)?\s*[:=]", colors['knownkeywords'] ) )

        # *all* sections -- change known later.
        # blocknum=1 marks an unknown-section block state.
        self.highlightingRules.append( HighlightingRule( r"^\[[^\]]+\].*?$", colors['errors'], QFontBold, blocknum=1 ) )

        if sections:
            # *known* sections (blocknum=2)
            resections = r'('+(r'|'.join(sections))+r')'
            resections = resections.replace('.',r'\.') #escape dots.
            self.highlightingRules.append( HighlightingRule( r"^\["+resections+r"\]\s*$", colors['knownsections'], QFontBold, blocknum=2 ) )

        # test story sections (blocknum=3)
        self.teststoryRule = HighlightingRule( r"^\[teststory:([0-9]+|defaults)\]", colors['teststories'], blocknum=3 )
        self.highlightingRules.append( self.teststoryRule )

        # storyUrl sections
        # StoryUrls are *not* checked beyond looking for https?://
        # (treated as "known section", blocknum=2, since April 2023)
        self.storyUrlRule = HighlightingRule( r"^\[https?://.*\]", colors['storyUrls'], QFontBold, blocknum=2 )
        self.highlightingRules.append( self.storyUrlRule )

        # NOT comments -- but can be custom columns, so don't flag.
        #self.highlightingRules.append( HighlightingRule( r"(?<!^)#[^\n]*" , colors['errors'] ) )

        # comments -- comments must start from column 0.
        self.commentRule = HighlightingRule( r"^#[^\n]*" , colors['comments'] )
        self.highlightingRules.append( self.commentRule )

    def highlightBlock( self, text ):
        # Called by Qt once per text block (line).  Block state (the
        # blocknum of the enclosing section header) is propagated line to
        # line via previousBlockState/setCurrentBlockState.
        is_comment = False
        blocknum = self.previousBlockState()
        for rule in self.highlightingRules:
            for match in rule.pattern.finditer(text):
                self.setFormat( match.start(), match.end()-match.start(), rule.highlight )
                if rule == self.commentRule:
                    is_comment = True
                if rule.blocknum > 0:
                    blocknum = rule.blocknum

        if not is_comment:
            # unknown section, error all:
            # (state 1 inherited from the previous line means we are
            # *inside* an unknown section, not on its header line)
            if blocknum == 1 and blocknum == self.previousBlockState():
                self.setFormat( 0, len(text), self.errorRule.highlight )

            # teststory section rules:
            if blocknum == 3:
                self.setFormat( 0, len(text), self.teststoryRule.highlight )

            ## changed storyUrl section to also be blocknum=1 April 2023
            ## storyUrl section rules:
            # if blocknum == 4:
            #     self.setFormat( 0, len(text), self.storyUrlRule.highlight )

        self.setCurrentBlockState( blocknum )
||||
|
||||
class HighlightingRule():
    '''
    One syntax-highlighting rule: a compiled regex plus the character
    format to apply to its matches and the block-state number (blocknum)
    it assigns to the containing line, if any.
    '''
    def __init__( self, pattern, color,
                  weight=QFontNormal,
                  style=Qt.SolidPattern,
                  blocknum=0):
        # Accept either a regex string or an already-compiled pattern.
        if isinstance(pattern, string_types):
            pattern = re.compile(pattern)
        self.pattern = pattern
        # Build the Qt character format once, up front.
        fmt = QTextCharFormat()
        fmt.setForeground(QBrush(color, style))
        fmt.setFontWeight(weight)
        self.highlight = fmt
        self.blocknum = blocknum
||||
|
|
|
|||
|
|
@ -1,340 +1,403 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2016, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import traceback
|
||||
from datetime import time
|
||||
from StringIO import StringIO
|
||||
|
||||
from calibre.utils.ipc.server import Server
|
||||
from calibre.utils.ipc.job import ParallelJob
|
||||
from calibre.constants import numeric_version as calibre_version
|
||||
from calibre.utils.date import local_tz
|
||||
|
||||
from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
|
||||
from calibre_plugins.fanficfare_plugin.prefs import (SAVE_YES, SAVE_YES_UNLESS_SITE)
|
||||
|
||||
# pulls in translation files for _() strings
|
||||
try:
|
||||
load_translations()
|
||||
except NameError:
|
||||
pass # load_translations() added in calibre 1.9
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
#
|
||||
# Functions to perform downloads using worker jobs
|
||||
#
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def do_download_worker(book_list,
                       options,
                       cpus,
                       merge=False,
                       notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to extract ISBN for a set of books

    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process.

    :param book_list: list of book dicts; each must carry 'url', 'good',
                      'calibre_id', 'listorder' (and gains 'comment' etc.)
    :param options: shared download options dict (version, fileform, ...)
    :param cpus: worker pool size for calibre's IPC Server
    :param merge: passed through to do_download_for_worker (anthology mode)
    :param notification: progress callback(fraction, message)
    :returns: book_list with each entry replaced by its child-job result
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []
    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s"%book['url'])
        if book['good']:
            total += 1
            # args: (module, function, function-args) for the child process.
            args = ['calibre_plugins.fanficfare_plugin.jobs',
                    'do_download_for_worker',
                    (book,options,merge)]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)"%(book['url'],book['calibre_id']),
                              done=None,
                              args=args)
            # stash the input book on the job so it can be matched to its
            # result when the job finishes.
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        # Replace the input book dict with the child job's result dict.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(float(count)/total, _('%d of %d stories finished downloading')%(count,total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            ## ordering first by good vs bad, then by listorder.
            good_list = filter(lambda x : x['good'], book_list)
            bad_list = filter(lambda x : not x['good'], book_list)
            good_list = sorted(good_list,key=lambda x : x['listorder'])
            bad_list = sorted(bad_list,key=lambda x : x['listorder'])

            logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(url)s %(comment)s" % book for book in good_list+bad_list])))

            logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in good_list])))
            logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in bad_list])))
            break

    server.close()

    # return the book list as the job result
    return book_list
||||
|
||||
def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
    '''
    Child job, to download story when run as a worker job.

    Runs in its own process; book is a plain dict so it can cross the
    IPC boundary.  Mutates and returns book with status/comment/metadata
    filled in; never raises -- all failures are recorded on the book dict.
    '''

    # Plugin zip must be re-loaded in this child process space.
    from calibre_plugins.fanficfare_plugin import FanFicFareBase
    fffbase = FanFicFareBase(options['plugin_path'])
    with fffbase:

        from calibre_plugins.fanficfare_plugin.dialogs import (NotGoingToDownload,
            OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
        from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions
        from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import get_update_data

        from calibre_plugins.fanficfare_plugin.fff_util import (get_fff_adapter, get_fff_config)

        try:
            book['comment'] = _('Download started...')

            configuration = get_fff_config(book['url'],
                                           options['fileform'],
                                           options['personal.ini'])

            if configuration.getConfig('use_ssl_unverified_context'):
                ## monkey patch to avoid SSL bug. dupliated from
                ## fff_plugin.py because bg jobs run in own process
                ## space.
                import ssl
                if hasattr(ssl, '_create_unverified_context'):
                    ssl._create_default_https_context = ssl._create_unverified_context

            # Don't regenerate the cover on updates unless asked to.
            if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
                configuration.set("overrides","never_make_cover","true")

            # images only for epub, html, even if the user mistakenly
            # turned it on else where.
            if options['fileform'] not in ("epub","html"):
                configuration.set("overrides","include_images","false")

            adapter = adapters.getAdapter(configuration,book['url'])
            adapter.is_adult = book['is_adult']
            adapter.username = book['username']
            adapter.password = book['password']
            adapter.setChaptersRange(book['begin'],book['end'])

            adapter.load_cookiejar(options['cookiejarfile'])
            #logger.debug("cookiejar:%s"%adapter.cookiejar)
            adapter.set_pagecache(options['pagecache'])

            story = adapter.getStoryMetadataOnly()
            if not story.getMetadata("series") and 'calibre_series' in book:
                adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])

            # set PI version instead of default.
            if 'version' in options:
                story.setMetadata('version',options['version'])

            # Copy story metadata onto the book dict for calibre's use.
            book['title'] = story.getMetadata("title", removeallentities=True)
            book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
            book['publisher'] = story.getMetadata("site")
            book['url'] = story.getMetadata("storyUrl")
            book['tags'] = story.getSubjectTags(removeallentities=True)
            book['comments'] = story.get_sanitized_description()
            book['series'] = story.getMetadata("series", removeallentities=True)

            if story.getMetadataRaw('datePublished'):
                book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateUpdated'):
                book['updatedate'] = story.getMetadataRaw('dateUpdated').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateCreated'):
                book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz)
            else:
                book['timestamp'] = datetime.now() # need *something* there for calibre.

            writer = writers.getWriter(options['fileform'],configuration,adapter)

            outfile = book['outfile']

            ## No need to download at all. Shouldn't ever get down here.
            if options['collision'] in (CALIBREONLY, CALIBREONLYSAVECOL):
                logger.info("Skipping CALIBREONLY 'update' down inside worker--this shouldn't be happening...")
                book['comment'] = _('Metadata collected.')
                book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            ## checks were done earlier, it's new or not dup or newer--just write it.
            elif options['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
                    ('epub_for_update' not in book and options['collision'] in (UPDATE, UPDATEALWAYS)):

                # preserve logfile even on overwrite.
                if 'epub_for_update' in book:
                    adapter.logfile = get_update_data(book['epub_for_update'])[6]
                    # change the existing entries id to notid so
                    # write_epub writes a whole new set to indicate overwrite.
                    if adapter.logfile:
                        adapter.logfile = adapter.logfile.replace("span id","span notid")

                if options['collision'] == OVERWRITE and 'fileupdated' in book:
                    lastupdated=story.getMetadataRaw('dateUpdated')
                    fileupdated=book['fileupdated']

                    # updated doesn't have time (or is midnight), use dates only.
                    # updated does have time, use full timestamps.
                    if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
                            (lastupdated.time() != time.min and fileupdated > lastupdated):
                        raise NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png',showerror=False)


                logger.info("write to %s"%outfile)
                inject_cal_cols(book,story,configuration)
                writer.writeStory(outfilename=outfile, forceOverwrite=True)

                book['comment'] = _('Download %s completed, %s chapters.')%(options['fileform'],story.getMetadata("numChapters"))
                book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            ## checks were done earlier, just update it.
            elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):

                # update now handled by pre-populating the old images and
                # chapters in the adapter rather than merging epubs.
                urlchaptercount = int(story.getMetadata('numChapters').replace(',',''))
                (url,
                 chaptercount,
                 adapter.oldchapters,
                 adapter.oldimgs,
                 adapter.oldcover,
                 adapter.calibrebookmark,
                 adapter.logfile,
                 adapter.oldchaptersmap,
                 adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9]

                # dup handling from fff_plugin needed for anthology updates.
                if options['collision'] == UPDATE:
                    if chaptercount == urlchaptercount:
                        if merge:
                            book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
                            book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                            if options['savemetacol'] != '':
                                book['savemetacol'] = story.dump_html_metadata()
                            book['outfile'] = book['epub_for_update'] # for anthology merge ops.
                            return book
                        else: # not merge,
                            raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
                    elif chaptercount > urlchaptercount:
                        raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
                    elif chaptercount == 0:
                        raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')

                # site-specific update hook, skipped when UPDATEALWAYS has
                # nothing new to fetch.
                if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
                        and adapter.getConfig("do_update_hook"):
                    chaptercount = adapter.hookForUpdates(chaptercount)

                logger.info("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount))
                logger.info("write to %s"%outfile)

                inject_cal_cols(book,story,configuration)
                writer.writeStory(outfilename=outfile, forceOverwrite=True)

                book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\
                    (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
                book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            # Recompute word count from the written file when configured
            # (always, or only when the site didn't provide one).
            if options['do_wordcount'] == SAVE_YES or (
                options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ):
                wordcount = get_word_count(outfile)
                logger.info("get_word_count:%s"%wordcount)
                story.setMetadata('numWords',wordcount)
                writer.writeStory(outfilename=outfile, forceOverwrite=True)
                book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            if options['smarten_punctuation'] and options['fileform'] == "epub" \
                    and calibre_version >= (0, 9, 39):
                # for smarten punc
                from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
                from calibre.utils.logging import Log
                from collections import namedtuple

                # do smarten_punctuation from calibre's polish feature
                data = {'smarten_punctuation':True}
                opts = ALL_OPTS.copy()
                opts.update(data)
                O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys()))
                opts = O(**opts)

                log = Log(level=Log.DEBUG)
                polish({outfile:outfile}, opts, log, logger.info)

        except NotGoingToDownload as d:
            # expected "skip" outcome; record reason on the book dict.
            book['good']=False
            book['showerror']=d.showerror
            book['comment']=unicode(d)
            book['icon'] = d.icon

        except Exception as e:
            # unexpected failure; record and log, never propagate.
            book['good']=False
            book['comment']=unicode(e)
            book['icon']='dialog_error.png'
            book['status'] = _('Error')
            logger.info("Exception: %s:%s"%(book,unicode(e)),exc_info=True)

    #time.sleep(10)
    return book
||||
|
||||
## calibre's columns for an existing book are pased in and injected
|
||||
## into the story's metadata. For convenience, we also add labels and
|
||||
## valid_entries for them in a special [injected] section that has
|
||||
## even less precedence than [defaults]
|
||||
## calibre's columns for an existing book are pased in and injected
## into the story's metadata. For convenience, we also add labels and
## valid_entries for them in a special [injected] section that has
## even less precedence than [defaults]
def inject_cal_cols(book,story,configuration):
    # Drop any leftover section from a previous call before re-adding.
    configuration.remove_section('injected')
    if 'calibre_columns' not in book:
        return
    ini_lines = ['[injected]']
    valid_entries = []
    for col_key, col in book['calibre_columns'].iteritems():
        story.setMetadata(col_key, col['val'])
        ini_lines.append('%s_label:%s' % (col_key, col['label']))
        valid_entries.append(col_key)
    if valid_entries: # if empty, there's nothing to add.
        ini_lines.append("add_to_extra_valid_entries:," + ','.join(valid_entries))
    configuration.readfp(StringIO('\n'.join(ini_lines)))
    #print("added:\n%s\n"%('\n'.join(ini_lines)))
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2020, Jim Miller, 2011, Grant Drake <grant.drake@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from time import sleep
|
||||
from datetime import time
|
||||
from io import StringIO
|
||||
from collections import defaultdict
|
||||
import sys
|
||||
|
||||
from calibre.utils.date import local_tz
|
||||
|
||||
# pulls in translation files for _() strings
|
||||
try:
|
||||
load_translations()
|
||||
except NameError:
|
||||
pass # load_translations() added in calibre 1.9
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
#
|
||||
# Functions to perform downloads using worker jobs
|
||||
#
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
def do_download_worker_single(site,
                              book_list,
                              options,
                              merge,
                              notification=lambda x,y:x):
    '''
    Background job entry point: download every 'good' book in book_list
    for one site sequentially in this process, passing already-failed
    books straight through so all results arrive together.

    :param site: site identifier for this job (not otherwise read here)
    :param book_list: list of book dicts (url, good, status, comment, ...)
    :param options: shared download options dict
    :param merge: anthology-merge flag passed to do_download_for_worker
    :param notification: progress callback(fraction, message)
    :returns: the processed book list from finish_download()
    '''

    logger.info(options['version'])

    ## same info debug calibre prints out at startup. For when users
    ## give me job output instead of debug log.
    from calibre.debug import print_basic_debug_info
    print_basic_debug_info(sys.stderr)

    notification(0.01, _('Downloading FanFiction Stories'))
    from calibre_plugins.fanficfare_plugin import FanFicFareBase
    fffbase = FanFicFareBase(options['plugin_path'])
    with fffbase: # so the sys.path was modified while loading the
                  # plug impl.
        from fanficfare.fff_profile import do_cprofile

        ## extra function just so I can easily use the same
        ## @do_cprofile decorator
        @do_cprofile
        def profiled_func():
            # count is read by the do_indiv_notif closure below.
            count = 0
            # totals: per-url fraction complete, summed for overall progress.
            totals = {}
            # can't do direct assignment in list comprehension? I'm sure it
            # makes sense to some pythonista.
            # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
            [ totals.update({x['url']:0.0}) for x in book_list if x['good'] ]
            # logger.debug(sites_lists.keys())

            def do_indiv_notif(percent,msg):
                # msg is the book's url here -- that's the totals key.
                totals[msg] = percent/len(totals)
                notification(max(0.01,sum(totals.values())), _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})

            do_list = []
            done_list = []
            logger.info("\n\n"+_("Downloading FanFiction Stories")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
            ## pass failures from metadata through bg job so all results are
            ## together.
            for book in book_list:
                if book['good']:
                    do_list.append(book)
                else:
                    done_list.append(book)
            for book in do_list:
                # logger.info("%s"%book['url'])
                done_list.append(do_download_for_worker(book,options,merge,do_indiv_notif))
                count += 1
            return finish_download(done_list)
        return profiled_func()
|
||||
def finish_download(donelist):
    '''
    Post-process a completed download batch: log the results grouped by
    status, and assign each book a 'reportorder' so results from several
    background jobs interleave in a stable good-before-bad, status, then
    original-list order.

    :param donelist: list of book dicts with 'good', 'status', 'url',
                     'listorder' keys
    :returns: donelist sorted by 'listorder' (same dicts, mutated)
    '''
    book_list = sorted(donelist,key=lambda x : x['listorder'])
    logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))

    good_lists = defaultdict(list)
    bad_lists = defaultdict(list)
    for book in book_list:
        if book['good']:
            good_lists[book['status']].append(book)
        else:
            bad_lists[book['status']].append(book)

    order = [_('Add'),
             _('Update'),
             _('Meta'),
             _('Different URL'),
             _('Rejected'),
             _('Skipped'),
             _('Bad'),
             _('Error'),
             ]
    stnum = 0
    for d in [ good_lists, bad_lists ]:
        for status in order:
            # stnum advances for every (dict, status) pair whether present
            # or not, so report ordering is stable across batches.
            stnum += 1
            # membership test instead of d[status]: indexing a defaultdict
            # would insert empty lists that the leftover loop below would
            # then log as spurious empty status sections.
            if status in d:
                l = d[status]
                logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
                for book in l:
                    # Add prior listorder to 10000 * status num for
                    # ordering of accumulated results with multiple bg
                    # jobs
                    book['reportorder'] = stnum*10000 + book['listorder']
                del d[status]
        # just in case a status is added but doesn't appear in order.
        for status in d.keys():
            logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))

    # return the book list as the job result
    return book_list
||||
|
||||
def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
    '''
    Child job, to download story when run as a worker job.

    book: dict describing one story job (url, collision mode, credentials,
          outfile, etc.); it is updated in place and returned as the job
          result, with 'good'/'status'/'comment' reporting the outcome.
    options: dict of job-wide settings (fileform, personal.ini, caches,
          savemetacol, do_wordcount, smarten_punctuation, ...).
    merge: True when this download is part of an anthology merge.
    notification: progress callback passed through to the writer.
    '''
    from calibre_plugins.fanficfare_plugin import FanFicFareBase
    fffbase = FanFicFareBase(options['plugin_path'])
    with fffbase: # so the sys.path was modified while loading the
                  # plug impl.
        from calibre_plugins.fanficfare_plugin.prefs import (
            SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
            UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
        from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
        from fanficfare import adapters, writers
        from fanficfare.epubutils import get_update_data
        from fanficfare.exceptions import NotGoingToDownload
        from fanficfare.six import text_type as unicode

        from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config

        try:
            logger.info("\n\n" + ("-"*80) + " " + book['url'])
            ## No need to download at all.  Can happen now due to
            ## collision moving into book for CALIBREONLY changing to
            ## ADDNEW when story URL not in library.
            if book['collision'] in (CALIBREONLY, CALIBREONLYSAVECOL):
                logger.info("Skipping CALIBREONLY 'update' down inside worker")
                return book

            book['comment'] = _('Download started...')

            configuration = get_fff_config(book['url'],
                                           options['fileform'],
                                           options['personal.ini'])

            # images only for epub, html, even if the user mistakenly
            # turned it on else where.
            if options['fileform'] not in ("epub","html"):
                configuration.set("overrides","include_images","false")

            adapter = adapters.getAdapter(configuration,book['url'])
            adapter.is_adult = book['is_adult']
            adapter.username = book['username']
            adapter.password = book['password']
            adapter.totp = book['totp']
            adapter.setChaptersRange(book['begin'],book['end'])

            ## each site download job starts with a new copy of the
            ## cookiejar and basic_cache from the FG process.  They
            ## are not shared between different sites' BG downloads
            if 'basic_cache' in options:
                configuration.set_basic_cache(options['basic_cache'])
            else:
                options['basic_cache'] = configuration.get_basic_cache()
                options['basic_cache'].load_cache(options['basic_cachefile'])
            if 'cookiejar' in options:
                configuration.set_cookiejar(options['cookiejar'])
            else:
                options['cookiejar'] = configuration.get_cookiejar()
                options['cookiejar'].load_cookiejar(options['cookiejarfile'])

            story = adapter.getStoryMetadataOnly()
            if not story.getMetadata("series") and 'calibre_series' in book:
                adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1])

            # is a merge, is a pre-existing anthology, and is not a
            # pre-existing book in anthology.
            if merge and 'mergebook' in options and 'epub_for_update' not in book:
                # internal for plugin anthologies to mark chapters
                # (new) in new stories
                story.setMetadata("newforanthology","true")
                logger.debug("metadata newforanthology:%s"%story.getMetadata("newforanthology"))

            # set PI version instead of default.
            if 'version' in options:
                story.setMetadata('version',options['version'])

            book['title'] = story.getMetadata("title", removeallentities=True)
            book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
            book['publisher'] = story.getMetadata("publisher")
            book['url'] = story.getMetadata("storyUrl", removeallentities=True)
            book['comments'] = story.get_sanitized_description()
            book['series'] = story.getMetadata("series", removeallentities=True)

            if story.getMetadataRaw('datePublished'):
                book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateUpdated'):
                book['updatedate'] = story.getMetadataRaw('dateUpdated').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateCreated'):
                book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz)
            else:
                book['timestamp'] = datetime.now().replace(tzinfo=local_tz) # need *something* there for calibre.

            writer = writers.getWriter(options['fileform'],configuration,adapter)
            outfile = book['outfile']

            ## checks were done earlier, it's new or not dup or newer--just write it.
            if book['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
                    ('epub_for_update' not in book and book['collision'] in (UPDATE, UPDATEALWAYS)):

                # preserve logfile even on overwrite.
                if 'epub_for_update' in book:
                    adapter.logfile = get_update_data(book['epub_for_update'])[6]
                    # change the existing entries id to notid so
                    # write_epub writes a whole new set to indicate overwrite.
                    if adapter.logfile:
                        adapter.logfile = adapter.logfile.replace("span id","span notid")

                if book['collision'] == OVERWRITE and 'fileupdated' in book:
                    lastupdated = story.getMetadataRaw('dateUpdated')
                    fileupdated = book['fileupdated']

                    # updated doesn't have time (or is midnight), use dates only.
                    # updated does have time, use full timestamps.
                    if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
                            (lastupdated.time() != time.min and fileupdated > lastupdated):
                        raise NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png',showerror=False)

                logger.info("write to %s"%outfile)
                inject_cal_cols(book,story,configuration)
                writer.writeStory(outfilename=outfile,
                                  forceOverwrite=True,
                                  notification=notification)

                if adapter.story.chapter_error_count > 0:
                    book['comment'] = _('Download %(fileform)s completed, %(failed)s failed chapters, %(total)s total chapters.')%\
                        {'fileform':options['fileform'],
                         'failed':adapter.story.chapter_error_count,
                         'total':story.getMetadata("numChapters")}
                    book['chapter_error_count'] = adapter.story.chapter_error_count
                else:
                    book['comment'] = _('Download %(fileform)s completed, %(total)s chapters.')%\
                        {'fileform':options['fileform'],
                         'total':story.getMetadata("numChapters")}
                book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            ## checks were done earlier, just update it.
            elif 'epub_for_update' in book and book['collision'] in (UPDATE, UPDATEALWAYS):

                # update now handled by pre-populating the old images and
                # chapters in the adapter rather than merging epubs.
                # returns int adjusted for start-end range.
                urlchaptercount = story.getChapterCount()
                (url,
                 chaptercount,
                 adapter.oldchapters,
                 adapter.oldimgs,
                 adapter.oldcover,
                 adapter.calibrebookmark,
                 adapter.logfile,
                 adapter.oldchaptersmap,
                 adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9]

                # dup handling from fff_plugin needed for anthology updates & BG metadata.
                if book['collision'] in (UPDATE,UPDATEALWAYS):
                    if chaptercount == urlchaptercount and book['collision'] == UPDATE:
                        if merge:
                            ## Deliberately pass for UPDATEALWAYS merge.
                            book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount
                            book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                            if options['savemetacol'] != '':
                                book['savemetacol'] = story.dump_html_metadata()
                            book['outfile'] = book['epub_for_update'] # for anthology merge ops.
                            return book
                        else:
                            raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png',showerror=False)
                    elif chaptercount > urlchaptercount and not (book['collision'] == UPDATEALWAYS and adapter.getConfig('force_update_epub_always')):
                        raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite or force_update_epub_always to force update.") % (chaptercount,urlchaptercount),'dialog_error.png')
                    elif chaptercount == 0:
                        raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png')

                if not (book['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
                        and adapter.getConfig("do_update_hook"):
                    chaptercount = adapter.hookForUpdates(chaptercount)

                logger.info("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount))
                logger.info("write to %s"%outfile)

                inject_cal_cols(book,story,configuration)
                writer.writeStory(outfilename=outfile,
                                  forceOverwrite=True,
                                  notification=notification)

                if adapter.story.chapter_error_count > 0:
                    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters, %(failed)s failed chapters, for %(total)s total.')%\
                        {'fileform':options['fileform'],
                         'failed':adapter.story.chapter_error_count,
                         'added':(urlchaptercount-chaptercount),
                         'total':urlchaptercount}
                    book['chapter_error_count'] = adapter.story.chapter_error_count
                else:
                    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters for %(total)s total.')%\
                        {'fileform':options['fileform'],'added':(urlchaptercount-chaptercount),'total':urlchaptercount}
                book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()
            else:
                ## Shouldn't ever get here, but hey, it happened once
                ## before with prefs['collision']
                raise Exception("Impossible state reached -- Book: %s:\nOptions:%s:"%(book,options))

            if options['do_wordcount'] == SAVE_YES or (
                options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ):
                try:
                    wordcount = get_word_count(outfile)
                    # clear cache for the rather unusual case of
                    # numWords affecting other previously cached
                    # entries.
                    story.clear_processed_metadata_cache()
                    story.setMetadata('numWords',wordcount)
                    writer.writeStory(outfilename=outfile, forceOverwrite=True)
                    book['all_metadata'] = story.getAllMetadata(removeallentities=True)
                    if options['savemetacol'] != '':
                        book['savemetacol'] = story.dump_html_metadata()
                except Exception:
                    # FIX: was a bare 'except:', which also swallowed
                    # KeyboardInterrupt/SystemExit.  Word-count failure
                    # is deliberately non-fatal; log with traceback.
                    logger.error("WordCount failed",exc_info=True)

            if options['smarten_punctuation'] and options['fileform'] == "epub":
                # for smarten punc
                from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
                from calibre.utils.logging import Log
                from collections import namedtuple

                # do smarten_punctuation from calibre's polish feature
                data = {'smarten_punctuation':True}
                opts = ALL_OPTS.copy()
                opts.update(data)
                O = namedtuple('Options', ' '.join(ALL_OPTS.keys()))
                opts = O(**opts)

                log = Log(level=Log.DEBUG)
                polish({outfile:outfile}, opts, log, logger.info)
            ## here to catch tags set in chapters in literotica for
            ## both overwrites and updates.
            book['tags'] = story.getSubjectTags(removeallentities=True)
        except NotGoingToDownload as d:
            # Expected refusal (dup, not newer, etc.)--report, but not
            # necessarily as an error dialog.
            book['good']=False
            book['status']=_('Bad')
            book['showerror']=d.showerror
            book['comment']=unicode(d)
            book['icon'] = d.icon

        except Exception as e:
            book['good']=False
            # FIX: book['status'] was assigned _('Error') twice in this
            # handler; the redundant second assignment was removed.
            book['status']=_('Error')
            book['comment']=unicode(e)
            book['icon']='dialog_error.png'
            logger.info("Exception: %s:%s"%(book,book['comment']),exc_info=True)
    return book
|
||||
|
||||
## calibre's columns for an existing book are passed in and injected
|
||||
## into the story's metadata. For convenience, we also add labels and
|
||||
## valid_entries for them in a special [injected] section that has
|
||||
## even less precedence than [defaults]
|
||||
def inject_cal_cols(book,story,configuration):
    """
    Copy calibre column values carried in book['calibre_columns'] into
    the story's metadata, and register their labels in a throw-away
    '[injected]' config section so they are recognized as valid entries.
    The '[injected]' section has even lower precedence than [defaults].
    """
    # Always drop any previous injection before (re)building it.
    configuration.remove_section('injected')
    if 'calibre_columns' not in book:
        return
    section_lines = ['[injected]']
    injected_keys = []
    for key in book['calibre_columns'].keys():
        column = book['calibre_columns'][key]
        story.setMetadata(key, column['val'])
        section_lines.append('%s_label:%s' % (key, column['label']))
        injected_keys.append(key)
    # Nothing to whitelist when the dict was empty.
    if injected_keys:
        section_lines.append("add_to_extra_valid_entries:," + ','.join(injected_keys))
    configuration.read_file(StringIO('\n'.join(section_lines)))
    #print("added:\n%s\n"%('\n'.join(section_lines)))
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -3,22 +3,9 @@
|
|||
|
||||
[defaults]
|
||||
## [defaults] section applies to all formats and sites but may be
|
||||
## overridden at several levels. Example:
|
||||
|
||||
## [defaults]
|
||||
## titlepage_entries: category,genre, status
|
||||
## [www.whofic.com]
|
||||
## # overrides defaults.
|
||||
## titlepage_entries: category,genre, status,dateUpdated,rating
|
||||
## [epub]
|
||||
## # overrides defaults & site section
|
||||
## titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
|
||||
## [www.whofic.com:epub]
|
||||
## # overrides defaults, site section & format section
|
||||
## titlepage_entries: category,genre, status,datePublished
|
||||
## [overrides]
|
||||
## # overrides all other sections
|
||||
## titlepage_entries: category
|
||||
## overridden at several levels. See
|
||||
## https://github.com/JimmXinu/FanFicFare/wiki/INI-File for more
|
||||
## details.
|
||||
|
||||
## Some sites also require the user to confirm they are adult for
|
||||
## adult content. Uncomment by removing '#' in front of is_adult.
|
||||
|
|
@ -29,38 +16,32 @@
|
|||
## want to make them all look the same? Strip them off, then add them
|
||||
## back on with add_chapter_numbers. Don't like the way it strips
|
||||
## numbers or adds them back? See chapter_title_strip_pattern and
|
||||
## chapter_title_add_pattern.
|
||||
## chapter_title_add_pattern in defaults.ini.
|
||||
#strip_chapter_numbers:true
|
||||
#add_chapter_numbers:true
|
||||
|
||||
## Add this to genre if there's more than one category.
|
||||
#add_genre_when_multi_category: Crossover
|
||||
|
||||
[epub]
|
||||
## include images from img tags in the body and summary of stories.
|
||||
## Include images from img tags in the body and summary of stories.
|
||||
## Images will be converted to jpg for size if possible. Images work
|
||||
## in epub format only. To get mobi or other format with images,
|
||||
## download as epub and use Calibre to convert.
|
||||
## true by default, uncomment and set false to not include images.
|
||||
#include_images:true
|
||||
|
||||
## If not set, the summary will have all html stripped for safety.
|
||||
## If set false, the summary will have all html stripped for safety.
|
||||
## Both this and include_images must be true to get images in the
|
||||
## summary.
|
||||
## true by default, uncomment and set false to not keep summary html.
|
||||
#keep_summary_html:true
|
||||
|
||||
## If set, the first image found will be made the cover image. If
|
||||
## keep_summary_html is true, any images in summary will be before any
|
||||
## If set true, and there isn't a specific cover image, the first
|
||||
## image found in the story will be made the cover image. If
|
||||
## keep_summary_html is true, images in the summary will be before any
|
||||
## in chapters.
|
||||
## true by default, uncomment and set false to turn off
|
||||
#make_firstimage_cover:true
|
||||
|
||||
## Resize images down to width, height, preserving aspect ratio.
|
||||
## Nook size, with margin.
|
||||
#image_max_size: 580, 725
|
||||
|
||||
## Change image to grayscale, if graphics library allows, to save
|
||||
## space.
|
||||
#grayscale_images: false
|
||||
|
||||
|
||||
## Most common, I expect will be using this to save username/passwords
|
||||
## for different sites. Here are a few examples. See defaults.ini
|
||||
|
|
@ -72,28 +53,6 @@
|
|||
## default is false
|
||||
#collect_series: true
|
||||
|
||||
[ficwad.com]
|
||||
#username:YourUsername
|
||||
#password:YourPassword
|
||||
|
||||
[www.adastrafanfic.com]
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content.
|
||||
#is_adult:true
|
||||
|
||||
[www.twcslibrary.net]
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
#is_adult:true
|
||||
## default is false
|
||||
#collect_series: true
|
||||
|
||||
[www.fictionalley.org]
|
||||
#is_adult:true
|
||||
|
||||
[www.harrypotterfanfiction.com]
|
||||
#is_adult:true
|
||||
|
||||
[www.fimfiction.net]
|
||||
#is_adult:true
|
||||
#fail_on_password: false
|
||||
|
|
@ -102,8 +61,9 @@
|
|||
#is_adult:true
|
||||
## tth is a little unusual--it doesn't require user/pass, but the site
|
||||
## keeps track of which chapters you've read and won't send another
|
||||
## update until it thinks you're up to date. This way, on download,
|
||||
## it thinks you're up to date.
|
||||
## update until it thinks you're up to date. If you set
|
||||
## username/password, FFF will login to download. Then the site
|
||||
## thinks you're up to date.
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
|
|
|
|||
|
|
@ -1,260 +1,282 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2016, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import copy
|
||||
|
||||
from calibre.utils.config import JSONConfig
|
||||
from calibre.gui2.ui import get_gui
|
||||
|
||||
from calibre_plugins.fanficfare_plugin.common_utils import get_library_uuid
|
||||
|
||||
SKIP=_('Skip')
|
||||
ADDNEW=_('Add New Book')
|
||||
UPDATE=_('Update EPUB if New Chapters')
|
||||
UPDATEALWAYS=_('Update EPUB Always')
|
||||
OVERWRITE=_('Overwrite if Newer')
|
||||
OVERWRITEALWAYS=_('Overwrite Always')
|
||||
CALIBREONLY=_('Update Calibre Metadata from Web Site')
|
||||
CALIBREONLYSAVECOL=_('Update Calibre Metadata from Saved Metadata Column')
|
||||
collision_order=[SKIP,
|
||||
ADDNEW,
|
||||
UPDATE,
|
||||
UPDATEALWAYS,
|
||||
OVERWRITE,
|
||||
OVERWRITEALWAYS,
|
||||
CALIBREONLY,
|
||||
CALIBREONLYSAVECOL,]
|
||||
|
||||
# best idea I've had for how to deal with config/pref saving the
|
||||
# collision name in english.
|
||||
SAVE_SKIP='Skip'
|
||||
SAVE_ADDNEW='Add New Book'
|
||||
SAVE_UPDATE='Update EPUB if New Chapters'
|
||||
SAVE_UPDATEALWAYS='Update EPUB Always'
|
||||
SAVE_OVERWRITE='Overwrite if Newer'
|
||||
SAVE_OVERWRITEALWAYS='Overwrite Always'
|
||||
SAVE_CALIBREONLY='Update Calibre Metadata Only'
|
||||
SAVE_CALIBREONLYSAVECOL='Update Calibre Metadata Only(Saved Column)'
|
||||
save_collisions={
|
||||
SKIP:SAVE_SKIP,
|
||||
ADDNEW:SAVE_ADDNEW,
|
||||
UPDATE:SAVE_UPDATE,
|
||||
UPDATEALWAYS:SAVE_UPDATEALWAYS,
|
||||
OVERWRITE:SAVE_OVERWRITE,
|
||||
OVERWRITEALWAYS:SAVE_OVERWRITEALWAYS,
|
||||
CALIBREONLY:SAVE_CALIBREONLY,
|
||||
CALIBREONLYSAVECOL:SAVE_CALIBREONLYSAVECOL,
|
||||
SAVE_SKIP:SKIP,
|
||||
SAVE_ADDNEW:ADDNEW,
|
||||
SAVE_UPDATE:UPDATE,
|
||||
SAVE_UPDATEALWAYS:UPDATEALWAYS,
|
||||
SAVE_OVERWRITE:OVERWRITE,
|
||||
SAVE_OVERWRITEALWAYS:OVERWRITEALWAYS,
|
||||
SAVE_CALIBREONLY:CALIBREONLY,
|
||||
SAVE_CALIBREONLYSAVECOL:CALIBREONLYSAVECOL,
|
||||
}
|
||||
|
||||
anthology_collision_order=[UPDATE,
|
||||
UPDATEALWAYS,
|
||||
OVERWRITEALWAYS]
|
||||
|
||||
|
||||
# Show translated strings, but save the same string in prefs so your
|
||||
# prefs are the same in different languages.
|
||||
YES=_('Yes, Always')
|
||||
SAVE_YES='Yes'
|
||||
YES_IF_IMG=_('Yes, if EPUB has a cover image')
|
||||
SAVE_YES_IF_IMG='Yes, if img'
|
||||
YES_UNLESS_IMG=_('Yes, unless FanFicFare found a cover image')
|
||||
SAVE_YES_UNLESS_IMG='Yes, unless img'
|
||||
YES_UNLESS_SITE=_('Yes, unless found on site')
|
||||
SAVE_YES_UNLESS_SITE='Yes, unless site'
|
||||
NO=_('No')
|
||||
SAVE_NO='No'
|
||||
prefs_save_options = {
|
||||
YES:SAVE_YES,
|
||||
SAVE_YES:YES,
|
||||
YES_IF_IMG:SAVE_YES_IF_IMG,
|
||||
SAVE_YES_IF_IMG:YES_IF_IMG,
|
||||
YES_UNLESS_IMG:SAVE_YES_UNLESS_IMG,
|
||||
SAVE_YES_UNLESS_IMG:YES_UNLESS_IMG,
|
||||
NO:SAVE_NO,
|
||||
SAVE_NO:NO,
|
||||
YES_UNLESS_SITE:SAVE_YES_UNLESS_SITE,
|
||||
SAVE_YES_UNLESS_SITE:YES_UNLESS_SITE,
|
||||
}
|
||||
updatecalcover_order=[YES,YES_IF_IMG,NO]
|
||||
gencalcover_order=[YES,YES_UNLESS_IMG,NO]
|
||||
do_wordcount_order=[YES,YES_UNLESS_SITE,NO]
|
||||
|
||||
# if don't have any settings for FanFicFarePlugin, copy from
|
||||
# predecessor FanFictionDownLoaderPlugin.
|
||||
FFDL_PREFS_NAMESPACE = 'FanFictionDownLoaderPlugin'
|
||||
PREFS_NAMESPACE = 'FanFicFarePlugin'
|
||||
PREFS_KEY_SETTINGS = 'settings'
|
||||
|
||||
# Set defaults used by all. Library specific settings continue to
|
||||
# take from here.
|
||||
default_prefs = {}
|
||||
default_prefs['personal.ini'] = get_resources('plugin-example.ini')
|
||||
default_prefs['cal_cols_pass_in'] = False
|
||||
default_prefs['rejecturls'] = ''
|
||||
default_prefs['rejectreasons'] = '''Sucked
|
||||
Boring
|
||||
Dup from another site'''
|
||||
default_prefs['reject_always'] = False
|
||||
default_prefs['reject_delete_default'] = True
|
||||
|
||||
default_prefs['updatemeta'] = True
|
||||
default_prefs['bgmeta'] = False
|
||||
default_prefs['updateepubcover'] = False
|
||||
default_prefs['keeptags'] = False
|
||||
default_prefs['suppressauthorsort'] = False
|
||||
default_prefs['suppresstitlesort'] = False
|
||||
default_prefs['mark'] = False
|
||||
default_prefs['showmarked'] = False
|
||||
default_prefs['autoconvert'] = False
|
||||
default_prefs['urlsfromclip'] = True
|
||||
default_prefs['updatedefault'] = True
|
||||
default_prefs['fileform'] = 'epub'
|
||||
default_prefs['collision'] = SAVE_UPDATE
|
||||
default_prefs['deleteotherforms'] = False
|
||||
default_prefs['adddialogstaysontop'] = False
|
||||
default_prefs['lookforurlinhtml'] = False
|
||||
default_prefs['checkforseriesurlid'] = True
|
||||
default_prefs['auto_reject_seriesurlid'] = False
|
||||
default_prefs['checkforurlchange'] = True
|
||||
default_prefs['injectseries'] = False
|
||||
default_prefs['matchtitleauth'] = True
|
||||
default_prefs['do_wordcount'] = SAVE_YES_UNLESS_SITE
|
||||
default_prefs['smarten_punctuation'] = False
|
||||
default_prefs['show_est_time'] = False
|
||||
|
||||
default_prefs['send_lists'] = ''
|
||||
default_prefs['read_lists'] = ''
|
||||
default_prefs['addtolists'] = False
|
||||
default_prefs['addtoreadlists'] = False
|
||||
default_prefs['addtolistsonread'] = False
|
||||
default_prefs['autounnew'] = False
|
||||
|
||||
default_prefs['updatecalcover'] = None
|
||||
default_prefs['gencalcover'] = SAVE_YES
|
||||
default_prefs['updatecover'] = False
|
||||
default_prefs['calibre_gen_cover'] = False
|
||||
default_prefs['plugin_gen_cover'] = True
|
||||
default_prefs['gcnewonly'] = False
|
||||
default_prefs['gc_site_settings'] = {}
|
||||
default_prefs['allow_gc_from_ini'] = True
|
||||
default_prefs['gc_polish_cover'] = False
|
||||
|
||||
default_prefs['countpagesstats'] = []
|
||||
default_prefs['wordcountmissing'] = False
|
||||
|
||||
default_prefs['errorcol'] = ''
|
||||
default_prefs['save_all_errors'] = True
|
||||
default_prefs['savemetacol'] = ''
|
||||
default_prefs['lastcheckedcol'] = ''
|
||||
default_prefs['custom_cols'] = {}
|
||||
default_prefs['custom_cols_newonly'] = {}
|
||||
default_prefs['allow_custcol_from_ini'] = True
|
||||
|
||||
default_prefs['std_cols_newonly'] = {}
|
||||
default_prefs['set_author_url'] = True
|
||||
default_prefs['includecomments'] = False
|
||||
default_prefs['anth_comments_newonly'] = True
|
||||
|
||||
default_prefs['imapserver'] = ''
|
||||
default_prefs['imapuser'] = ''
|
||||
default_prefs['imappass'] = ''
|
||||
default_prefs['imapsessionpass'] = False
|
||||
default_prefs['imapfolder'] = 'INBOX'
|
||||
default_prefs['imapmarkread'] = True
|
||||
default_prefs['auto_reject_from_email'] = False
|
||||
default_prefs['update_existing_only_from_email'] = False
|
||||
default_prefs['download_from_email_immediately'] = False
|
||||
|
||||
def set_library_config(library_config,db):
    """Persist the per-library FFF settings dict into calibre's db prefs
    under this plugin's namespace."""
    db.prefs.set_namespaced(PREFS_NAMESPACE, PREFS_KEY_SETTINGS, library_config)
|
||||
|
||||
def get_library_config(db):
    """
    Return the FFF settings dict for the library behind db.

    Lookup order: this plugin's namespaced prefs, then the predecessor
    FanFictionDownLoaderPlugin's prefs, then a deep copy of
    default_prefs.

    FIX: removed the unused local ``library_id = get_library_uuid(db)``
    and the dead scaffolding ``library_config = None`` followed by an
    always-true ``if library_config is None:`` guard.
    """
    library_config = db.prefs.get_namespaced(PREFS_NAMESPACE,
                                             PREFS_KEY_SETTINGS)

    # if don't have any settings for FanFicFarePlugin, copy from
    # predecessor FanFictionDownLoaderPlugin.
    if library_config is None:
        logger.info("Attempting to read settings from predecessor--FFDL")
        library_config = db.prefs.get_namespaced(FFDL_PREFS_NAMESPACE,
                                                 PREFS_KEY_SETTINGS)
    if library_config is None:
        # defaults.  deepcopy so later per-library edits don't mutate
        # the shared default_prefs dict.
        logger.info("Using default settings")
        library_config = copy.deepcopy(default_prefs)

    return library_config
|
||||
|
||||
# fake out so I don't have to change the prefs calls anywhere. The
|
||||
# Java programmer in me is offended by op-overloading, but it's very
|
||||
# tidy.
|
||||
class PrefsFacade():
    """
    Dict-like facade over the per-library settings so existing
    ``prefs[...]`` call sites don't change.  Settings are cached per
    library uuid and re-fetched when the open library changes.  Missing
    keys fall back to default_prefs.
    """

    def __init__(self,passed_db=None):
        self.default_prefs = default_prefs
        self.libraryid = None
        self.current_prefs = None
        self.passed_db=passed_db

    def _get_db(self):
        # In the GUI plugin we want current db so we detect when
        # it's changed.  CLI plugin calls need to pass db in.
        if self.passed_db:
            return self.passed_db
        return get_gui().current_db

    def _get_prefs(self):
        current_id = get_library_uuid(self._get_db())
        # Refresh the cache on first use or when the library switched.
        needs_refresh = self.current_prefs is None or self.libraryid != current_id
        if needs_refresh:
            self.libraryid = current_id
            self.current_prefs = get_library_config(self._get_db())
        return self.current_prefs

    def __getitem__(self,k):
        settings = self._get_prefs()
        if k in settings:
            return settings[k]
        # pulls from default_prefs.defaults automatically if not set
        # in default_prefs
        return self.default_prefs[k]

    def __setitem__(self,k,v):
        # In-memory only; save_to_db() persists.
        self._get_prefs()[k]=v

    def __delitem__(self,k):
        settings = self._get_prefs()
        if k in settings:
            del settings[k]

    def save_to_db(self):
        set_library_config(self._get_prefs(),self._get_db())
|
||||
|
||||
prefs = PrefsFacade()
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2021, Jim Miller'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import copy
|
||||
|
||||
from calibre.gui2.ui import get_gui
|
||||
|
||||
# pulls in translation files for _() strings
|
||||
try:
|
||||
load_translations()
|
||||
except NameError:
|
||||
pass # load_translations() added in calibre 1.9
|
||||
|
||||
from calibre_plugins.fanficfare_plugin import __version__ as plugin_version
|
||||
from calibre_plugins.fanficfare_plugin.common_utils import get_library_uuid
|
||||
|
||||
SKIP=_('Skip')
|
||||
ADDNEW=_('Add New Book')
|
||||
UPDATE=_('Update EPUB if New Chapters')
|
||||
UPDATEALWAYS=_('Update EPUB Always')
|
||||
OVERWRITE=_('Overwrite if Newer')
|
||||
OVERWRITEALWAYS=_('Overwrite Always')
|
||||
CALIBREONLY=_('Update Calibre Metadata from Web Site')
|
||||
CALIBREONLYSAVECOL=_('Update Calibre Metadata from Saved Metadata Column')
|
||||
collision_order=[SKIP,
|
||||
ADDNEW,
|
||||
UPDATE,
|
||||
UPDATEALWAYS,
|
||||
OVERWRITE,
|
||||
OVERWRITEALWAYS,
|
||||
CALIBREONLY,
|
||||
CALIBREONLYSAVECOL,]
|
||||
|
||||
# best idea I've had for how to deal with config/pref saving the
|
||||
# collision name in english.
|
||||
SAVE_SKIP='Skip'
|
||||
SAVE_ADDNEW='Add New Book'
|
||||
SAVE_UPDATE='Update EPUB if New Chapters'
|
||||
SAVE_UPDATEALWAYS='Update EPUB Always'
|
||||
SAVE_OVERWRITE='Overwrite if Newer'
|
||||
SAVE_OVERWRITEALWAYS='Overwrite Always'
|
||||
SAVE_CALIBREONLY='Update Calibre Metadata Only'
|
||||
SAVE_CALIBREONLYSAVECOL='Update Calibre Metadata Only(Saved Column)'
|
||||
save_collisions={
|
||||
SKIP:SAVE_SKIP,
|
||||
ADDNEW:SAVE_ADDNEW,
|
||||
UPDATE:SAVE_UPDATE,
|
||||
UPDATEALWAYS:SAVE_UPDATEALWAYS,
|
||||
OVERWRITE:SAVE_OVERWRITE,
|
||||
OVERWRITEALWAYS:SAVE_OVERWRITEALWAYS,
|
||||
CALIBREONLY:SAVE_CALIBREONLY,
|
||||
CALIBREONLYSAVECOL:SAVE_CALIBREONLYSAVECOL,
|
||||
SAVE_SKIP:SKIP,
|
||||
SAVE_ADDNEW:ADDNEW,
|
||||
SAVE_UPDATE:UPDATE,
|
||||
SAVE_UPDATEALWAYS:UPDATEALWAYS,
|
||||
SAVE_OVERWRITE:OVERWRITE,
|
||||
SAVE_OVERWRITEALWAYS:OVERWRITEALWAYS,
|
||||
SAVE_CALIBREONLY:CALIBREONLY,
|
||||
SAVE_CALIBREONLYSAVECOL:CALIBREONLYSAVECOL,
|
||||
}
|
||||
|
||||
anthology_collision_order=[UPDATE,
|
||||
UPDATEALWAYS,
|
||||
OVERWRITEALWAYS]
|
||||
|
||||
|
||||
# Show translated strings, but save the same string in prefs so your
|
||||
# prefs are the same in different languages.
|
||||
YES=_('Yes, Always')
|
||||
SAVE_YES='Yes'
|
||||
YES_IF_IMG=_('Yes, if EPUB has a cover image')
|
||||
SAVE_YES_IF_IMG='Yes, if img'
|
||||
YES_UNLESS_IMG=_('Yes, unless FanFicFare found a cover image')
|
||||
SAVE_YES_UNLESS_IMG='Yes, unless img'
|
||||
YES_UNLESS_SITE=_('Yes, unless found on site')
|
||||
SAVE_YES_UNLESS_SITE='Yes, unless site'
|
||||
NO=_('No')
|
||||
SAVE_NO='No'
|
||||
prefs_save_options = {
|
||||
YES:SAVE_YES,
|
||||
SAVE_YES:YES,
|
||||
YES_IF_IMG:SAVE_YES_IF_IMG,
|
||||
SAVE_YES_IF_IMG:YES_IF_IMG,
|
||||
YES_UNLESS_IMG:SAVE_YES_UNLESS_IMG,
|
||||
SAVE_YES_UNLESS_IMG:YES_UNLESS_IMG,
|
||||
NO:SAVE_NO,
|
||||
SAVE_NO:NO,
|
||||
YES_UNLESS_SITE:SAVE_YES_UNLESS_SITE,
|
||||
SAVE_YES_UNLESS_SITE:YES_UNLESS_SITE,
|
||||
}
|
||||
updatecalcover_order=[YES,YES_IF_IMG,NO]
|
||||
gencalcover_order=[YES,YES_UNLESS_IMG,NO]
|
||||
do_wordcount_order=[YES,YES_UNLESS_SITE,NO]
|
||||
|
||||
PREFS_NAMESPACE = 'FanFicFarePlugin'
|
||||
PREFS_KEY_SETTINGS = 'settings'
|
||||
|
||||
# Set defaults used by all. Library specific settings continue to
|
||||
# take from here.
|
||||
# Set defaults used by all. Library specific settings continue to
# take from here.
default_prefs = {}
default_prefs['last_saved_version'] = (0,0,0)
default_prefs['personal.ini'] = get_resources('plugin-example.ini')
default_prefs['cal_cols_pass_in'] = False
default_prefs['rejecturls'] = '' # removed, but need empty default for fallback
default_prefs['rejectreasons'] = '''Sucked
Boring
Dup from another site'''
default_prefs['reject_always'] = False
default_prefs['reject_delete_default'] = True

## Metadata / download behavior defaults.
default_prefs['updatemeta'] = True
default_prefs['bgmeta'] = False
#default_prefs['updateepubcover'] = True # removed in favor of always True Oct 2022
default_prefs['keeptags'] = False
default_prefs['suppressauthorsort'] = False
default_prefs['suppresstitlesort'] = False
default_prefs['authorcase'] = False
default_prefs['titlecase'] = False
default_prefs['seriescase'] = False
default_prefs['setanthologyseries'] = False
default_prefs['mark'] = False
default_prefs['mark_success'] = True
default_prefs['mark_failed'] = True
default_prefs['mark_chapter_error'] = True
default_prefs['showmarked'] = False
default_prefs['autoconvert'] = False
default_prefs['urlsfromclip'] = True
default_prefs['button_instantpopup'] = False
default_prefs['updatedefault'] = True
default_prefs['fileform'] = 'epub'
default_prefs['collision'] = SAVE_UPDATE
default_prefs['deleteotherforms'] = False
default_prefs['adddialogstaysontop'] = False
default_prefs['lookforurlinhtml'] = False
default_prefs['checkforseriesurlid'] = True
default_prefs['auto_reject_seriesurlid'] = False
default_prefs['mark_series_anthologies'] = False
default_prefs['checkforurlchange'] = True
default_prefs['injectseries'] = False
default_prefs['matchtitleauth'] = True
default_prefs['do_wordcount'] = SAVE_YES_UNLESS_SITE
default_prefs['smarten_punctuation'] = False
default_prefs['show_est_time'] = False

## Reading-list (Reading List plugin integration) defaults.
default_prefs['send_lists'] = ''
default_prefs['read_lists'] = ''
default_prefs['addtolists'] = False
default_prefs['addtoreadlists'] = False
default_prefs['addtolistsonread'] = False
default_prefs['autounnew'] = False

## Cover handling defaults.
default_prefs['updatecalcover'] = SAVE_YES_IF_IMG
default_prefs['covernewonly'] = False
default_prefs['gencalcover'] = SAVE_YES_UNLESS_IMG
default_prefs['updatecover'] = False
default_prefs['calibre_gen_cover'] = True
default_prefs['plugin_gen_cover'] = False
default_prefs['gcnewonly'] = True
default_prefs['gc_site_settings'] = {}
default_prefs['allow_gc_from_ini'] = True
default_prefs['gc_polish_cover'] = False

## Count Pages plugin integration defaults.
default_prefs['countpagesstats'] = []
default_prefs['wordcountmissing'] = False

## Custom / standard column mapping defaults.
default_prefs['errorcol'] = ''
default_prefs['save_all_errors'] = True
default_prefs['savemetacol'] = ''
default_prefs['lastcheckedcol'] = ''
default_prefs['custom_cols'] = {}
default_prefs['custom_cols_newonly'] = {}
default_prefs['allow_custcol_from_ini'] = True

default_prefs['std_cols_newonly'] = {}
default_prefs['set_author_url'] = True
default_prefs['set_series_url'] = True
default_prefs['includecomments'] = False
default_prefs['anth_comments_newonly'] = True

## Email (IMAP) polling defaults.
default_prefs['imapserver'] = ''
default_prefs['imapuser'] = ''
default_prefs['imappass'] = ''
default_prefs['imapsessionpass'] = False
default_prefs['imapfolder'] = 'INBOX'
default_prefs['imaptags'] = ''
default_prefs['imapmarkread'] = True
default_prefs['auto_reject_from_email'] = False
default_prefs['update_existing_only_from_email'] = False
default_prefs['download_from_email_immediately'] = False

## Background job scheduling defaults.
#default_prefs['single_proc_jobs'] = True # setting and code removed
default_prefs['site_split_jobs'] = True
default_prefs['reconsolidate_jobs'] = True
|
||||
|
||||
def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
    """Store library_config in the calibre per-library database under
    the plugin's preferences namespace, keyed by setting."""
    db.prefs.set_namespaced(PREFS_NAMESPACE, setting, library_config)
|
||||
|
||||
def get_library_config(db,setting=PREFS_KEY_SETTINGS,def_prefs=default_prefs):
    """Return the plugin settings stored in db under setting, falling
    back to a deep copy of def_prefs when this library has none saved.

    The deepcopy keeps callers from mutating the shared defaults dict.
    (Removed: an unused get_library_uuid(db) call and a dead
    'library_config = None' / 'if library_config is None:' scaffold
    left over from an earlier caching scheme.)
    """
    library_config = db.prefs.get_namespaced(PREFS_NAMESPACE, setting)

    if library_config is None:
        # Library has never saved settings -- use defaults.
        logger.info("Using default settings")
        library_config = copy.deepcopy(def_prefs)

    return library_config
|
||||
|
||||
# fake out so I don't have to change the prefs calls anywhere. The
|
||||
# Java programmer in me is offended by op-overloading, but it's very
|
||||
# tidy.
|
||||
class PrefsFacade():
    """Dict-like facade over per-library plugin preferences.

    Lazily loads the settings for the current calibre library and
    reloads them when the open library changes, so callers can just use
    prefs['key'] without tracking library switches.  Changes made via
    __setitem__/__delitem__ are in-memory until save_to_db() is called.

    (Fix: '== None' comparison replaced with 'is None' per PEP 8;
    commented-out dead code removed.)
    """

    def __init__(self,passed_db=None,setting=PREFS_KEY_SETTINGS,def_prefs=default_prefs):
        self.default_prefs = def_prefs
        self.setting=setting
        self.libraryid = None       # uuid of the library current_prefs was loaded from
        self.current_prefs = None   # cached settings dict, loaded on demand
        self.passed_db=passed_db

    def _get_db(self):
        if self.passed_db:
            return self.passed_db
        else:
            # In the GUI plugin we want current db so we detect when
            # it's changed.  CLI plugin calls need to pass db in.
            return get_gui().current_db

    def _get_prefs(self):
        """Return the settings dict for the current library, (re)loading
        when not yet loaded or when the open library has changed."""
        libraryid = get_library_uuid(self._get_db())
        if self.current_prefs is None or self.libraryid != libraryid:
            self.libraryid = libraryid
            self.current_prefs = get_library_config(self._get_db(),
                                                    setting=self.setting,
                                                    def_prefs=self.default_prefs)
        return self.current_prefs

    def __getitem__(self,k):
        prefs = self._get_prefs()
        if k not in prefs:
            # pulls from default_prefs.defaults automatically if not set
            # in default_prefs
            return self.default_prefs[k]
        return prefs[k]

    def __setitem__(self,k,v):
        # in-memory only; call save_to_db() to persist.
        self._get_prefs()[k]=v

    def __delitem__(self,k):
        prefs = self._get_prefs()
        if k in prefs:
            del prefs[k]

    def save_to_db(self):
        """Persist the in-memory settings for the current library,
        stamping them with the running plugin version."""
        self['last_saved_version'] = plugin_version
        set_library_config(self._get_prefs(),self._get_db(),setting=self.setting)
|
||||
|
||||
# Module-level singleton used throughout the plugin for settings access.
prefs = PrefsFacade(setting=PREFS_KEY_SETTINGS,
                    def_prefs=default_prefs)

# Separate storage for rejected-URL records, kept out of the main
# settings blob.
rejects_data = PrefsFacade(setting="rejects_data",
                           def_prefs={'rejecturls_data':[]})
|
||||
|
|
|
|||
2613
calibre-plugin/translations/ar.po
Normal file
2613
calibre-plugin/translations/ar.po
Normal file
File diff suppressed because it is too large
Load diff
2615
calibre-plugin/translations/ca.po
Normal file
2615
calibre-plugin/translations/ca.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2613
calibre-plugin/translations/ja.po
Normal file
2613
calibre-plugin/translations/ja.po
Normal file
File diff suppressed because it is too large
Load diff
2613
calibre-plugin/translations/ko.po
Normal file
2613
calibre-plugin/translations/ko.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2612
calibre-plugin/translations/mr.po
Normal file
2612
calibre-plugin/translations/mr.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2616
calibre-plugin/translations/pl.po
Normal file
2616
calibre-plugin/translations/pl.po
Normal file
File diff suppressed because it is too large
Load diff
2615
calibre-plugin/translations/pt.po
Normal file
2615
calibre-plugin/translations/pt.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2616
calibre-plugin/translations/ru.po
Normal file
2616
calibre-plugin/translations/ru.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
2612
calibre-plugin/translations/ta.po
Normal file
2612
calibre-plugin/translations/ta.po
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -18,6 +18,7 @@ logger = logging.getLogger(__name__)
|
|||
import re
|
||||
|
||||
from calibre.ebooks.oeb.iterator import EbookIterator
|
||||
from fanficfare.six import text_type as unicode
|
||||
|
||||
RE_HTML_BODY = re.compile(u'<body[^>]*>(.*)</body>', re.UNICODE | re.DOTALL | re.IGNORECASE)
|
||||
RE_STRIP_MARKUP = re.compile(u'<[^>]+>', re.UNICODE)
|
||||
|
|
@ -28,7 +29,7 @@ def get_word_count(book_path):
|
|||
Estimate a word count
|
||||
'''
|
||||
from calibre.utils.localization import get_lang
|
||||
|
||||
|
||||
iterator = _open_epub_file(book_path)
|
||||
|
||||
lang = iterator.opf.language
|
||||
|
|
@ -52,7 +53,7 @@ def _get_epub_standard_word_count(iterator, lang='en'):
|
|||
'''
|
||||
|
||||
book_text = _read_epub_contents(iterator, strip_html=True)
|
||||
|
||||
|
||||
try:
|
||||
from calibre.spell.break_iterator import count_words
|
||||
wordcount = count_words(book_text, lang)
|
||||
|
|
@ -67,7 +68,7 @@ def _get_epub_standard_word_count(iterator, lang='en'):
|
|||
wordcount = get_wordcount_obj(book_text)
|
||||
wordcount = wordcount.words
|
||||
logger.debug('\tWord count - old method:%s'%wordcount)
|
||||
|
||||
|
||||
return wordcount
|
||||
|
||||
def _read_epub_contents(iterator, strip_html=False):
|
||||
|
|
@ -92,4 +93,3 @@ def _extract_body_text(data):
|
|||
if body:
|
||||
return RE_STRIP_MARKUP.sub('', body[0]).replace('.','. ')
|
||||
return ''
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,23 @@
|
|||
# coding: utf-8
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import re
|
||||
import codecs
|
||||
|
||||
stack = []
|
||||
|
||||
|
|
@ -54,4 +70,4 @@ def flush():
|
|||
del stack[:]
|
||||
|
||||
def get_stack():
|
||||
return stack
|
||||
return stack
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Fanficdownloader team, 2016 FanFicFare team
|
||||
# Copyright 2015 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the 'License');
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -14,20 +14,23 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from __future__ import absolute_import
|
||||
|
||||
try:
|
||||
# just a way to switch between web service and CLI/PI
|
||||
import google.appengine.api
|
||||
try: # just a way to switch between CLI and PI
|
||||
from calibre.constants import DEBUG
|
||||
if os.environ.get('CALIBRE_WORKER', None) is not None or DEBUG:
|
||||
loghandler.setLevel(logging.DEBUG)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
else:
|
||||
loghandler.setLevel(logging.CRITICAL)
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
except:
|
||||
try: # just a way to switch between CLI and PI
|
||||
import calibre.constants
|
||||
except:
|
||||
import sys
|
||||
if sys.version_info >= (2, 7):
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
loghandler=logging.StreamHandler()
|
||||
loghandler.setFormatter(logging.Formatter("FFF: %(levelname)s: %(asctime)s: %(filename)s(%(lineno)d): %(message)s"))
|
||||
logger.addHandler(loghandler)
|
||||
loghandler.setLevel(logging.DEBUG)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
import sys
|
||||
if sys.version_info >= (2, 7):
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
loghandler=logging.StreamHandler()
|
||||
loghandler.setFormatter(logging.Formatter("FFF: %(levelname)s: %(asctime)s: %(filename)s(%(lineno)d): %(message)s"))
|
||||
logger.addHandler(loghandler)
|
||||
loghandler.setLevel(logging.DEBUG)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2016 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,159 +15,132 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import os, re, sys, glob, types
|
||||
from os.path import dirname, basename, normpath
|
||||
from __future__ import absolute_import
|
||||
import os, re, sys, types
|
||||
from contextlib import contextmanager
|
||||
import logging
|
||||
import urlparse as up
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six.moves.urllib.parse import urlparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..configurable import Configuration
|
||||
from .. import configurable as configurable
|
||||
|
||||
## must import each adapter here.
|
||||
|
||||
import adapter_test1
|
||||
import adapter_fanfictionnet
|
||||
import adapter_fanficcastletvnet
|
||||
import adapter_fictionalleyorg
|
||||
import adapter_fictionpresscom
|
||||
import adapter_ficwadcom
|
||||
import adapter_fimfictionnet
|
||||
import adapter_harrypotterfanfictioncom
|
||||
import adapter_mediaminerorg
|
||||
import adapter_potionsandsnitches
|
||||
import adapter_tenhawkpresentscom
|
||||
import adapter_adastrafanficcom
|
||||
import adapter_tthfanficorg
|
||||
import adapter_twilightednet
|
||||
import adapter_whoficcom
|
||||
import adapter_siyecouk
|
||||
import adapter_archiveofourownorg
|
||||
import adapter_ficbooknet
|
||||
import adapter_portkeyorg
|
||||
import adapter_mugglenetcom
|
||||
import adapter_hpfandomnet
|
||||
import adapter_nfacommunitycom
|
||||
import adapter_midnightwhispersca
|
||||
import adapter_ksarchivecom
|
||||
import adapter_archiveskyehawkecom
|
||||
import adapter_squidgeorgpeja
|
||||
import adapter_libraryofmoriacom
|
||||
import adapter_wraithbaitcom
|
||||
import adapter_chaossycophanthexcom
|
||||
import adapter_dramioneorg
|
||||
import adapter_erosnsapphosycophanthexcom
|
||||
import adapter_lumossycophanthexcom
|
||||
import adapter_occlumencysycophanthexcom
|
||||
import adapter_phoenixsongnet
|
||||
import adapter_walkingtheplankorg
|
||||
import adapter_ashwindersycophanthexcom
|
||||
import adapter_thehexfilesnet
|
||||
import adapter_dokugacom
|
||||
import adapter_iketernalnet
|
||||
import adapter_onedirectionfanfictioncom
|
||||
import adapter_storiesofardacom
|
||||
import adapter_samdeanarchivenu
|
||||
import adapter_destinysgatewaycom
|
||||
import adapter_ncisfictionnet
|
||||
import adapter_thealphagatecom
|
||||
import adapter_fanfiktionde
|
||||
import adapter_ponyfictionarchivenet
|
||||
import adapter_ncisficcom
|
||||
import adapter_nationallibrarynet
|
||||
import adapter_themasquenet
|
||||
import adapter_pretendercentrecom
|
||||
import adapter_darksolaceorg
|
||||
import adapter_finestoriescom
|
||||
import adapter_hpfanficarchivecom
|
||||
import adapter_twilightarchivescom
|
||||
import adapter_nhamagicalworldsus
|
||||
import adapter_hlfictionnet
|
||||
import adapter_dracoandginnycom
|
||||
import adapter_scarvesandcoffeenet
|
||||
import adapter_thepetulantpoetesscom
|
||||
import adapter_wolverineandroguecom
|
||||
import adapter_sinfuldesireorg
|
||||
import adapter_merlinficdtwinscouk
|
||||
import adapter_thehookupzonenet
|
||||
import adapter_bloodtiesfancom
|
||||
import adapter_indeathnet
|
||||
import adapter_qafficcom
|
||||
import adapter_efpfanficnet
|
||||
import adapter_potterficscom
|
||||
import adapter_efictionestelielde
|
||||
import adapter_pommedesangcom
|
||||
import adapter_restrictedsectionorg
|
||||
import adapter_imagineeficcom
|
||||
import adapter_psychficcom
|
||||
import adapter_asr3slashzoneorg
|
||||
import adapter_potterheadsanonymouscom
|
||||
import adapter_fictionpadcom
|
||||
import adapter_storiesonlinenet
|
||||
import adapter_trekiverseorg
|
||||
import adapter_literotica
|
||||
import adapter_voracity2eficcom
|
||||
import adapter_spikeluvercom
|
||||
import adapter_bloodshedversecom
|
||||
import adapter_nocturnallightnet
|
||||
import adapter_fanfichu
|
||||
import adapter_fictionmaniatv
|
||||
import adapter_tolkienfanfiction
|
||||
import adapter_themaplebookshelf
|
||||
import adapter_fannation
|
||||
import adapter_sheppardweircom
|
||||
import adapter_samandjacknet
|
||||
import adapter_csiforensicscom
|
||||
import adapter_lotrfanfictioncom
|
||||
import adapter_fhsarchivecom
|
||||
import adapter_fanfictionjunkiesde
|
||||
import adapter_tgstorytimecom
|
||||
import adapter_itcouldhappennet
|
||||
import adapter_forumsspacebattlescom
|
||||
import adapter_forumssufficientvelocitycom
|
||||
import adapter_forumquestionablequestingcom
|
||||
import adapter_ninelivesarchivecom
|
||||
import adapter_masseffect2in
|
||||
import adapter_quotevcom
|
||||
import adapter_mcstoriescom
|
||||
import adapter_buffygilescom
|
||||
import adapter_andromedawebcom
|
||||
import adapter_artemisfowlcom
|
||||
import adapter_naiceanilmenet
|
||||
import adapter_deepinmysoulnet
|
||||
import adapter_haremlucifaelcom
|
||||
import adapter_kiarepositorymujajinet
|
||||
import adapter_fanfictionlucifaelcom
|
||||
import adapter_adultfanfictionorg
|
||||
import adapter_fictionhuntcom
|
||||
import adapter_royalroadl
|
||||
import adapter_chosentwofanficcom
|
||||
import adapter_bdsmlibrarycom
|
||||
import adapter_ficsitecom
|
||||
import adapter_asexstoriescom
|
||||
import adapter_gluttonyfictioncom
|
||||
import adapter_valentchambercom
|
||||
import adapter_looselugscom
|
||||
import adapter_wwwgiantessworldnet
|
||||
import adapter_lotrgficcom
|
||||
import adapter_tomparisdormcom
|
||||
import adapter_writingwhimsicalwanderingsnet
|
||||
import adapter_sugarquillnet
|
||||
import adapter_wwwarea52hkhnet
|
||||
import adapter_starslibrarynet
|
||||
import adapter_fanficauthorsnet
|
||||
import adapter_fireflyfansnet
|
||||
import adapter_fireflypopulliorg
|
||||
import adapter_sebklainenet
|
||||
import adapter_shriftweborgbfa
|
||||
import adapter_trekfanfictionnet
|
||||
import adapter_wuxiaworldcom
|
||||
import adapter_wwwlushstoriescom
|
||||
import adapter_wwwutopiastoriescom
|
||||
import adapter_sinfuldreamscomunicornfic
|
||||
import adapter_sinfuldreamscomwhisperedmuse
|
||||
import adapter_sinfuldreamscomwickedtemptation
|
||||
from . import base_adapter
|
||||
from . import base_efiction_adapter
|
||||
from . import adapter_test1
|
||||
from . import adapter_test2
|
||||
from . import adapter_test3
|
||||
from . import adapter_test4
|
||||
from . import adapter_fanfictionnet
|
||||
from . import adapter_fictionalleyarchiveorg
|
||||
from . import adapter_fictionpresscom
|
||||
from . import adapter_ficwadcom
|
||||
from . import adapter_fimfictionnet
|
||||
from . import adapter_mediaminerorg
|
||||
from . import adapter_potionsandsnitches
|
||||
from . import adapter_tenhawkpresents
|
||||
from . import adapter_adastrafanficcom
|
||||
from . import adapter_tthfanficorg
|
||||
from . import adapter_twilightednet
|
||||
from . import adapter_whoficcom
|
||||
from . import adapter_siyecouk
|
||||
from . import adapter_archiveofourownorg
|
||||
from . import adapter_ficbooknet
|
||||
from . import adapter_midnightwhispers
|
||||
from . import adapter_ksarchivecom
|
||||
from . import adapter_libraryofmoriacom
|
||||
from . import adapter_ashwindersycophanthexcom
|
||||
from . import adapter_chaossycophanthexcom
|
||||
from . import adapter_erosnsapphosycophanthexcom
|
||||
from . import adapter_lumossycophanthexcom
|
||||
from . import adapter_occlumencysycophanthexcom
|
||||
from . import adapter_phoenixsongnet
|
||||
from . import adapter_walkingtheplankorg
|
||||
from . import adapter_dokugacom
|
||||
from . import adapter_storiesofardacom
|
||||
from . import adapter_ncisfictioncom
|
||||
from . import adapter_fanfiktionde
|
||||
from . import adapter_themasquenet
|
||||
from . import adapter_pretendercentrecom
|
||||
from . import adapter_darksolaceorg
|
||||
from . import adapter_storyroomcom
|
||||
from . import adapter_dracoandginnycom
|
||||
from . import adapter_wolverineandroguecom
|
||||
from . import adapter_thehookupzonenet
|
||||
from . import adapter_efpfanficnet
|
||||
from . import adapter_imagineeficcom
|
||||
from . import adapter_storiesonlinenet
|
||||
from . import adapter_literotica
|
||||
from . import adapter_voracity2eficcom
|
||||
from . import adapter_spikeluvercom
|
||||
from . import adapter_bloodshedversecom
|
||||
from . import adapter_fictionmaniatv
|
||||
from . import adapter_sheppardweircom
|
||||
from . import adapter_samandjacknet
|
||||
from . import adapter_tgstorytimecom
|
||||
from . import adapter_forumsspacebattlescom
|
||||
from . import adapter_forumssufficientvelocitycom
|
||||
from . import adapter_forumquestionablequestingcom
|
||||
from . import adapter_ninelivesarchivecom
|
||||
from . import adapter_masseffect2in
|
||||
from . import adapter_quotevcom
|
||||
from . import adapter_mcstoriescom
|
||||
from . import adapter_naiceanilmenet
|
||||
from . import adapter_adultfanfictionorg
|
||||
from . import adapter_fictionhuntcom
|
||||
from . import adapter_royalroadcom
|
||||
from . import adapter_chosentwofanficcom
|
||||
from . import adapter_bdsmlibrarycom
|
||||
from . import adapter_asexstoriescom
|
||||
from . import adapter_gluttonyfictioncom
|
||||
from . import adapter_valentchambercom
|
||||
from . import adapter_wwwgiantessworldnet
|
||||
from . import adapter_starslibrarynet
|
||||
from . import adapter_fanficauthorsnet
|
||||
from . import adapter_fireflyfansnet
|
||||
from . import adapter_trekfanfictionnet
|
||||
from . import adapter_wwwutopiastoriescom
|
||||
from . import adapter_sinfuldreamscomunicornfic
|
||||
from . import adapter_sinfuldreamscomwickedtemptation
|
||||
from . import adapter_asianfanficscom
|
||||
from . import adapter_mttjustoncenet
|
||||
from . import adapter_narutoficorg
|
||||
from . import adapter_thedelphicexpansecom
|
||||
from . import adapter_wwwaneroticstorycom
|
||||
from . import adapter_lcfanficcom
|
||||
from . import adapter_inkbunnynet
|
||||
from . import adapter_alternatehistorycom
|
||||
from . import adapter_wattpadcom
|
||||
from . import adapter_novelonlinefullcom
|
||||
from . import adapter_wwwnovelallcom
|
||||
from . import adapter_hentaifoundrycom
|
||||
from . import adapter_mugglenetfanfictioncom
|
||||
from . import adapter_fanficsme
|
||||
from . import adapter_fanfictalkcom
|
||||
from . import adapter_scifistoriescom
|
||||
from . import adapter_chireadscom
|
||||
from . import adapter_scribblehubcom
|
||||
from . import adapter_fictionlive
|
||||
from . import adapter_thesietchcom
|
||||
from . import adapter_squidgeworldorg
|
||||
from . import adapter_novelfull
|
||||
from . import adapter_psychficcom
|
||||
from . import adapter_deviantartcom
|
||||
from . import adapter_readonlymindcom
|
||||
from . import adapter_wwwsunnydaleafterdarkcom
|
||||
from . import adapter_syosetucom
|
||||
from . import adapter_kakuyomujp
|
||||
from . import adapter_fanfictionsfr
|
||||
from . import adapter_touchfluffytail
|
||||
from . import adapter_spiritfanfictioncom
|
||||
from . import adapter_superlove
|
||||
from . import adapter_cfaa
|
||||
from . import adapter_althistorycom
|
||||
|
||||
## This bit of complexity allows adapters to be added by just adding
|
||||
## importing. It eliminates the long if/else clauses we used to need
|
||||
|
|
@ -178,9 +151,11 @@ __class_list = []
|
|||
__domain_map = {}
|
||||
|
||||
def imports():
|
||||
out = []
|
||||
for name, val in globals().items():
|
||||
if isinstance(val, types.ModuleType):
|
||||
yield val.__name__
|
||||
out.append(val.__name__)
|
||||
return out
|
||||
|
||||
for x in imports():
|
||||
if "fanficfare.adapters.adapter_" in x:
|
||||
|
|
@ -192,6 +167,32 @@ for x in imports():
|
|||
l.append(cls)
|
||||
__domain_map[site]=l
|
||||
|
||||
def get_url_chapter_range(url_in):
    """Split an optional trailing chapter range off a story URL.

    Accepts forms like test1.com?sid=5[4-6], [4,6], [4], [4-] or [-6].
    Returns (url, begin, end) where begin/end are digit strings or None
    when absent.  A bare [4] (no separator) means exactly chapter 4.
    """
    match = re.match(r"^(?P<url>.*?)(?:\[(?P<begin>\d+)?(?P<comma>[,-])?(?P<end>\d+)?\])?$", url_in)
    url, begin, end = match.group('url', 'begin', 'end')
    if begin and not match.group('comma'):
        # single-chapter form: [N] == [N-N]
        end = begin
    return url, begin, end
|
||||
|
||||
# NOTE(review): comment previously read "Call as ' with busy_cursor:'",
# apparently copied from elsewhere.  Usage is:
#   with lightweight_adapter(url) as adapter: ...
@contextmanager
def lightweight_adapter(url):
    # Yields a throw-away adapter for url, or None when no adapter
    # accepts it.  The broad except is deliberate: any failure while
    # constructing the adapter is treated as "not a story URL".
    adapter = None
    try:
        if not getNormalStoryURL.__dummyconfig:
            # lazily build the shared lightweight Configuration singleton
            getNormalStoryURL.__dummyconfig = configurable.Configuration(["test1.com"],"EPUB",lightweight=True)
        adapter = getAdapter(getNormalStoryURL.__dummyconfig,url)
        yield adapter
    except:
        yield None
    finally:
        del adapter
|
||||
|
||||
def getNormalStoryURL(url):
|
||||
r = getNormalStoryURLSite(url)
|
||||
if r:
|
||||
|
|
@ -199,24 +200,45 @@ def getNormalStoryURL(url):
|
|||
else:
|
||||
return None
|
||||
|
||||
def getNormalStoryURLSite(url):
|
||||
# print("getNormalStoryURLSite:%s"%url)
|
||||
if not getNormalStoryURL.__dummyconfig:
|
||||
getNormalStoryURL.__dummyconfig = Configuration(["test1.com"],"EPUB",lightweight=True)
|
||||
# pulling up an adapter is pretty low over-head. If
|
||||
# it fails, it's a bad url.
|
||||
try:
|
||||
adapter = getAdapter(getNormalStoryURL.__dummyconfig,url)
|
||||
url = adapter.url
|
||||
site = adapter.getSiteDomain()
|
||||
del adapter
|
||||
return (url,site)
|
||||
except:
|
||||
return None
|
||||
|
||||
# kludgey function static/singleton
# Note it's *not* on lightweight_adapter because it can't reference
# itself in its definition.
# Holds a shared lightweight Configuration, created on first use.
getNormalStoryURL.__dummyconfig = None
|
||||
|
||||
def getNormalStoryURLSite(url):
    """Return (normalized_url, site_domain) for url, or None when no
    adapter recognizes it."""
    with lightweight_adapter(url) as adapter:
        return (adapter.url, adapter.getSiteDomain()) if adapter else None
|
||||
|
||||
## Originally defined for INI [storyUrl] sections where story URL
## contains a title that can change, now also used for reject list.
## waaaay faster with classmethod.
def get_section_url(url):
    """Normalize url via its adapter's get_section_url classmethod."""
    cls = _get_class_for(url)[0]
    if cls is None:
        ## might be a url from a removed adapter.
        ## return unchanged in that case.
        return url
    return cls.get_section_url(url)
|
||||
|
||||
def get_url_search(url):
    '''
    For adapters that have story URLs that can change. This is
    used for searching the Calibre library by identifiers:url for
    sites (generally) that contain author or title that can
    change, but also have a unique identifier that doesn't.

    returns a string containing a regexp, not a compiled re object.
    '''
    ## fall back to the base adapter so common processing still applies
    ## when no site-specific class matches.
    cls = _get_class_for(url)[0] or base_adapter.BaseSiteAdapter
    return cls.get_url_search(url)
|
||||
|
||||
def getAdapter(config,url,anyurl=False):
|
||||
|
||||
#logger.debug("trying url:"+url)
|
||||
|
|
@ -244,8 +266,7 @@ def getConfigSections():
|
|||
def get_bulk_load_sites():
|
||||
# for now, all eFiction Base adapters are assumed to allow bulk_load.
|
||||
sections = set()
|
||||
for cls in filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter),
|
||||
__class_list):
|
||||
for cls in [x for x in __class_list if issubclass(x,base_efiction_adapter.BaseEfictionAdapter) ]:
|
||||
sections.update( [ x.replace('www.','') for x in cls.getConfigSections() ] )
|
||||
return sections
|
||||
|
||||
|
|
@ -270,13 +291,13 @@ def _get_class_for(url):
|
|||
fixedurl = "http:%s"%url
|
||||
if not fixedurl.startswith("http"):
|
||||
fixedurl = "http://%s"%url
|
||||
|
||||
|
||||
## remove any trailing '#' locations, except for #post-12345 for
|
||||
## XenForo
|
||||
if not "#post-" in fixedurl:
|
||||
fixedurl = re.sub(r"#.*$","",fixedurl)
|
||||
|
||||
parsedUrl = up.urlparse(fixedurl)
|
||||
parsedUrl = urlparse(fixedurl)
|
||||
domain = parsedUrl.netloc.lower()
|
||||
if( domain != parsedUrl.netloc ):
|
||||
fixedurl = fixedurl.replace(parsedUrl.netloc,domain)
|
||||
|
|
@ -295,14 +316,15 @@ def _get_class_for(url):
|
|||
fixedurl = re.sub(r"^http(s?)://",r"http\1://www.",fixedurl)
|
||||
|
||||
cls = None
|
||||
if len(clslst) == 1:
|
||||
cls = clslst[0]
|
||||
elif len(clslst) > 1:
|
||||
for c in clslst:
|
||||
if c.getSiteURLFragment() in fixedurl:
|
||||
cls = c
|
||||
break
|
||||
|
||||
if clslst:
|
||||
if len(clslst) == 1:
|
||||
cls = clslst[0]
|
||||
elif len(clslst) > 1:
|
||||
for c in clslst:
|
||||
if c.getSiteURLFragment() in fixedurl:
|
||||
cls = c
|
||||
break
|
||||
|
||||
if cls:
|
||||
fixedurl = cls.stripURLParameters(fixedurl)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,222 +15,24 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
    def __init__(self, config, url):
        """Set site metadata and normalize the story URL to
        http://<domain>/viewstory.php?sid=<id>."""
        BaseSiteAdapter.__init__(self, config, url)
        # short site abbreviation used in metadata/filenames
        self.story.setMetadata('siteabbrev','aaff')
        self.decode = ["Windows-1252",
                       "utf8"] # 1252 is a superset of iso-8859-1.
                               # Most sites that claim to be
                               # iso-8859-1 (and some that claim to be
                               # utf8) are really windows-1252.
        # set True once the user passes the adult-content check
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])


        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
    @staticmethod
    def getSiteDomain():
        # Canonical host for this archive; also the adapter's INI section name.
        return 'www.adastrafanfic.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
    def getSiteURLPattern(self):
        # Accept only the normalized form: http://<domain>/viewstory.php?sid=<digits>
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
addurl = "&warning=5"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
# issues with different SGML parsers in python. This is a
|
||||
# nasty hack, but it works.
|
||||
data = data[data.index("<body"):]
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
## <meta name='description' content='<p>Description</p> ...' >
|
||||
## Summary, strangely, is in the content attr of a <meta name='description'> tag
|
||||
## which is escaped HTML. Unfortunately, we can't use it because they don't
|
||||
## escape (') chars in the desc, breakin the tag.
|
||||
#meta_desc = soup.find('meta',{'name':'description'})
|
||||
#metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
|
||||
#self.story.setMetadata('description',stripHTML(metasoup))
|
||||
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next span class='label'
|
||||
svalue = ''
|
||||
while value and 'label' not in defaultGetattr(value,'class'):
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
# sometimes poorly formated desc (<p> w/o </p>) leads
|
||||
# to all labels being included.
|
||||
svalue=svalue[:svalue.find('<span class="label">')]
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rated' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Word count' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
catstext = [cat.string for cat in cats]
|
||||
for cat in catstext:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
charstext = [char.string for char in chars]
|
||||
for char in charstext:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
genrestext = [genre.string for genre in genres]
|
||||
self.genre = ', '.join(genrestext)
|
||||
for genre in genrestext:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
warningstext = [warning.string for warning in warnings]
|
||||
self.warning = ', '.join(warningstext)
|
||||
for warning in warningstext:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))
|
||||
|
||||
if 'Updated' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self_make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
# issues with different SGML parsers in python. This is a
|
||||
# nasty hack, but it works.
|
||||
data = data[data.index("<body"):]
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
span = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == span:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,span)
|
||||
from .base_otw_adapter import BaseOTWAdapter
|
||||
|
||||
def getClass():
|
||||
return AdAstraFanficComSiteAdapter
|
||||
return AdastrafanficComAdapter
|
||||
|
||||
class AdastrafanficComAdapter(BaseOTWAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseOTWAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','aaff')
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.adastrafanfic.com'
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -- coding: utf-8 --
|
||||
# Copyright 2013 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2013 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -17,18 +17,19 @@
|
|||
################################################################################
|
||||
### Written by GComyn
|
||||
################################################################################
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import sys
|
||||
from bs4 import UnicodeDammit
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
################################################################################
|
||||
|
||||
|
|
@ -41,13 +42,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
logger.debug("AdultFanFictionOrgAdapter.__init__ - url='{0}'".format(url))
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252", "iso-8859-1"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
# logger.debug("AdultFanFictionOrgAdapter.__init__ - url='{0}'".format(url))
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
|
|
@ -62,8 +57,8 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
# normalized story URL.(checking self.zone against list
|
||||
# removed--it was redundant w/getAcceptDomains and
|
||||
# getSiteURLPattern both)
|
||||
self._setURL('http://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
|
||||
#self._setURL('http://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://{0}.{1}/story.php?no={2}'.format(self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
|
||||
#self._setURL('https://' + self.zone + '.' + self.getBaseDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
#self.story.setMetadata('siteabbrev',self.getSiteAbbrev())
|
||||
|
|
@ -73,9 +68,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%Y-%m-%d"
|
||||
|
||||
|
||||
self.dateformat = "%B %d, %Y"
|
||||
|
||||
## Added because adult-fanfiction.org does send you to
|
||||
## www.adult-fanfiction.org when you go to it and it also moves
|
||||
|
|
@ -118,79 +111,31 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(self):
|
||||
return ("http://anime.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://anime2.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://bleach.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://books.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://buffy.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://cartoon.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://celeb.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://comics.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://ff.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://games.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://hp.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://inu.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://lotr.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://manga.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://movies.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://naruto.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://ne.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://original.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://tv.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://xmen.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://ygo.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "http://yuyu.adult-fanfiction.org/story.php?no=123456789")
|
||||
return ("https://anime.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://anime2.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://bleach.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://books.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://buffy.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://cartoon.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://celeb.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://comics.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://ff.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://games.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://hp.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://inu.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://lotr.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://manga.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://movies.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://naruto.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://ne.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://original.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://tv.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://xmen.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://ygo.adult-fanfiction.org/story.php?no=123456789 "
|
||||
+ "https://yuyu.adult-fanfiction.org/story.php?no=123456789")
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r'http?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
|
||||
|
||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
#def needToLoginCheck(self, data):
|
||||
##This adapter will always require a login
|
||||
# return True
|
||||
|
||||
# <form name="login" method="post" action="">
|
||||
# <div class="top">E-mail: <span id="sprytextfield1">
|
||||
# <input name="email" type="text" id="email" size="20" maxlength="255" />
|
||||
# <span class="textfieldRequiredMsg">Email is required.</span><span class="textfieldInvalidFormatMsg">Invalid E-mail.</span></span></div>
|
||||
# <div class="top">Password: <span id="sprytextfield2">
|
||||
# <input name="pass1" type="password" id="pass1" size="20" maxlength="32" />
|
||||
# <span class="textfieldRequiredMsg">password is required.</span><span class="textfieldMinCharsMsg">Minimum 8 characters8.</span><span class="textfieldMaxCharsMsg">Exceeded 32 characters.</span></span></div>
|
||||
# <div class="top"><br /> <input name="loginsubmittop" type="hidden" id="loginsubmit" value="TRUE" />
|
||||
# <input type="submit" value="Login" />
|
||||
# </div>
|
||||
# </form>
|
||||
|
||||
|
||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
||||
#def performLogin(self, url, soup):
|
||||
# params = {}
|
||||
|
||||
# if self.password:
|
||||
# params['email'] = self.username
|
||||
# params['pass1'] = self.password
|
||||
# else:
|
||||
# params['email'] = self.getConfig("username")
|
||||
# params['pass1'] = self.getConfig("password")
|
||||
# params['submit'] = 'Login'
|
||||
|
||||
# # copy all hidden input tags to pick up appropriate tokens.
|
||||
# for tag in soup.findAll('input',{'type':'hidden'}):
|
||||
# params[tag['name']] = tag['value']
|
||||
|
||||
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
|
||||
|
||||
# d = self._postUrl(url, params, usecache=False)
|
||||
# d = self._fetchUrl(url, params, usecache=False)
|
||||
# soup = self.make_soup(d)
|
||||
|
||||
#if not (soup.find('form', {'name' : 'login'}) == None):
|
||||
# logger.info("Failed to login to URL %s as %s" % (url, params['email']))
|
||||
# raise exceptions.FailedToLogin(url,params['email'])
|
||||
# return False
|
||||
#else:
|
||||
# return True
|
||||
return r'https?://(anime|anime2|bleach|books|buffy|cartoon|celeb|comics|ff|games|hp|inu|lotr|manga|movies|naruto|ne|original|tv|xmen|ygo|yuyu)\.adult-fanfiction\.org/story\.php\?no=\d+$'
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
|
@ -198,212 +143,109 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
## You need to have your is_adult set to true to get this story
|
||||
if not (self.is_adult or self.getConfig("is_adult")):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
else:
|
||||
d = self.post_request('https://www.adult-fanfiction.org/globals/ajax/age-verify.php', {"verify":"1"})
|
||||
if "Age verified successfully" not in d:
|
||||
raise exceptions.FailedToDownload("Failed to Verify Age: {0}".format(d))
|
||||
|
||||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist("Code: 404. {0}".format(url))
|
||||
elif e.code == 410:
|
||||
raise exceptions.StoryDoesNotExist("Code: 410. {0}".format(url))
|
||||
elif e.code == 401:
|
||||
self.needToLogin = True
|
||||
data = ''
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
# logger.debug(data)
|
||||
|
||||
if "The dragons running the back end of the site can not seem to find the story you are looking for." in data:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The dragons running the back end of the site can not seem to find the story you are looking for.".format(self.zone, self.getBaseDomain()))
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
##This is not working right now, so I'm commenting it out, but leaving it for future testing
|
||||
#self.performLogin(url, soup)
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
## Title
|
||||
## Some of the titles have a backslash on the story page, but not on the Author's page
|
||||
## So I am removing it from the title, so it can be found on the Author's page further in the code.
|
||||
## Also, some titles may have extra spaces ' ', and the search on the Author's page removes them,
|
||||
## so I have to here as well. I used multiple replaces to make sure, since I did the same below.
|
||||
a = soup.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
|
||||
h1 = soup.find('h1')
|
||||
# logger.debug("Title:%s"%h1)
|
||||
self.story.setMetadata('title',stripHTML(h1).replace('\\','').replace(' ',' ').replace(' ',' ').replace(' ',' ').strip())
|
||||
|
||||
# Find the chapters from first list only
|
||||
chapters = soup.select_one('select.chapter-select').select('option')
|
||||
for chapter in chapters:
|
||||
self.add_chapter(chapter,self.url+'&chapter='+chapter['value'])
|
||||
|
||||
# Find the chapters:
|
||||
chapters = soup.find('div',{'id':'snav'})
|
||||
for i, chapter in enumerate(chapters.findAll('a')):
|
||||
self.chapterUrls.append((stripHTML(chapter),self.url+'&chapter='+str(i+1)))
|
||||
|
||||
self.story.setMetadata('numChapters', len(self.chapterUrls))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"profile.php\?no=\d+"))
|
||||
a = soup.find('a', href=re.compile(r"profile.php\?id=\d+"))
|
||||
if a == None:
|
||||
# I know that the original author of fanficfare wants to always have metadata,
|
||||
# I know that the original author of fanficfare wants to always have metadata,
|
||||
# but I posit that if the story is there, even if we can't get the metadata from the
|
||||
# author page, the story should still be able to be downloaded, which is what I've done here.
|
||||
self.story.setMetadata('authorId','000000000')
|
||||
self.story.setMetadata('authorUrl','http://www.adult-fanfiction.org')
|
||||
self.story.setMetadata('authorUrl','https://www.adult-fanfiction.org')
|
||||
self.story.setMetadata('author','Unknown')
|
||||
logger.warning('There was no author found for the story... Metadata will not be retreived.')
|
||||
self.setDescription(url,'>>>>>>>>>> No Summary Given <<<<<<<<<<')
|
||||
self.setDescription(url,'>>>>>>>>>> No Summary Given, Unknown Author <<<<<<<<<<')
|
||||
else:
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl',a['href'])
|
||||
self.story.setMetadata('author',stripHTML(a))
|
||||
|
||||
##The story page does not give much Metadata, so we go to the Author's page
|
||||
|
||||
##Get the first Author page to see if there are multiple pages.
|
||||
##AFF doesn't care if the page number is larger than the actual pages,
|
||||
##it will continue to show the last page even if the variable is larger than the actual page
|
||||
author_Url = '{0}&view=story&zone={1}&page=1'.format(self.story.getMetadata('authorUrl'), self.zone)
|
||||
#author_Url = self.story.getMetadata('authorUrl')+'&view=story&zone='+self.zone+'&page=1'
|
||||
|
||||
##I'm resetting the author page to the zone for this story
|
||||
self.story.setMetadata('authorUrl',author_Url)
|
||||
|
||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
||||
try:
|
||||
adata = self._fetchUrl(author_Url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code in 404:
|
||||
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
|
||||
elif e.code == 410:
|
||||
raise exceptions.StoryDoesNotExist("Author Page: Code: 410. {0}".format(author_Url))
|
||||
else:
|
||||
raise e
|
||||
|
||||
if "The member you are looking for does not exist." in adata:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
|
||||
#raise exceptions.StoryDoesNotExist(self.zone+'.'+self.getBaseDomain() +" says: The member you are looking for does not exist.")
|
||||
|
||||
## The story page does not give much Metadata, so we go to
|
||||
## the Author's page. Except it's actually a sub-req for
|
||||
## list of author's stories for that subdomain
|
||||
author_Url = 'https://members.{0}/load-user-stories.php?subdomain={1}&uid={2}'.format(
|
||||
self.getBaseDomain(),
|
||||
self.zone,
|
||||
self.story.getMetadata('authorId'))
|
||||
|
||||
logger.debug('Getting the load-user-stories page: {0}'.format(author_Url))
|
||||
adata = self.get_request(author_Url)
|
||||
|
||||
none_found = "No stories found in this category."
|
||||
if none_found in adata:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: {2}".format(self.zone, self.getBaseDomain(), none_found))
|
||||
|
||||
asoup = self.make_soup(adata)
|
||||
|
||||
##Getting the number of pages
|
||||
pages=asoup.find('div',{'class' : 'pagination'}).findAll('li')[-1].find('a')
|
||||
if not pages == None:
|
||||
pages = pages['href'].split('=')[-1]
|
||||
else:
|
||||
pages = 0
|
||||
|
||||
##If there is only 1 page of stories, check it to get the Metadata,
|
||||
if pages == 0:
|
||||
a = asoup.findAll('li')
|
||||
for lc2 in a:
|
||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
|
||||
break
|
||||
## otherwise go through the pages
|
||||
else:
|
||||
page=1
|
||||
i=0
|
||||
while i == 0:
|
||||
##We already have the first page, so if this is the first time through, skip getting the page
|
||||
if page != 1:
|
||||
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, str(page))
|
||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
||||
try:
|
||||
adata = self._fetchUrl(author_Url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code in 404:
|
||||
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
|
||||
elif e.code == 410:
|
||||
raise exceptions.StoryDoesNotExist("Author Page: Code: 410. {0}".format(author_Url))
|
||||
else:
|
||||
raise e
|
||||
##This will probably never be needed, since AFF doesn't seem to care what number you put as
|
||||
## the page number, it will default to the last page, even if you use 1000, for an author
|
||||
## that only hase 5 pages of stories, but I'm keeping it in to appease Saint Justin Case (just in case).
|
||||
if "The member you are looking for does not exist." in adata:
|
||||
raise exceptions.StoryDoesNotExist("{0}.{1} says: The member you are looking for does not exist.".format(self.zone, self.getBaseDomain()))
|
||||
# we look for the li element that has the story here
|
||||
asoup = self.make_soup(adata)
|
||||
|
||||
a = asoup.findAll('li')
|
||||
for lc2 in a:
|
||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
|
||||
i=1
|
||||
break
|
||||
page = page + 1
|
||||
if page > pages:
|
||||
break
|
||||
|
||||
##Split the Metadata up into a list
|
||||
##We have to change the soup type to a string, then remove the newlines, and double spaces,
|
||||
##then changes the <br/> to '-:-', which seperates the different elemeents.
|
||||
##Then we strip the HTML elements from the string.
|
||||
##There is also a double <br/>, so we have to fix that, then remove the leading and trailing '-:-'.
|
||||
##They are always in the same order.
|
||||
## EDIT 09/26/2016: Had some trouble with unicode errors... so I had to put in the decode/encode parts to fix it
|
||||
liMetadata = str(lc2).decode('utf-8').replace('\n','').replace('\r','').replace('\t',' ').replace(' ',' ').replace(' ',' ').replace(' ',' ')
|
||||
liMetadata = stripHTML(liMetadata.replace(r'<br/>','-:-').replace('<!-- <br /-->','-:-'))
|
||||
liMetadata = liMetadata.strip('-:-').strip('-:-').encode('utf-8')
|
||||
for i, value in enumerate(liMetadata.decode('utf-8').split('-:-')):
|
||||
if i == 0:
|
||||
# The value for the title has been manipulated, so may not be the same as gotten at the start.
|
||||
# I'm going to use the href from the lc2 retrieved from the author's page to determine if it is correct.
|
||||
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$"))['href'] != url:
|
||||
raise exceptions.StoryDoesNotExist('Did not find story in author story list: {0}'.format(author_Url))
|
||||
elif i == 1:
|
||||
##Get the description
|
||||
self.setDescription(url,stripHTML(value.strip()))
|
||||
else:
|
||||
# the rest of the values can be missing, so instead of hardcoding the numbers, we search for them.
|
||||
if 'Located :' in value:
|
||||
self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip())
|
||||
elif 'Category :' in value:
|
||||
# Get the Category
|
||||
self.story.setMetadata('category',value.replace(r'>',r'>').replace(r'Located :',r'').strip())
|
||||
elif 'Content Tags :' in value:
|
||||
# Get the Erotic Tags
|
||||
value = stripHTML(value.replace(r'Content Tags :',r'')).strip()
|
||||
for code in re.split(r'\s',value):
|
||||
self.story.addToList('eroticatags',code)
|
||||
elif 'Posted :' in value:
|
||||
# Get the Posted Date
|
||||
value = value.replace(r'Posted :',r'').strip()
|
||||
if value.startswith('008'):
|
||||
# It is unknown how the 200 became 008, but I'm going to change it back here
|
||||
value = value.replace('008','200')
|
||||
elif value.startswith('0000'):
|
||||
# Since the date is showing as 0000,
|
||||
# I'm going to put the memberdate here
|
||||
value = asoup.find('div',{'id':'contentdata'}).find('p').get_text(strip=True).replace('Member Since','').strip()
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
elif 'Edited :' in value:
|
||||
# Get the 'Updated' Edited date
|
||||
# AFF has the time for the Updated date, and we only want the date,
|
||||
# so we take the first 10 characters only
|
||||
value = value.replace(r'Edited :',r'').strip()[0:10]
|
||||
if value.startswith('008'):
|
||||
# It is unknown how the 200 became 008, but I'm going to change it back here
|
||||
value = value.replace('008','200')
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
elif value.startswith('0000') or '-00-' in value:
|
||||
# Since the date is showing as 0000,
|
||||
# or there is -00- in the date,
|
||||
# I'm going to put the Published date here
|
||||
self.story.setMetadata('dateUpdated', self.story.getMetadata('datPublished'))
|
||||
else:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
else:
|
||||
# This catches the blank elements, and the Review and Dragon Prints.
|
||||
# I am not interested in these, so do nothing
|
||||
zzzzzzz=0
|
||||
|
||||
# logger.debug(asoup)
|
||||
|
||||
story_card = asoup.select_one('div.story-card:has(a[href="{0}"])'.format(url))
|
||||
# logger.debug(story_card)
|
||||
|
||||
## Category
|
||||
## I've only seen one category per story so far, but just in case:
|
||||
for cat in story_card.select('div.story-card-category'):
|
||||
# remove Category:, old code suggests Located: is also
|
||||
# possible, so removing by <strong>
|
||||
cat.find("strong").decompose()
|
||||
self.story.addToList('category',stripHTML(cat))
|
||||
|
||||
self.setDescription(url,story_card.select_one('div.story-card-description'))
|
||||
|
||||
for tag in story_card.select('span.story-tag'):
|
||||
self.story.addToList('eroticatags',stripHTML(tag))
|
||||
|
||||
## created/updates share formatting
|
||||
for meta in story_card.select('div.story-card-meta-item span:last-child'):
|
||||
meta = stripHTML(meta)
|
||||
if 'Created: ' in meta:
|
||||
meta = meta.replace('Created: ','')
|
||||
self.story.setMetadata('datePublished', makeDate(meta, self.dateformat))
|
||||
|
||||
if 'Updated: ' in meta:
|
||||
meta = meta.replace('Updated: ','')
|
||||
self.story.setMetadata('dateUpdated', makeDate(meta, self.dateformat))
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
#Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
chaptertag = soup.find('div',{'class' : 'pagination'}).parent.findNext('td')
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
chaptertag = soup.select_one('div.chapter-body')
|
||||
if None == chaptertag:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
||||
## chapter text includes a copy of story title, author,
|
||||
## chapter title, & eroticatags specific to the chapter. Did
|
||||
## before, too.
|
||||
|
||||
return self.utf8FromSoup(url,chaptertag)
|
||||
|
|
|
|||
46
fanficfare/adapters/adapter_alternatehistorycom.py
Normal file
46
fanficfare/adapters/adapter_alternatehistorycom.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def getClass():
|
||||
return WWWAlternatehistoryComAdapter
|
||||
|
||||
class WWWAlternatehistoryComAdapter(BaseXenForo2ForumAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseXenForo2ForumAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ah')
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.alternatehistory.com'
|
||||
|
||||
@classmethod
|
||||
def getPathPrefix(cls):
|
||||
# in case it needs more than just site/
|
||||
return '/forum/'
|
||||
|
||||
def get_post_created_date(self,souptag):
|
||||
return self.make_date(souptag.find('div', {'class':'message-inner'}))
|
||||
40
fanficfare/adapters/adapter_althistorycom.py
Normal file
40
fanficfare/adapters/adapter_althistorycom.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2026 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
|
||||
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
|
||||
|
||||
def getClass():
|
||||
return AltHistoryComAdapter
|
||||
|
||||
## NOTE: This is a different site than www.alternatehistory.com.
|
||||
|
||||
class AltHistoryComAdapter(BaseXenForo2ForumAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseXenForo2ForumAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ahc')
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'althistory.com'
|
||||
|
||||
|
|
@ -1,302 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# ####### Not all lables are captured. they are not formtted correctly on the
|
||||
# ####### webpage.
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return AndromedaWebComAdapter # XXX
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fiction part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','awc') # XXX
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %b %Y" # XXX
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.andromeda-web.com' # XXX
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&warning=2"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. ksarchive uses
|
||||
# &warning= -- actually, so do other sites. Must be an
|
||||
# eFiction book.
|
||||
|
||||
# fiction/viewstory.php?sid=1882&warning=4
|
||||
# fiction/viewstory.php?sid=1654&ageconsent=ok&warning=2
|
||||
#print data
|
||||
m = re.search(r"'fiction/viewstory.php\?sid=10(&warning=2)'",data)
|
||||
m = re.search(r"'fiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# We tried the default and still got a warning, so
|
||||
# let's pull the warning number from the 'continue'
|
||||
# link and reload data.
|
||||
addurl = m.group(1)
|
||||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('div',{'id':'content'})
|
||||
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fiction/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next span class='label'
|
||||
svalue = ""
|
||||
while 'label' not in defaultGetattr(value,'class'):
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rated' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Word count' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('fiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'class' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2014 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,383 +15,55 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from .base_otw_adapter import BaseOTWAdapter
|
||||
|
||||
def getClass():
|
||||
return ArchiveOfOurOwnOrgAdapter
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
||||
class ArchiveOfOurOwnOrgAdapter(BaseOTWAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
BaseOTWAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ao3')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%Y-%b-%d"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'archiveofourown.org'
|
||||
|
||||
# The certificate is only valid for the following names:
|
||||
# ao3.org,
|
||||
# archiveofourown.com,
|
||||
# archiveofourown.net,
|
||||
# archiveofourown.org,
|
||||
# www.ao3.org,
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/works/123456 http://"+cls.getSiteDomain()+"/collections/Some_Archive/works/123456 http://"+cls.getSiteDomain()+"/works/123456/chapters/78901"
|
||||
def getAcceptDomains(cls):
|
||||
return ['archiveofourown.org',
|
||||
'archiveofourown.com',
|
||||
'archiveofourown.net',
|
||||
'archiveofourown.gay',
|
||||
'download.archiveofourown.org',
|
||||
'download.archiveofourown.com',
|
||||
'download.archiveofourown.net',
|
||||
'ao3.org',
|
||||
]
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
# http://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
|
||||
# Discard leading zeros from story ID numbers--AO3 doesn't use them in it's own chapter URLs.
|
||||
return r"https?://"+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/0*(?P<id>\d+)"
|
||||
def mod_url_request(self, url):
|
||||
return url
|
||||
|
||||
## Login
|
||||
def needToLoginCheck(self, data):
|
||||
if 'This work is only available to registered users of the Archive.' in data \
|
||||
or "The password or user name you entered doesn't match our records" in data:
|
||||
return True
|
||||
def mod_url_request(self, url):
|
||||
## add / to *not* replace media.archiveofourown.org
|
||||
if self.getConfig("use_archive_transformativeworks_org",False):
|
||||
return url.replace("/archiveofourown.org","/archive.transformativeworks.org")
|
||||
elif self.getConfig("use_archiveofourown_gay",False):
|
||||
return url.replace("/archiveofourown.org","/archiveofourown.gay")
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url, data):
|
||||
|
||||
params = {}
|
||||
if self.password:
|
||||
params['user_session[login]'] = self.username
|
||||
params['user_session[password]'] = self.password
|
||||
else:
|
||||
params['user_session[login]'] = self.getConfig("username")
|
||||
params['user_session[password]'] = self.getConfig("password")
|
||||
params['user_session[remember_me]'] = '1'
|
||||
params['commit'] = 'Log in'
|
||||
#params['utf8'] = u'✓'#u'\x2713' # gets along with out it, and it confuses the encoder.
|
||||
params['authenticity_token'] = data.split('input name="authenticity_token" type="hidden" value="')[1].split('"')[0]
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user_sessions'
|
||||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['user_session[login]']))
|
||||
|
||||
d = self._postUrl(loginUrl, params)
|
||||
#logger.info(d)
|
||||
|
||||
if "Successfully logged in" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['user_session[login]']))
|
||||
raise exceptions.FailedToLogin(url,params['user_session[login]'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
addurl = "?view_adult=true"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
metaurl = self.url+addurl
|
||||
url = self.url+'/navigate'+addurl
|
||||
logger.info("url: "+url)
|
||||
logger.info("metaurl: "+metaurl)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
meta = self._fetchUrl(metaurl)
|
||||
|
||||
if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if "Sorry, we couldn't find the work you were looking for." in data:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url,data)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
meta = self._fetchUrl(metaurl,usecache=False)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
for tag in soup.findAll('div',id='admin-banner'):
|
||||
tag.extract()
|
||||
metasoup = self.make_soup(meta)
|
||||
for tag in metasoup.findAll('div',id='admin-banner'):
|
||||
tag.extract()
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r"/works/\d+$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/\w+"))
|
||||
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
|
||||
self.story.setMetadata('author','Anonymous')
|
||||
self.story.setMetadata('authorUrl','http://archiveofourown.org/')
|
||||
self.story.setMetadata('authorId','0')
|
||||
else:
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('/')[-1])
|
||||
self.story.addToList('authorUrl','http://'+self.host+a['href'])
|
||||
self.story.addToList('author',a.text)
|
||||
|
||||
byline = metasoup.find('h3',{'class':'byline'})
|
||||
if byline:
|
||||
self.story.setMetadata('byline',stripHTML(byline))
|
||||
|
||||
newestChapter = None
|
||||
self.newestChapterNum = None # save for comparing during update.
|
||||
# Scan all chapters to find the oldest and newest, on AO3 it's
|
||||
# possible for authors to insert new chapters out-of-order or
|
||||
# change the dates of earlier ones by editing them--That WILL
|
||||
# break epub update.
|
||||
# Find the chapters:
|
||||
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$"))
|
||||
self.story.setMetadata('numChapters',len(chapters))
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
if len(chapters)==1:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+chapters[0]['href']+addurl))
|
||||
else:
|
||||
for index, chapter in enumerate(chapters):
|
||||
# strip just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']+addurl))
|
||||
# (2013-09-21)
|
||||
date = stripHTML(chapter.findNext('span'))[1:-1]
|
||||
chapterDate = makeDate(date,self.dateformat)
|
||||
if newestChapter == None or chapterDate > newestChapter:
|
||||
newestChapter = chapterDate
|
||||
self.newestChapterNum = index
|
||||
|
||||
a = metasoup.find('blockquote',{'class':'userstuff'})
|
||||
if a != None:
|
||||
self.setDescription(url,a)
|
||||
#self.story.setMetadata('description',a.text)
|
||||
|
||||
a = metasoup.find('dd',{'class':"rating tags"})
|
||||
if a != None:
|
||||
self.story.setMetadata('rating',stripHTML(a.text))
|
||||
|
||||
d = metasoup.find('dd',{'class':"language"})
|
||||
if d != None:
|
||||
self.story.setMetadata('language',stripHTML(d.text))
|
||||
|
||||
a = metasoup.find('dd',{'class':"fandom tags"})
|
||||
fandoms = a.findAll('a',{'class':"tag"})
|
||||
for fandom in fandoms:
|
||||
self.story.addToList('fandoms',fandom.string)
|
||||
|
||||
a = metasoup.find('dd',{'class':"warning tags"})
|
||||
if a != None:
|
||||
warnings = a.findAll('a',{'class':"tag"})
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
a = metasoup.find('dd',{'class':"freeform tags"})
|
||||
if a != None:
|
||||
genres = a.findAll('a',{'class':"tag"})
|
||||
for genre in genres:
|
||||
self.story.addToList('freeformtags',genre.string)
|
||||
|
||||
a = metasoup.find('dd',{'class':"category tags"})
|
||||
if a != None:
|
||||
genres = a.findAll('a',{'class':"tag"})
|
||||
for genre in genres:
|
||||
if genre != "Gen":
|
||||
self.story.addToList('ao3categories',genre.string)
|
||||
|
||||
a = metasoup.find('dd',{'class':"character tags"})
|
||||
if a != None:
|
||||
chars = a.findAll('a',{'class':"tag"})
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
a = metasoup.find('dd',{'class':"relationship tags"})
|
||||
if a != None:
|
||||
ships = a.findAll('a',{'class':"tag"})
|
||||
for ship in ships:
|
||||
self.story.addToList('ships',ship.string)
|
||||
|
||||
a = metasoup.find('dd',{'class':"collections"})
|
||||
if a != None:
|
||||
collections = a.findAll('a')
|
||||
for collection in collections:
|
||||
self.story.addToList('collections',collection.string)
|
||||
|
||||
stats = metasoup.find('dl',{'class':'stats'})
|
||||
dt = stats.findAll('dt')
|
||||
dd = stats.findAll('dd')
|
||||
for x in range(0,len(dt)):
|
||||
label = dt[x].text
|
||||
value = dd[x].text
|
||||
|
||||
if 'Words:' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Comments:' in label:
|
||||
self.story.setMetadata('comments', value)
|
||||
|
||||
if 'Kudos:' in label:
|
||||
self.story.setMetadata('kudos', value)
|
||||
|
||||
if 'Hits:' in label:
|
||||
self.story.setMetadata('hits', value)
|
||||
|
||||
if 'Bookmarks:' in label:
|
||||
self.story.setMetadata('bookmarks', value)
|
||||
|
||||
if 'Chapters:' in label:
|
||||
if value.split('/')[0] == value.split('/')[1]:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
|
||||
if 'Published' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Completed' in label:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
|
||||
# Find Series name from series URL.
|
||||
ddseries = metasoup.find('dd',{'class':"series"})
|
||||
|
||||
if ddseries:
|
||||
for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))):
|
||||
series_name = stripHTML(a)
|
||||
series_url = 'http://'+self.host+a['href']
|
||||
series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"
|
||||
self.story.setMetadata('series%02d'%i,"%s [%s]"%(series_name,series_index))
|
||||
self.story.setMetadata('series%02dUrl'%i,series_url)
|
||||
if i == 0:
|
||||
self.setSeries(series_name, series_index)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
|
||||
def hookForUpdates(self,chaptercount):
|
||||
if self.oldchapters and len(self.oldchapters) > self.newestChapterNum:
|
||||
logger.info("Existing epub has %s chapters\nNewest chapter is %s. Discarding old chapters from there on."%(len(self.oldchapters), self.newestChapterNum+1))
|
||||
self.oldchapters = self.oldchapters[:self.newestChapterNum]
|
||||
return len(self.oldchapters)
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
chapter=self.make_soup('<div class="story"></div>').find('div')
|
||||
data = self._fetchUrl(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
exclude_notes=self.getConfigList('exclude_notes')
|
||||
|
||||
def append_tag(elem,tag,string):
|
||||
'''bs4 requires tags be added separately.'''
|
||||
new_tag = soup.new_tag(tag)
|
||||
new_tag.string=string
|
||||
elem.append(new_tag)
|
||||
|
||||
if 'authorheadnotes' not in exclude_notes:
|
||||
headnotes = soup.find('div', {'class' : "preface group"}).find('div', {'class' : "notes module"})
|
||||
if headnotes != None:
|
||||
headnotes = headnotes.find('blockquote', {'class' : "userstuff"})
|
||||
if headnotes != None:
|
||||
append_tag(chapter,'b',"Author's Note:")
|
||||
chapter.append(headnotes)
|
||||
|
||||
if 'chaptersummary' not in exclude_notes:
|
||||
chapsumm = soup.find('div', {'id' : "summary"})
|
||||
if chapsumm != None:
|
||||
chapsumm = chapsumm.find('blockquote')
|
||||
append_tag(chapter,'b',"Summary for the Chapter:")
|
||||
chapter.append(chapsumm)
|
||||
|
||||
if 'chapterheadnotes' not in exclude_notes:
|
||||
chapnotes = soup.find('div', {'id' : "notes"})
|
||||
if chapnotes != None:
|
||||
chapnotes = chapnotes.find('blockquote')
|
||||
if chapnotes != None:
|
||||
append_tag(chapter,'b',"Notes for the Chapter:")
|
||||
chapter.append(chapnotes)
|
||||
|
||||
text = soup.find('div', {'class' : "userstuff module"})
|
||||
chtext = text.find('h3', {'class' : "landmark heading"})
|
||||
if chtext:
|
||||
chtext.extract()
|
||||
chapter.append(text)
|
||||
|
||||
if 'chapterfootnotes' not in exclude_notes:
|
||||
chapfoot = soup.find('div', {'class' : "end notes module", 'role' : "complementary"})
|
||||
if chapfoot != None:
|
||||
chapfoot = chapfoot.find('blockquote')
|
||||
append_tag(chapter,'b',"Notes for the Chapter:")
|
||||
chapter.append(chapfoot)
|
||||
|
||||
if 'authorfootnotes' not in exclude_notes:
|
||||
footnotes = soup.find('div', {'id' : "work_endnotes"})
|
||||
if footnotes != None:
|
||||
footnotes = footnotes.find('blockquote')
|
||||
append_tag(chapter,'b',"Author's Note:")
|
||||
chapter.append(footnotes)
|
||||
|
||||
if None == soup:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,chapter)
|
||||
return url
|
||||
|
|
|
|||
|
|
@ -1,190 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
|
||||
def getClass():
|
||||
return ArchiveSkyeHawkeComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ash')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%Y-%m-%d"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'archive.skyehawke.com'
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return ['archive.skyehawke.com','www.skyehawke.com']
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://archive.skyehawke.com/story.php?no=1234 http://www.skyehawke.com/archive/story.php?no=1234 http://skyehawke.com/archive/story.php?no=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://")+r"(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('div', {'class':"story border"}).find('span',{'class':'left'})
|
||||
title=stripHTML(a).split('"')[1]
|
||||
self.story.setMetadata('title',title)
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
author = a.find('a')
|
||||
self.story.setMetadata('authorId',author['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href'])
|
||||
self.story.setMetadata('author',author.string)
|
||||
|
||||
authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
|
||||
chapter=soup.find('select',{'name':'chapter'}).findAll('option')
|
||||
|
||||
for i in range(1,len(chapter)):
|
||||
ch=chapter[i]
|
||||
self.chapterUrls.append((stripHTML(ch),ch['value']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
box=soup.find('div', {'class': "container borderridge"})
|
||||
sum=box.find('span').text
|
||||
self.setDescription(url,sum)
|
||||
|
||||
boxes=soup.findAll('div', {'class': "container bordersolid"})
|
||||
for box in boxes:
|
||||
if box.find('b') != None and box.find('b').text == "History and Story Information":
|
||||
|
||||
for b in box.findAll('b'):
|
||||
if "words" in b.nextSibling:
|
||||
self.story.setMetadata('numWords', b.text)
|
||||
if "archived" in b.previousSibling:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(b.text), self.dateformat))
|
||||
if "updated" in b.previousSibling:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(b.text), self.dateformat))
|
||||
if "fandom" in b.nextSibling:
|
||||
self.story.addToList('category', b.text)
|
||||
|
||||
for br in box.findAll('br'):
|
||||
br.replaceWith('split')
|
||||
genre=box.text.split("Genre:")[1].split("split")[0]
|
||||
if not "Unspecified" in genre:
|
||||
self.story.addToList('genre',genre)
|
||||
|
||||
|
||||
if box.find('span') != None and box.find('span').text == "WARNING":
|
||||
|
||||
rating=box.findAll('span')[1]
|
||||
rating.find('br').replaceWith('split')
|
||||
rating=rating.text.replace("This story is rated",'').split('split')[0]
|
||||
self.story.setMetadata('rating',rating)
|
||||
logger.debug(self.story.getMetadata('rating'))
|
||||
|
||||
warnings=box.find('ol')
|
||||
if warnings != None:
|
||||
warnings=warnings.text.replace(']', '').replace('[', '').split(' ')
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning)
|
||||
|
||||
|
||||
for asoup in authorSoup.findAll('div', {'class':"story bordersolid"}):
|
||||
if asoup.find('a')['href'] == 'story.php?no='+self.story.getMetadata('storyId'):
|
||||
if '[ Completed ]' in asoup.text:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
chars=asoup.findNext('div').text.split('Characters')[1].split(']')[0]
|
||||
for char in chars.split(','):
|
||||
if not "None" in char:
|
||||
self.story.addToList('characters',char)
|
||||
break
|
||||
|
||||
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div',{'class':"chapter bordersolid"}).findNext('div').findNext('div')
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,302 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# ####### Not all lables are captured. they are not formtted correctly on the
|
||||
# ####### webpage.
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return ArtemisFowlComAdapter # XXX
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class ArtemisFowlComAdapter(BaseSiteAdapter): # XXX
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fiction part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','afcff') # XXX
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d/%m/%y" # XXX
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.artemis-fowl.com' # XXX
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/fanfiction/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/fanfiction/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&warning=5"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. ksarchive uses
|
||||
# &warning= -- actually, so do other sites. Must be an
|
||||
# eFiction book.
|
||||
|
||||
# fanfiction/viewstory.php?sid=1882&warning=4
|
||||
# fanfiction/viewstory.php?sid=1654&ageconsent=ok&warning=2
|
||||
#print data
|
||||
m = re.search(r"'fanfiction/viewstory.php\?sid=10(&warning=5)'",data)
|
||||
m = re.search(r"'fanfiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# We tried the default and still got a warning, so
|
||||
# let's pull the warning number from the 'continue'
|
||||
# link and reload data.
|
||||
addurl = m.group(1)
|
||||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
||||
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfiction/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next span class='label'
|
||||
svalue = ""
|
||||
while 'label' not in defaultGetattr(value,'class'):
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rated' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Word count' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"fanfiction/viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^fanfiction/viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2013 Fanficdownloader team, 2016 FanFicFare team
|
||||
# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,21 +15,18 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import urlparse
|
||||
import time
|
||||
import os
|
||||
|
||||
from bs4.element import Comment
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
import sys
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six.moves.urllib import parse as urlparse
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return ASexStoriesComAdapter
|
||||
|
|
@ -39,14 +36,6 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252",
|
||||
"iso-8859-1"]
|
||||
# 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
|
||||
self.story.setMetadata('siteabbrev','asscom')
|
||||
|
||||
# Extract story ID from base URL, http://www.asexstories.com/Halloween-party-with-the-phantom/
|
||||
|
|
@ -87,16 +76,10 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
if not (self.is_adult or self.getConfig("is_adult")):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
try:
|
||||
data1 = self._fetchUrl(self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data1 = self.get_request(self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
if 'Page Not Found.' in data1:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -109,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('title', title.string)
|
||||
|
||||
# Author
|
||||
author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a')
|
||||
author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
|
||||
authorurl = author['href']
|
||||
self.story.setMetadata('author', author.string)
|
||||
self.story.setMetadata('authorUrl', authorurl)
|
||||
|
|
@ -125,14 +108,11 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
description = description.encode('utf-8','ignore').strip()[0:150].decode('utf-8','ignore')
|
||||
self.setDescription(url,'Excerpt from beginning of story: '+description+'...')
|
||||
|
||||
# Get chapter URLs
|
||||
self.chapterUrls = []
|
||||
|
||||
### The first 'chapter' is not listed in the links, so we have to
|
||||
### add it before the rest of the pages, if any
|
||||
self.chapterUrls.append(('1', self.url))
|
||||
self.add_chapter('1', self.url)
|
||||
|
||||
chapterTable = soup1.find('div',{'class':'pages'}).findAll('a')
|
||||
chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')
|
||||
|
||||
if chapterTable is not None:
|
||||
# Multi-chapter story
|
||||
|
|
@ -140,11 +120,11 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
for page in chapterTable:
|
||||
chapterTitle = page.string
|
||||
chapterUrl = urlparse.urljoin(self.url, page['href'])
|
||||
self.chapterUrls.append((chapterTitle, chapterUrl))
|
||||
if chapterUrl.startswith(self.url): # there are other URLs in the pages block now.
|
||||
self.add_chapter(chapterTitle, chapterUrl)
|
||||
|
||||
self.story.setMetadata('numChapters', len(self.chapterUrls))
|
||||
|
||||
rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
|
||||
rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
|
||||
self.story.setMetadata('rating',rated)
|
||||
|
||||
self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))
|
||||
|
|
@ -157,7 +137,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
logger.debug('Getting chapter text from <%s>' % url)
|
||||
#logger.info('Getting chapter text from <%s>' % url)
|
||||
|
||||
data1 = self._fetchUrl(url)
|
||||
data1 = self.get_request(url)
|
||||
soup1 = self.make_soup(data1)
|
||||
|
||||
# get story text
|
||||
|
|
@ -170,5 +150,11 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
if self.getConfig('strip_text_links'):
|
||||
for anchor in story1('a', {'target': '_blank'}):
|
||||
anchor.replaceWith(anchor.string)
|
||||
## remove ad links in the story text and their following <br>
|
||||
for anchor in story1('a', {'rel': 'nofollow'}):
|
||||
br = anchor.find_next_sibling('br')
|
||||
if br:
|
||||
br.extract()
|
||||
anchor.extract()
|
||||
|
||||
return self.utf8FromSoup(url, story1)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -16,17 +16,17 @@
|
|||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return AshwinderSycophantHexComAdapter
|
||||
|
|
@ -38,11 +38,6 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -50,10 +45,10 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','asph')
|
||||
|
|
@ -69,10 +64,10 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -97,11 +92,11 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
params['intent'] = ''
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/user.php'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
d = self.post_request(loginUrl, params)
|
||||
|
||||
if "Logout" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
|
|
@ -118,61 +113,52 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
try:
|
||||
# in case link points somewhere other than the first chapter
|
||||
a = soup.findAll('option')[1]['value']
|
||||
a = soup.find_all('option')[1]['value']
|
||||
self.story.setMetadata('storyId',a.split('=',)[1])
|
||||
url = 'http://'+self.host+'/'+a
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
url = 'https://'+self.host+'/'+a
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
except:
|
||||
pass
|
||||
|
||||
for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
|
||||
|
||||
for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
|
||||
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
if a != None:
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
break
|
||||
|
||||
|
||||
|
||||
# Find the chapters:
|
||||
chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
|
||||
chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
|
||||
if len(chapters) == 0:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),url))
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
else:
|
||||
for chapter in chapters:
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
|
||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
|
@ -183,11 +169,11 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
return d.name
|
||||
except:
|
||||
return ""
|
||||
|
||||
cats = info.findAll('a',href=re.compile('categories.php'))
|
||||
|
||||
cats = info.find_all('a',href=re.compile('categories.php'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
|
||||
a = info.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId')))
|
||||
val = a.nextSibling
|
||||
svalue = ""
|
||||
|
|
@ -199,8 +185,10 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
val = val.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = info.findAll('b')
|
||||
## <td><span class="sb"><b>Published:</b> 04/08/2007</td>
|
||||
|
||||
## one story had <b>Updated...</b> in the description. Restrict to sub-table
|
||||
labels = info.find('table').find_all('b')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = stripHTML(labelspan)
|
||||
|
|
@ -242,8 +230,8 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
data = self.get_request(url)
|
||||
|
||||
soup = self.make_soup(data) # some chapters seem to be hanging up on those tags, so it is safer to close them
|
||||
|
||||
|
|
|
|||
290
fanficfare/adapters/adapter_asianfanficscom.py
Normal file
290
fanficfare/adapters/adapter_asianfanficscom.py
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import json
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return AsianFanFicsComAdapter
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AsianFanFicsComAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.username = ""
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('https://' + self.getSiteDomain() + '/story/view/'+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','asnff')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%Y-%b-%d"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.asianfanfics.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://"+cls.getSiteDomain()+"/story/view/123456 https://"+cls.getSiteDomain()+"/story/view/123456/story-title-here https://"+cls.getSiteDomain()+"/story/view/123456/1"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain())+r"/story/view/0*(?P<id>\d+)"
|
||||
|
||||
def performLogin(self, url, data):
|
||||
params = {}
|
||||
if self.password:
|
||||
params['username'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['username'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
|
||||
if not params['username']:
|
||||
raise exceptions.FailedToLogin(url,params['username'])
|
||||
|
||||
params['from_url'] = url
|
||||
# capture token from JS script, not appearing in form now.
|
||||
csrf_token_search = 'csrfToken = "'
|
||||
params['csrf_aff_token'] = data[data.index(csrf_token_search)+len(csrf_token_search):]
|
||||
params['csrf_aff_token'] = params['csrf_aff_token'][:params['csrf_aff_token'].index('"')]
|
||||
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/login/index'
|
||||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl, params['username']))
|
||||
|
||||
data = self.post_request(loginUrl, params)
|
||||
soup = self.make_soup(data)
|
||||
if self.loginNeededCheck(data):
|
||||
logger.info('Failed to login to URL %s as %s' % (loginUrl, params['username']))
|
||||
raise exceptions.FailedToLogin(url,params['username'])
|
||||
|
||||
def loginNeededCheck(self,data):
|
||||
return "isLoggedIn = false" in data
|
||||
|
||||
def doStorySubscribe(self, url, soup):
|
||||
subHref = soup.find('a',{'id':'subscribe'})
|
||||
if subHref:
|
||||
#does not work when using https - 403
|
||||
subUrl = 'http://' + self.getSiteDomain() + subHref['href']
|
||||
self.get_request(subUrl)
|
||||
data = self.get_request(url,usecache=False)
|
||||
soup = self.make_soup(data)
|
||||
check = soup.find('div',{'class':'click-to-read-full'})
|
||||
if check:
|
||||
return False
|
||||
else:
|
||||
return soup
|
||||
else:
|
||||
return False
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
url = self.url
|
||||
logger.info("url: "+url)
|
||||
soup = None
|
||||
try:
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
except exceptions.HTTPErrorFFF as e:
|
||||
if e.status_code != 404:
|
||||
raise
|
||||
data = self.decode_data(e.data)
|
||||
|
||||
# logger.debug(data)
|
||||
if not soup or self.loginNeededCheck(data):
|
||||
# always login if not already to avoid lots of headaches
|
||||
self.performLogin(url,data)
|
||||
# refresh website after logging in
|
||||
data = self.get_request(url,usecache=False)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# subscription check
|
||||
# logger.debug(soup)
|
||||
subCheck = soup.find('div',{'class':'click-to-read-full'})
|
||||
if subCheck and self.getConfig("auto_sub"):
|
||||
subSoup = self.doStorySubscribe(url,soup)
|
||||
if subSoup:
|
||||
soup = subSoup
|
||||
else:
|
||||
raise exceptions.FailedToDownload("Error when subscribing to story. This usually means a change in the website code.")
|
||||
elif subCheck and not self.getConfig("auto_sub"):
|
||||
raise exceptions.FailedToDownload("This story is only available to subscribers. You can subscribe manually on the web site, or set auto_sub:true in personal.ini.")
|
||||
|
||||
## Title
|
||||
a = soup.find('h1', {'id': 'story-title'})
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
mainmeta = soup.find('footer', {'class': 'main-meta'})
|
||||
alist = mainmeta.find('span', string='Author(s)')
|
||||
alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('/')[-1])
|
||||
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
||||
self.story.addToList('author',a.text)
|
||||
|
||||
newestChapter = None
|
||||
self.newestChapterNum = None
|
||||
# Find the chapters:
|
||||
chapters=soup.find('select',{'name':'chapter-nav'})
|
||||
hrefattr=None
|
||||
if chapters:
|
||||
chapters=chapters.find_all('option')
|
||||
hrefattr='value'
|
||||
else: # didn't find <select name='chapter-nav', look for alternative
|
||||
chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
|
||||
hrefattr='href'
|
||||
for index, chapter in enumerate(chapters):
|
||||
if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
|
||||
self.add_chapter(chapter.text,'https://' + self.getSiteDomain() + chapter[hrefattr])
|
||||
# note: AFF cuts off chapter names in list. this gets kind of fixed later on
|
||||
|
||||
|
||||
# find timestamp
|
||||
a = soup.find('span', string='Updated')
|
||||
if a == None:
|
||||
a = soup.find('span', string='Published') # use published date if work was never updated
|
||||
a = a.parent.find('time')
|
||||
chapterDate = makeDate(a['datetime'],self.dateformat)
|
||||
if newestChapter == None or chapterDate > newestChapter:
|
||||
newestChapter = chapterDate
|
||||
self.newestChapterNum = index
|
||||
|
||||
# story status
|
||||
a = mainmeta.find('span', string='Completed')
|
||||
if a:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
# story description
|
||||
try:
|
||||
jsonlink = soup.find('script',string=re.compile(r'/api/forewords/[0-9]+/foreword_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks
|
||||
fore_json = json.loads(self.get_request(jsonlink))
|
||||
content = self.make_soup(fore_json['post']).find('body') # BS4 adds <html><body> if not present.
|
||||
a = content.find('div', {'id':'story-description'})
|
||||
except:
|
||||
# not all stories have foreward link.
|
||||
a = soup.find('div', {'id':'story-description'})
|
||||
if a:
|
||||
self.setDescription(url,a)
|
||||
|
||||
# story tags
|
||||
a = mainmeta.find('span',string='Tags')
|
||||
if a:
|
||||
tags = a.parent.find_all('a')
|
||||
for tag in tags:
|
||||
self.story.addToList('tags', tag.text)
|
||||
|
||||
# story tags
|
||||
a = mainmeta.find('span',string='Characters')
|
||||
if a:
|
||||
self.story.addToList('characters', a.nextSibling)
|
||||
|
||||
# published on
|
||||
a = soup.find('span', string='Published')
|
||||
a = a.parent.find('time')
|
||||
self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))
|
||||
|
||||
# updated on
|
||||
a = soup.find('span', string='Updated')
|
||||
if a:
|
||||
a = a.parent.find('time')
|
||||
self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))
|
||||
|
||||
# word count
|
||||
a = soup.find('span', string='Total Word Count')
|
||||
if a:
|
||||
a = a.find_next('span')
|
||||
self.story.setMetadata('numWords', int(a.text.split()[0]))
|
||||
|
||||
# upvote, subs, and views
|
||||
a = soup.find('div',{'class':'title-meta'})
|
||||
spans = a.find_all('span', recursive=False)
|
||||
self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
|
||||
self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
|
||||
if len(spans) > 2: # views can be private
|
||||
self.story.setMetadata('views', spans[2].text.split()[0])
|
||||
|
||||
# cover art in the form of a div before chapter content
|
||||
if get_cover:
|
||||
cover_url = ""
|
||||
a = soup.find('div',{'id':'bodyText'})
|
||||
if a:
|
||||
a = a.find('div',{'class':'text-center'})
|
||||
if a:
|
||||
cover_url = a.find('img')['src']
|
||||
self.setCoverImage(url,cover_url)
|
||||
|
||||
# grab the text for an individual chapter
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
# logger.debug(data)
|
||||
|
||||
ageform = soup.select_one('form[action="/account/toggle_age"]')
|
||||
# logger.debug(ageform)
|
||||
if ageform and (self.is_adult or self.getConfig("is_adult")):
|
||||
params = {}
|
||||
params['is_of_age']=ageform.select_one('input#is_of_age')['value']
|
||||
params['current_url']=ageform.select_one('input#current_url')['value']
|
||||
params['csrf_aff_token']=ageform.select_one('input[name="csrf_aff_token"]')['value']
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/account/mark_over_18'
|
||||
logger.info("Will now toggle age to URL (%s)" % (loginUrl))
|
||||
# logger.debug(params)
|
||||
data = self.post_request(loginUrl, params)
|
||||
soup = self.make_soup(data)
|
||||
# logger.debug(data)
|
||||
|
||||
content = soup.find('div', {'id': 'user-submitted-body'})
|
||||
|
||||
if self.getConfig('inject_chapter_image'):
|
||||
logger.debug("Injecting chapter image")
|
||||
imgdiv = soup.select_one('div#bodyText div.bot-spacer')
|
||||
if imgdiv:
|
||||
content.insert(0, "\n")
|
||||
content.insert(0, imgdiv)
|
||||
content.insert(0, "\n")
|
||||
|
||||
if self.getConfig('inject_chapter_title'):
|
||||
logger.debug("Injecting full-length chapter title")
|
||||
title = soup.find('h1', {'id' : 'chapter-title'}).text
|
||||
newTitle = soup.new_tag('h3')
|
||||
newTitle.string = title
|
||||
content.insert(0, "\n")
|
||||
content.insert(0, newTitle)
|
||||
content.insert(0, "\n")
|
||||
|
||||
return self.utf8FromSoup(url,content)
|
||||
|
|
@ -1,227 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2013 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return Asr3SlashzoneOrgAdapter
|
||||
|
||||
class Asr3SlashzoneOrgAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','asr3')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d/%m/%y"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'asr3.slashzone.org'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/archive/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/archive/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=3"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# We tried the default and still got a warning, so
|
||||
# let's pull the warning number from the 'continue'
|
||||
# link and reload data.
|
||||
addurl = m.group(1)
|
||||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
#print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/archive/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Rating
|
||||
rate = stripHTML(soup.find('div',{'id':'pagetitle'}))
|
||||
rate = rate[rate.rindex('[')+1:rate.rindex(']')]
|
||||
self.story.setMetadata('rating', rate)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/archive/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
metadiv = soup.find('div',{'class':'content'})
|
||||
smalldiv = metadiv.find('div',{'class':'small'})
|
||||
|
||||
categorys = smalldiv.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for category in categorys:
|
||||
self.story.addToList('category',category.string)
|
||||
|
||||
chars = smalldiv.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
ships = smalldiv.parent.findAll('a',href=re.compile(r'browse\.php\?type=class&type_id=2&classid=1'))
|
||||
for ship in ships:
|
||||
self.story.addToList('ships',ship.string)
|
||||
|
||||
metatext = stripHTML(smalldiv)
|
||||
|
||||
if 'Completed: Yes' in metatext:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
wordstart=metatext.rindex('Word count:')+12
|
||||
words = metatext[wordstart:metatext.index(' ',wordstart)]
|
||||
self.story.setMetadata('numWords', words)
|
||||
|
||||
datesdiv = soup.find('div',{'class':'bottom'})
|
||||
dates = stripHTML(datesdiv).split()
|
||||
# Published: 04/26/2011 Updated: 03/06/2013
|
||||
self.story.setMetadata('datePublished', makeDate(dates[1], self.dateformat))
|
||||
self.story.setMetadata('dateUpdated', makeDate(dates[3], self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/archive/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
# skip 'report this' and 'TOC' links
|
||||
if 'contact.php' not in a['href'] and 'index' not in a['href']:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# remove 'small' leaving only summary.
|
||||
smalldiv.extract()
|
||||
self.setDescription(url,metadiv)
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -23,6 +23,7 @@
|
|||
### Fixed the removal of the extra tags from some of the stories and
|
||||
### removed the attributes from the paragraph and span tags
|
||||
###########################################################################
|
||||
from __future__ import absolute_import
|
||||
'''
|
||||
This works, but some of the stories have abysmal formatting, so it would
|
||||
probably need to be edited for reading.
|
||||
|
|
@ -49,16 +50,16 @@ import time
|
|||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
import sys
|
||||
import urlparse
|
||||
|
||||
from bs4 import Comment
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib import parse as urlparse
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return BDSMLibraryComSiteAdapter
|
||||
|
|
@ -68,13 +69,6 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252",
|
||||
"iso-8859-1"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -82,7 +76,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
# get storyId from url--url validation guarantees query is only storyid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
self._setURL('http://{0}/stories/story.php?storyid={1}'.format(self.getSiteDomain(), self.story.getMetadata('storyId')))
|
||||
self._setURL('https://{0}/stories/story.php?storyid={1}'.format(self.getSiteDomain(), self.story.getMetadata('storyId')))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','bdsmlib')
|
||||
|
|
@ -98,33 +92,19 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/stories/story.php?storyid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/stories/story.php?storyid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$"
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
if not (self.is_adult or self.getConfig("is_adult")):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
soup = self.make_soup(data)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
data = self.get_request(self.url)
|
||||
if 'The story does not exist' in data:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# Extract metadata
|
||||
title=soup.title.text.replace('BDSM Library - Story: ','').replace('\\','')
|
||||
|
|
@ -132,47 +112,33 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# Author
|
||||
author = soup.find('a', href=re.compile(r"/stories/author.php\?authorid=\d+"))
|
||||
i = 0
|
||||
while author == None:
|
||||
time.sleep(1)
|
||||
logger.warning('A problem retrieving the author information. Trying Again')
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
soup = self.make_soup(data)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
author = soup.find('a', href=re.compile(r"/stories/author.php\?authorid=\d+"))
|
||||
i += 1
|
||||
if i == 20:
|
||||
logger.info('Too Many cycles... exiting')
|
||||
sys.exit()
|
||||
|
||||
|
||||
authorurl = urlparse.urljoin(self.url, author['href'])
|
||||
self.story.setMetadata('author', author.text)
|
||||
self.story.setMetadata('authorUrl', authorurl)
|
||||
authorid = author['href'].split('=')[1]
|
||||
self.story.setMetadata('authorId', authorid)
|
||||
if author:
|
||||
authorurl = urlparse.urljoin(self.url, author['href'])
|
||||
self.story.setMetadata('author', author.text)
|
||||
self.story.setMetadata('authorUrl', authorurl)
|
||||
authorid = author['href'].split('=')[1]
|
||||
self.story.setMetadata('authorId', authorid)
|
||||
else:
|
||||
logger.info("Failed to find Author, setting to Anonymous")
|
||||
self.story.setMetadata('author','Anonymous')
|
||||
self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
|
||||
self.story.setMetadata('authorId','0')
|
||||
|
||||
# Find the chapters:
|
||||
# The update date is with the chapter links... so we will update it here as well
|
||||
for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+"&chapterid=\d+$")):
|
||||
for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
|
||||
value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
|
||||
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.getSiteDomain()+chapter['href']))
|
||||
self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# Get the MetaData
|
||||
# Erotia Tags
|
||||
tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode'))
|
||||
tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
|
||||
for tag in tags:
|
||||
self.story.addToList('eroticatags',tag.text)
|
||||
|
||||
for td in soup.findAll('td'):
|
||||
for td in soup.find_all('td'):
|
||||
if len(td.text)>0:
|
||||
if 'Added on:' in td.text and '<table' not in unicode(td):
|
||||
value = td.text.replace('Added on:','').strip()
|
||||
|
|
@ -192,7 +158,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
#Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
chaptertag = soup.find('div',{'class' : 'storyblock'})
|
||||
|
||||
# Some of the stories have the chapters in <pre> sections, so have to check for that
|
||||
|
|
@ -203,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
||||
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in chaptertag.findAll(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
# BDSM Library basically wraps it's own html around the document,
|
||||
# so we will be removing the script, title and meta content from the
|
||||
# storyblock
|
||||
for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'):
|
||||
for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
|
||||
tag.extract()
|
||||
|
||||
for tag in chaptertag.findAll('o:smarttagtype'):
|
||||
for tag in chaptertag.find_all('o:smarttagtype'):
|
||||
tag.name = 'span'
|
||||
|
||||
## I'm going to take the attributes off all of the tags
|
||||
## because they usually refer to the style that we removed above.
|
||||
for tag in chaptertag.findAll(True):
|
||||
for tag in chaptertag.find_all(True):
|
||||
tag.attrs = None
|
||||
|
||||
return self.utf8FromSoup(url,chaptertag)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,15 @@
|
|||
from datetime import timedelta
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
import urllib2
|
||||
import urlparse
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six.moves.urllib import parse as urlparse
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
from .. import exceptions
|
||||
|
||||
|
||||
|
|
@ -24,7 +27,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
SITE_ABBREVIATION = 'bvc'
|
||||
SITE_DOMAIN = 'bloodshedverse.com'
|
||||
|
||||
BASE_URL = 'http://' + SITE_DOMAIN + '/'
|
||||
BASE_URL = 'https://' + SITE_DOMAIN + '/'
|
||||
READ_URL_TEMPLATE = BASE_URL + 'stories.php?go=read&no=%s'
|
||||
|
||||
STARTED_DATETIME_FORMAT = '%m/%d/%Y'
|
||||
|
|
@ -40,19 +43,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
self._setURL(self.READ_URL_TEMPLATE % story_no)
|
||||
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
|
||||
|
||||
def _customized_fetch_url(self, url, exception=None, parameters=None):
|
||||
if exception:
|
||||
try:
|
||||
data = self._fetchUrl(url, parameters)
|
||||
except urllib2.HTTPError:
|
||||
raise exception(self.url)
|
||||
# Just let self._fetchUrl throw the exception, don't catch and
|
||||
# customize it.
|
||||
else:
|
||||
data = self._fetchUrl(url, parameters)
|
||||
|
||||
return self.make_soup(data)
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return BloodshedverseComAdapter.SITE_DOMAIN
|
||||
|
|
@ -62,7 +52,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
return cls.READ_URL_TEMPLATE % 1234
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape(self.BASE_URL + 'stories.php?go=') + r'(read|chapters)\&(amp;)?no=\d+$'
|
||||
return r'https?://' + re.escape(self.SITE_DOMAIN + '/stories.php?go=') + r'(read|chapters)\&(amp;)?no=\d+$'
|
||||
|
||||
# Override stripURLParameters so the "no" parameter won't get stripped
|
||||
@classmethod
|
||||
|
|
@ -70,7 +60,9 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
return url
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
soup = self._customized_fetch_url(self.url)
|
||||
logger.debug("URL: "+self.url)
|
||||
|
||||
soup = self.make_soup(self.get_request(self.url))
|
||||
|
||||
# Since no 404 error code we have to raise the exception ourselves.
|
||||
# A title that is just 'by' indicates that there is no author name
|
||||
|
|
@ -81,14 +73,24 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
for option in soup.find('select', {'name': 'chapter'}):
|
||||
title = stripHTML(option)
|
||||
url = self.READ_URL_TEMPLATE % option['value']
|
||||
self.chapterUrls.append((title, url))
|
||||
self.add_chapter(title, url)
|
||||
|
||||
# Reset the storyId to be the first chapter no. Needed
|
||||
# because emails contain link to later chapters instead.
|
||||
query_data = urlparse.parse_qs(self.get_chapter(0,'url'))
|
||||
story_no = query_data['no'][0]
|
||||
|
||||
self.story.setMetadata('storyId', story_no)
|
||||
self._setURL(self.READ_URL_TEMPLATE % story_no)
|
||||
logger.info("updated storyId:%s"%story_no)
|
||||
logger.info("updated storyUrl:%s"%self.url)
|
||||
|
||||
story_no = self.story.getMetadata('storyId')
|
||||
# Get the URL to the author's page and find the correct story entry to
|
||||
# scrape the metadata
|
||||
author_url = urlparse.urljoin(self.url, soup.find('a', {'class': 'headline'})['href'])
|
||||
soup = self._customized_fetch_url(author_url)
|
||||
soup = self.make_soup(self.get_request(author_url))
|
||||
|
||||
story_no = self.story.getMetadata('storyId')
|
||||
# Ignore first list_box div, it only contains the author information
|
||||
for list_box in soup('div', {'class': 'list_box'})[1:]:
|
||||
url = list_box.find('a', {'class': 'fictitle'})['href']
|
||||
|
|
@ -115,7 +117,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
|
||||
summary_div = list_box.find('div', {'class': 'list_summary'})
|
||||
if not self.getConfig('keep_summary_html'):
|
||||
summary = ''.join(summary_div(text=True))
|
||||
summary = ''.join(summary_div(string=True))
|
||||
else:
|
||||
summary = self.utf8FromSoup(author_url, summary_div)
|
||||
|
||||
|
|
@ -155,9 +157,6 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
|
||||
self.story.addToList('warnings', warning)
|
||||
|
||||
elif key == 'Chapters':
|
||||
self.story.setMetadata('numChapters', int(value))
|
||||
|
||||
elif key == 'Words':
|
||||
# Apparently only numChapters need to be an integer for
|
||||
# some strange reason. Remove possible ',' characters as to
|
||||
|
|
@ -172,12 +171,13 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
# ugly %p(am/pm) hack moved into makeDate so other sites can use it.
|
||||
self.story.setMetadata('dateUpdated', date)
|
||||
|
||||
if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
|
||||
if self.story.getMetadataRaw('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
def getChapterText(self, url):
|
||||
soup = self._customized_fetch_url(url)
|
||||
storytext_div = soup.find('div', {'class': 'storytext'})
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
storytext_div = soup.find('div', {'class': 'tl'})
|
||||
storytext_div = storytext_div.find('div', {'class': ''})
|
||||
|
||||
if self.getConfig('strip_text_links'):
|
||||
for anchor in storytext_div('a', {'class': 'FAtxtL'}):
|
||||
|
|
|
|||
|
|
@ -1,330 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
from bs4.element import Tag
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
# By virtue of being recent and requiring both is_adult and user/pass,
|
||||
# adapter_fanficcastletvnet.py is the best choice for learning to
|
||||
# write adapters--especially for sites that use the eFiction system.
|
||||
# Most sites that have ".../viewstory.php?sid=123" in the story URL
|
||||
# are eFiction.
|
||||
|
||||
# For non-eFiction sites, it can be considerably more complex, but
|
||||
# this is still a good starting point.
|
||||
|
||||
# In general an 'adapter' needs to do these five things:
|
||||
|
||||
# - 'Register' correctly with the downloader
|
||||
# - Site Login (if needed)
|
||||
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
|
||||
# - Grab the chapter list
|
||||
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
|
||||
# - Grab the chapter texts
|
||||
|
||||
# Search for XXX comments--that's where things are most likely to need changing.
|
||||
|
||||
# This function is called by the downloader in all adapter_*.py files
|
||||
# in this dir to register the adapter class. So it needs to be
|
||||
# updated to reflect the class below it. That, plus getSiteDomain()
|
||||
# take care of 'Registering'.
|
||||
def getClass():
|
||||
return BloodTiesFansComAdapter # XXX
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','btf') # XXX
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %b %Y" # XXX
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'bloodties-fans.com' # XXX
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
|
||||
# Furthermore, there's a couple sites now with more than
|
||||
# one warning level for different ratings. And they're
|
||||
# fussy about it. midnightwhispers has three: 4, 2 & 1.
|
||||
# we'll try 1 first.
|
||||
addurl = "&ageconsent=ok&warning=4" # XXX
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
# the title search to troubleshoot.
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. nfacommunity uses
|
||||
# &warning= -- actually, so do other sites. Must be an
|
||||
# eFiction book.
|
||||
|
||||
# viewstory.php?sid=561&warning=4
|
||||
# viewstory.php?sid=561&warning=1
|
||||
# viewstory.php?sid=561&warning=2
|
||||
#print data
|
||||
#m = re.search(r"'viewstory.php\?sid=1882(&warning=4)'",data)
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# We tried the default and still got a warning, so
|
||||
# let's pull the warning number from the 'continue'
|
||||
# link and reload data.
|
||||
addurl = m.group(1)
|
||||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/fiction/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fiction/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
listbox = soup.find('div',{'class':'listbox'})
|
||||
# <strong>Rating:</strong> M<br /> etc
|
||||
labels = listbox.findAll('strong')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next strong tag.
|
||||
svalue = ""
|
||||
while not isinstance(value,Tag) or value.name != 'strong':
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rating' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Words' in label:
|
||||
value=re.sub(r"\|",r"",value)
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
catstext = [cat.string for cat in cats]
|
||||
for cat in catstext:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
charstext = [char.string for char in chars]
|
||||
for char in charstext:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
value=re.sub(r"\|",r"",value)
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
value=re.sub(r"\|",r"",value)
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
# moved outside because they changed *most*, but not *all* labels to <strong>
|
||||
ships = listbox.findAll('a',href=re.compile(r'browse.php.type=class&(amp;)?type_id=2')) # crappy html: & vs & in url.
|
||||
shipstext = [ship.string for ship in ships]
|
||||
for ship in shipstext:
|
||||
self.story.addToList('ships',ship.string)
|
||||
|
||||
genres = listbox.findAll('a',href=re.compile(r'browse.php\?type=class&(amp;)?type_id=1')) # crappy html: & vs & in url.
|
||||
genrestext = [genre.string for genre in genres]
|
||||
for genre in genrestext:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/fiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,300 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return BuffyGilesComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class BuffyGilesComAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /efiction part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','bufg')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d/%m/%y"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'buffygiles.velocitygrass.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/efiction/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&warning=5"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. ksarchive uses
|
||||
# &warning= -- actually, so do other sites. Must be an
|
||||
# eFiction book.
|
||||
|
||||
# efiction/viewstory.php?sid=1882&warning=4
|
||||
# efiction/viewstory.php?sid=1654&ageconsent=ok&warning=5
|
||||
#print data
|
||||
m = re.search(r"'efiction/viewstory.php\?sid=542(&warning=5)'",data)
|
||||
m = re.search(r"'efiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# We tried the default and still got a warning, so
|
||||
# let's pull the warning number from the 'continue'
|
||||
# link and reload data.
|
||||
addurl = m.group(1)
|
||||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
||||
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/efiction/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next span class='label'
|
||||
svalue = ""
|
||||
while 'label' not in defaultGetattr(value,'class'):
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rated' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Word count' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"efiction/viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^efiction/viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('efiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
38
fanficfare/adapters/adapter_cfaa.py
Normal file
38
fanficfare/adapters/adapter_cfaa.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .base_otw_adapter import BaseOTWAdapter
|
||||
|
||||
def getClass():
|
||||
return CFAAAdapter
|
||||
|
||||
class CFAAAdapter(BaseOTWAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseOTWAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','cfaa')
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.cfaarchive.org'
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -16,17 +16,17 @@
|
|||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return ChaosSycophantHexComAdapter
|
||||
|
|
@ -38,11 +38,6 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -50,7 +45,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -91,13 +86,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
|
|
@ -108,11 +97,9 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
|
|
@ -129,11 +116,10 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('rating', rating)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
|
@ -144,12 +130,12 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
|
||||
|
||||
labels = soup.find_all('span',{'class':'label'})
|
||||
|
||||
value = labels[0].previousSibling
|
||||
svalue = ""
|
||||
while value != None:
|
||||
|
|
@ -159,7 +145,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
svalue += unicode(val)
|
||||
val = val.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
|
||||
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
|
@ -168,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('numWords', value.split(' -')[0])
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
|
|
@ -207,9 +193,8 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
|
|
@ -227,7 +212,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
107
fanficfare/adapters/adapter_chireadscom.py
Normal file
107
fanficfare/adapters/adapter_chireadscom.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import re
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
from fanficfare.htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def getClass():
|
||||
return ChireadsComSiteAdapter
|
||||
|
||||
|
||||
class ChireadsComSiteAdapter(BaseSiteAdapter):
|
||||
NEW_DATE_FORMAT = '%Y/%m/%d %H:%M:%S'
|
||||
OLD_DATE_FORMAT = '%m/%d/%Y %I:%M:%S %p'
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev', 'chireads')
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
match = re.match(self.getSiteURLPattern(), url)
|
||||
if not match:
|
||||
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())
|
||||
|
||||
story_id = match.group('id')
|
||||
self.story.setMetadata('storyId', story_id)
|
||||
self._setURL('https://%s/category/translatedtales/%s/' % (self.getSiteDomain(), story_id))
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'chireads.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return 'https://%s/category/translatedtales/story-name' % cls.getSiteDomain()
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r'https?://chireads\.com/category/translatedtales/(?P<id>[^/]+)(/)?'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
|
||||
data = self.get_request(self.url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
info = soup.select_one('.inform-inform-data')
|
||||
self.story.setMetadata('title', stripHTML(info.h3).split(' | ')[0])
|
||||
|
||||
self.setCoverImage(self.url, soup.select_one('.inform-product > img')['src'])
|
||||
|
||||
# Unicode strings because ':' isn't ':', but \xef\xbc\x9a
|
||||
# author = stripHTML(info.h6).split(u' ')[0].replace(u'Auteur : ', '', 1)
|
||||
|
||||
author = stripHTML(info.h6).split('Babelcheck')[0].replace('Auteur : ', '').replace('\xc2\xa0', '')
|
||||
# author = stripHTML(info.h6).split('\xa0')[0].replace(u'Auteur : ', '', 1)
|
||||
self.story.setMetadata('author', author)
|
||||
self.story.setMetadata('authorId', author)
|
||||
## site doesn't have authorUrl links.
|
||||
|
||||
datestr = stripHTML(soup.select_one('.newestchapitre > div > a')['href'])[-11:-1]
|
||||
date = makeDate(datestr, '%Y/%m/%d')
|
||||
if date:
|
||||
self.story.setMetadata('dateUpdated', date)
|
||||
|
||||
intro = stripHTML(info.select_one('.inform-inform-txt').span)
|
||||
self.setDescription(self.url, intro)
|
||||
|
||||
for content in soup.find_all('div', {'id': 'content'}):
|
||||
for a in content.find_all('a'):
|
||||
self.add_chapter(a.get_text(), a['href'])
|
||||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
content = soup.select_one('#content')
|
||||
|
||||
if None == content:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,content)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -16,18 +16,19 @@
|
|||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import sys
|
||||
|
||||
from bs4.element import Comment
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return ChosenTwoFanFicArchiveAdapter
|
||||
|
|
@ -39,12 +40,6 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8",
|
||||
"iso-8859-1"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -54,7 +49,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','chosen2')
|
||||
|
|
@ -70,10 +65,10 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?"+re.escape("://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -83,19 +78,13 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
addURL = "&ageconsent=ok&warning=3"
|
||||
else:
|
||||
addURL = ""
|
||||
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = '{0}&index=1{1}'.format(self.url,addURL)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
|
@ -103,15 +92,13 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied("{0} says: Access denied. This story has not been validated by the adminstrators of this site.".format(self.getSiteDomain()))
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
## Some stories have a banner that has it's own a tag before the actual text title...
|
||||
## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
|
||||
a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
|
||||
a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
|
|
@ -119,16 +106,15 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
# so I'm checking the pagetitle div for this as well
|
||||
a = soup.find('div',{'id':'pagetitle'}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
#self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://{0}/{1}{2}'.format(self.host, chapter['href'],addURL)))
|
||||
#self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
|
||||
self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
|
@ -141,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
return ""
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
labels = soup.find_all('span',{'class':'label'})
|
||||
for labelspan in labels:
|
||||
val = labelspan.nextSibling
|
||||
value = unicode('')
|
||||
|
|
@ -163,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('numWords', stripHTML(value))
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
|
||||
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Pairing' in label:
|
||||
ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
|
||||
ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
|
||||
for ship in ships:
|
||||
self.story.addToList('ships',ship.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
||||
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
|
|
@ -206,17 +192,16 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
# skip 'report this' and 'TOC' links
|
||||
if 'contact.php' not in a['href'] and 'index' not in a['href']:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
# this site has several links to each story.
|
||||
if a.text == 'Latest Chapter':
|
||||
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
|
|
@ -231,7 +216,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -1,237 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
|
||||
def getClass():
|
||||
return CSIForensicsComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class CSIForensicsComAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','csiforensics')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %b %Y"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'csi-forensics.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=5&skin=elegantcsi"
|
||||
else:
|
||||
addurl="&skin=elegantcsi"
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
# the title search to troubleshoot.
|
||||
if "This story is rated NC-17, and therefore is not suitable for minors. If you are below the age required to view such material in your locality, please return from whence you came." in data: # XXX
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Rating
|
||||
rate = stripHTML(soup.find('div',{'id':'pagetitle'}))
|
||||
rate = rate[rate.rindex('[')+1:rate.rindex(']')]
|
||||
self.story.setMetadata('rating', rate)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
smalldiv = soup.find('div', {'class' : 'small'})
|
||||
|
||||
|
||||
chars = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
metatext = stripHTML(smalldiv)
|
||||
|
||||
if 'Completed: Yes' in metatext:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
word=soup.find(text=re.compile("Word count:")).split(':')
|
||||
self.story.setMetadata('numWords', word[1])
|
||||
|
||||
cats = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
warnings = smalldiv.findAll('a',href=re.compile(r'browse.php\?type=class(&)type_id=2(&)classid=\d+'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
date=soup.find('div',{'class' : 'bottom'})
|
||||
pd=date.find(text=re.compile("Published:")).string.split(': ')
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(pd[1].split(' U')[0]), self.dateformat))
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(pd[2]), self.dateformat))
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
pub=0
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Genres' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
smalldiv.extract()
|
||||
|
||||
# Summary
|
||||
summary = soup.find('div', {'class' : 'content'})
|
||||
self.setDescription(url,summary)
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,13 +15,21 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
# Software: eFiction
|
||||
from base_efiction_adapter import BaseEfictionAdapter
|
||||
from .base_efiction_adapter import BaseEfictionAdapter
|
||||
|
||||
class DarkSolaceOrgAdapter(BaseEfictionAdapter):
|
||||
|
||||
@classmethod
|
||||
def getProtocol(self):
|
||||
"""
|
||||
Some, but not all site now require https.
|
||||
"""
|
||||
return "https"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'dark-solace.org'
|
||||
|
|
|
|||
|
|
@ -1,300 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2013 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return DeepInMySoulNetAdapter ## XXX
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
    """eFiction adapter for the archive at www.deepinmysoul.net.

    Story pages live under /fiction/viewstory.php?sid=NNNN.  The site may
    require login and/or an adult-content warning click-through; both are
    handled in extractChapterUrlsAndMetadata().
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["Windows-1252",
                       "utf8"] # 1252 is a superset of iso-8859-1.
                               # Most sites that claim to be
                               # iso-8859-1 (and some that claim to be
                               # utf8) are really windows-1252.
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.  This site keeps the /fiction part.
        self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','dimsn') ## XXX

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%B %d, %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'www.deepinmysoul.net' # XXX

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        """Return True when *data* contains any of the site's login-required messages."""
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        """Log in with the adapter's credentials (or configured ones).

        Raises exceptions.FailedToLogin when the resulting page doesn't
        show the logged-in 'Member Account' marker.
        """
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                            params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
        else:
            return True

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=4"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        # Since the warning text can change by warning level, let's
        # look for the warning pass url. ksarchive uses
        # &warning= -- actually, so do other sites. Must be an
        # eFiction book.

        # fiction/viewstory.php?sid=1882&warning=4
        # fiction/viewstory.php?sid=1654&ageconsent=ok&warning=5
        m = re.search(r"'fiction/viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
        if m != None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except urllib2.HTTPError as e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        # Now go hunting for all the meta data and the chapter list.

        pagetitle = soup.find('div',{'id':'pagecontent'})

        ## Title
        a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))

        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fiction/'+chapter['href']+addurl))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: tolerant attribute lookup on soup nodes,
        # returning "" for plain text nodes that have no attributes.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'.
                ## Guard on value: when Summary is the last labelled
                ## section, nextSibling eventually returns None.
                svalue = ""
                while value and 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('fiction/viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch *url* and return the story <div> as sanitized utf8 markup."""

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
|
||||
|
||||
|
|
@ -1,243 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
    """Entry point used by the adapter registry: return this module's adapter class."""
    return DestinysGatewayComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class DestinysGatewayComAdapter(BaseSiteAdapter):
    """eFiction adapter for www.destinysgateway.com.

    Story pages live at /viewstory.php?sid=NNNN (no /fiction prefix).
    Unlike most eFiction adapters this one has no login step; only the
    adult-warning click-through is handled.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["Windows-1252",
                       "utf8"] # 1252 is a superset of iso-8859-1.
                               # Most sites that claim to be
                               # iso-8859-1 (and some that claim to be
                               # utf8) are really windows-1252.
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','dgrfa')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d %Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'www.destinysgateway.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"


    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if self.is_adult or self.getConfig("is_adult"):
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number. print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=4"
        else:
            addurl=""

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+'&index=1'+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # Look for the warning pass url to detect an adult-content gate.
        m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
        if m != None:
            if self.is_adult or self.getConfig("is_adult"):
                # We tried the default and still got a warning, so
                # let's pull the warning number from the 'continue'
                # link and reload data.
                addurl = m.group(1)
                # correct stupid &amp; error in url.
                addurl = addurl.replace("&amp;","&")
                url = self.url+'&index=1'+addurl
                logger.debug("URL 2nd try: "+url)

                try:
                    data = self._fetchUrl(url)
                except urllib2.HTTPError, e:
                    if e.code == 404:
                        raise exceptions.StoryDoesNotExist(self.url)
                    else:
                        raise e
            else:
                raise exceptions.AdultCheckRequired(self.url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: tolerant attribute lookup on soup nodes,
        # returning "" for plain text nodes that have no attributes.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            value = labelspan.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                ## Everything until the next span class='label'
                svalue = ""
                while value and 'label' not in defaultGetattr(value,'class'):
                    svalue += unicode(value)
                    value = value.nextSibling
                self.setDescription(url,svalue)
                #self.story.setMetadata('description',stripHTML(svalue))

            if 'Rated' in label:
                self.story.setMetadata('rating', value)

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                # there's a stray [ at the end.
                #value = value[0:-1]
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
            i=1
            for a in storyas:
                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                    self.setSeries(series_name, i)
                    self.story.setMetadata('seriesUrl',series_url)
                    break
                i+=1

        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch *url* and return the story <div> as sanitized utf8 markup."""

        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

        return self.utf8FromSoup(url,div)
|
||||
256
fanficfare/adapters/adapter_deviantartcom.py
Normal file
256
fanficfare/adapters/adapter_deviantartcom.py
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2021 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import re
|
||||
# py2 vs py3 transition
|
||||
from ..six.moves.urllib.parse import urlparse
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
from fanficfare.htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
from fanficfare.dateutils import parse_relative_date_string
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def getClass():
    """Entry point used by the adapter registry: return this module's adapter class."""
    return DeviantArtComSiteAdapter
|
||||
|
||||
|
||||
class DeviantArtComSiteAdapter(BaseSiteAdapter):
    """Adapter for literature deviations on www.deviantart.com.

    A deviation is a single page, so every download is one chapter.
    Login is a two-step flow (username first, then password) and is only
    attempted when the deviation is restricted (watchers-only, mature,
    or members-only).
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'dac')

        self.username = 'NoneGiven'
        self.password = ''
        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        # Only the author can be taken from the URL here; storyId is
        # resolved later in extractChapterUrlsAndMetadata().
        author = match.group('author')
        self.story.setMetadata('author', author)
        self.story.setMetadata('authorId', author)
        self.story.setMetadata('authorUrl', 'https://www.deviantart.com/' + author)
        self._setURL(url)

    @staticmethod
    def getSiteDomain():
        return 'www.deviantart.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['www.deviantart.com']

    @classmethod
    def getProtocol(cls):
        return 'https'

    @classmethod
    def getSiteExampleURLs(cls):
        return 'https://%s/<author>/art/<work-name>' % cls.getSiteDomain()

    def getSiteURLPattern(self):
        return r'https?://www\.deviantart\.com/(?P<author>[^/]+)/art/(?P<id>[^/]+)/?'

    def performLogin(self, url):
        """Run deviantART's two-step login (username page, then password page).

        Raises exceptions.FailedToLogin when no username is configured,
        when the username step doesn't yield a lu_token2, or when the
        final page is still the login form.
        """
        if self.username and self.username != 'NoneGiven':
            username = self.username
        else:
            username = self.getConfig('username')

        # logger.debug("\n\nusername:(%s)\n\n"%username)
        if not username:
            logger.info("Login Required for URL %s" % url)
            raise exceptions.FailedToLogin(url,username)

        data = self.get_request_raw('https://www.deviantart.com/users/login', referer=url, usecache=False)
        data = self.decode_data(data)
        soup = self.make_soup(data)
        # copy the hidden form tokens from the login page into step 2.
        params = {
            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
            'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
            'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
            'challenge': soup.find('input', {'name': 'challenge'})['value'],
            'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
            'remember': 'on',
            'username': username
        }

        loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/step2'
        logger.debug('Will now login to deviantARt as (%s)' % username)

        result = self.post_request(loginUrl, params, usecache=False)
        soup = self.make_soup(result)
        if not soup.find('input', {'name': 'lu_token2'}):
            logger.info("Login Failed for URL %s (no lu_token2 found)" % url)
            raise exceptions.FailedToLogin(url,username)

        params = {
            'referer': 'https://www.deviantart.com/_sisu/do/signin', # soup.find('input', {'name': 'referer'})['value'],
            'referer_type': soup.find('input', {'name': 'referer_type'})['value'],
            'csrf_token': soup.find('input', {'name': 'csrf_token'})['value'],
            'challenge': soup.find('input', {'name': 'challenge'})['value'],
            'lu_token': soup.find('input', {'name': 'lu_token'})['value'],
            'lu_token2': soup.find('input', {'name': 'lu_token2'})['value'],
            'remember': 'on',
            'username': ''
        }

        if self.password:
            params['password'] = self.password
        else:
            params['password'] = self.getConfig('password')

        # logger.debug("\n\nparams['password']:(%s)\n\n"%params['password'])
        loginUrl = 'https://' + self.getSiteDomain() + '/_sisu/do/signin'
        logger.debug('Will now send password to deviantARt')

        result = self.post_request(loginUrl, params, usecache=False)

        if 'Log In | DeviantArt' in result:
            logger.error('Failed to login to deviantArt as %s' % username)
            raise exceptions.FailedToLogin('https://www.deviantart.com', username)
        else:
            return True

    def requiresLogin(self, data):
        """True when the page says the deviation is members-only."""
        return '</a> has limited the viewing of this artwork to members of the DeviantArt community only' in data

    def isLoggedIn(self, data):
        """True when the page contains the logged-in logout form."""
        return '<form id="logout-form" action="https://www.deviantart.com/users/logout" method="POST">' in data

    def isWatchersOnly(self, data):
        """True when the deviation is restricted to the author's watchers."""
        return '>Watchers-Only Deviation<' in data

    def requiresMatureContentEnabled(self, data):
        """True when the page shows any of dA's mature-content gate messages."""
        return (
            '>This content is intended for mature audiences<' in data
            or '>This deviation is intended for mature audiences<' in data
            or '>This filter hides content that may be inappropriate for some viewers<' in data
            or '>May contain sensitive content<' in data
            or '>Log in to view<' in data
            or '>This deviation has been labeled as containing themes not suitable for all deviants.<' in data
        )

    def extractChapterUrlsAndMetadata(self):
        logger.debug('URL: %s', self.url)

        data = self.get_request(self.url)
        soup = self.make_soup(data)

        ## story can require login outright, or it can show up as
        ## watchers-only or mature-enabled without the same 'requires
        ## login' strings.
        if self.requiresLogin(data) or ( not self.isLoggedIn(data) and
                                         (self.isWatchersOnly(data) or
                                          self.requiresMatureContentEnabled(data)) ):
            if self.performLogin(self.url):
                data = self.get_request(self.url, usecache=False)
                soup = self.make_soup(data)

        ## Check watchers only and mature enabled again, separately,
        ## after login because they can still apply after login.
        if self.isWatchersOnly(data):
            raise exceptions.FailedToDownload(
                'Deviation is only available for watchers.' +
                'You must watch this author before you can download it.'
            )
        if self.requiresMatureContentEnabled(data):
            raise exceptions.FailedToDownload(
                'Deviation is set as mature, you must go into your account ' +
                'and enable showing of mature content.'
            )

        appurl = soup.select_one('meta[property="og:url"]')['content']
        if appurl:
            story_id = urlparse(appurl).path.lstrip('/')
        else:
            logger.debug("Looking for JS story id")
            ## after login, this is only found in a JS block. Dunno why.
            ## F875A309-B0DB-860E-5079-790D0FBE5668
            ## re.search, not re.match: the UUID appears mid-document,
            ## and re.match only matches at the start of the string.
            match = re.search(r'\\"deviationUuid\\":\\"(?P<id>[A-Z0-9-]+)\\",',data)
            if match:
                story_id = match.group('id')
            else:
                raise exceptions.FailedToDownload('Failed to find Story ID.')
        self.story.setMetadata('storyId', story_id)

        title = soup.select_one('h1').get_text()
        self.story.setMetadata('title', stripHTML(title))

        ## dA has no concept of status
        # self.story.setMetadata('status', 'Completed')

        pubdate = soup.select_one('time').get_text()

        # dA shows either an absolute date ('Oct 3, 2021') or a relative
        # one ('3 days ago'); try absolute first, then fall back.
        try:
            self.story.setMetadata('datePublished', makeDate(pubdate, '%b %d, %Y'))
        except:
            self.story.setMetadata('datePublished', parse_relative_date_string(pubdate))

        # do description here if appropriate

        # story tags become 'genre'; select() returns a (possibly empty) list.
        story_tags = soup.select('a[href^="https://www.deviantart.com/tag"] span')
        for tag in story_tags:
            self.story.addToList('genre', tag.get_text())

        self.add_chapter(title, self.url)

    def getChapterText(self, url):
        """Fetch *url* and return the deviation's text content as sanitized markup.

        Tries the three content-container selectors dA has used over the
        years, newest first.
        """
        logger.debug('Getting chapter text from: %s', url)
        data = self.get_request(url)
        # logger.debug(data)
        soup = self.make_soup(data)

        # remove comments section to avoid false matches
        comments = soup.select_one('[data-hook=comments_thread]')
        if comments:
            comments.decompose()
        # previous search not always found in some stories.
        # <div id="comments"></div> inside the real containing
        # div seems more common
        commentsdiv = soup.select_one('div#comments')
        if commentsdiv:
            commentsdiv.parent.decompose()

        # three different 'content' tags to look for.
        # This is the current in Oct 2024
        content = soup.select_one('[data-editor-viewer="1"]')

        if content is None:
            # older story? I can't find any of this style in Oct2024
            content = soup.select_one('[data-id="rich-content-viewer"]')

        if content is None:
            # olderer story, but used by some older (2018) posts
            content = soup.select_one('.legacy-journal')

        if content is None:
            raise exceptions.FailedToDownload(
                'Could not find story text. Please open a bug with the URL %s' % self.url
            )

        return self.utf8FromSoup(url, content)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,17 +15,16 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return DokugaComAdapter
|
||||
|
|
@ -37,11 +36,6 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -80,7 +74,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
return "http://"+cls.getSiteDomain()+"/fanfiction/story/1234/1 http://"+cls.getSiteDomain()+"/spark/story/1234/1"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"http://"+self.getSiteDomain()+"/(fanfiction|spark)?/story/\d+/?\d+?$"
|
||||
return r"http://"+self.getSiteDomain()+r"/(fanfiction|spark)?/story/\d+/?\d+?$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -101,17 +95,17 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
params['Submit'] = 'Submit'
|
||||
|
||||
# copy all hidden input tags to pick up appropriate tokens.
|
||||
for tag in soup.findAll('input',{'type':'hidden'}):
|
||||
for tag in soup.find_all('input',{'type':'hidden'}):
|
||||
params[tag['name']] = tag['value']
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['username']))
|
||||
|
||||
d = self._postUrl(loginUrl, params)
|
||||
d = self.post_request(loginUrl, params)
|
||||
|
||||
if "Your session has expired. Please log in again." in d:
|
||||
d = self._postUrl(loginUrl, params)
|
||||
d = self.post_request(loginUrl, params)
|
||||
|
||||
if "Logout" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
|
|
@ -129,28 +123,20 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url,soup)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title and author
|
||||
a = soup.find('div', {'align' : 'center'}).find('h3')
|
||||
|
|
@ -167,23 +153,22 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
chapters = soup.find('select').findAll('option')
|
||||
chapters = soup.find('select').find_all('option')
|
||||
if len(chapters)==1:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1'))
|
||||
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
|
||||
else:
|
||||
for chapter in chapters:
|
||||
# just in case there's tags, like <i> in chapter titles. /fanfiction/story/7406/1
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/'+chapter['value']))
|
||||
self.add_chapter(chapter,'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
asoup = self.make_soup(self._fetchUrl(alink))
|
||||
asoup = self.make_soup(self.get_request(alink))
|
||||
|
||||
if 'fanfiction' in self.section:
|
||||
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
|
||||
|
||||
#grab the rest of the metadata from the author's page
|
||||
for div in asoup.findAll('div'):
|
||||
for div in asoup.find_all('div'):
|
||||
nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
|
||||
if nav != None:
|
||||
break
|
||||
|
|
@ -223,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
|
||||
else:
|
||||
asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
|
||||
for div in asoup.findAll('div'):
|
||||
for div in asoup.find_all('div'):
|
||||
nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
|
||||
if nav != None:
|
||||
break
|
||||
|
|
@ -267,7 +252,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'chtext'})
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -16,17 +16,17 @@
|
|||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return DracoAndGinnyComAdapter
|
||||
|
|
@ -38,11 +38,6 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -98,7 +93,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
d = self.post_request(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
|
|
@ -125,18 +120,12 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -150,24 +139,16 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
|
|
@ -180,11 +161,10 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
|
@ -201,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
|
||||
self.setDescription(url,content.find('blockquote'))
|
||||
|
||||
for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
|
||||
for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
|
||||
for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
labels = content.findAll('b')
|
||||
labels = content.find_all('b')
|
||||
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
|
|
@ -228,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
|
|
@ -265,10 +245,9 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
# skip 'report this' and 'TOC' links
|
||||
|
|
@ -288,7 +267,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'class' : 'listbox'})
|
||||
|
||||
|
|
|
|||
|
|
@ -1,311 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
from bs4.element import Tag
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return DramioneOrgAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class DramioneOrgAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252",]
|
||||
# 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','drmn')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %B %Y"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'dramione.org'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&warning=5"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
# the title search to troubleshoot.
|
||||
if "Stories that are suitable for ages 16 and older" in data:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Use banner as cover if found
|
||||
coverurl = ''
|
||||
img = soup.find('img',{'class':'banner'})
|
||||
if img:
|
||||
coverurl = img['src']
|
||||
#print "Cover: "+coverurl
|
||||
a = soup.find(text="This story has a banner; click to view.")
|
||||
if a:
|
||||
#print "A: "+ ', '.join("(%s, %s)" %tup for tup in a.parent.attrs)
|
||||
coverurl = a.parent['href']
|
||||
#print "Cover: "+coverurl
|
||||
if coverurl:
|
||||
self.setCoverImage(url,coverurl)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
genres=soup.findAll('a', {'class' : "tag-1"})
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
warnings=soup.findAll('a', {'class' : "tag-2"})
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
themes=soup.findAll('a', {'class' : "tag-3"})
|
||||
for theme in themes:
|
||||
self.story.addToList('themes',theme.string)
|
||||
|
||||
hermiones=soup.findAll('a', {'class' : "tag-4"})
|
||||
for hermione in hermiones:
|
||||
self.story.addToList('hermiones',hermione.string)
|
||||
|
||||
dracos=soup.findAll('a', {'class' : "tag-5"})
|
||||
for draco in dracos:
|
||||
self.story.addToList('dracos',draco.string)
|
||||
|
||||
timelines=soup.findAll('a', {'class' : "tag-6"})
|
||||
for timeline in timelines:
|
||||
self.story.addToList('timeline',timeline.string)
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
listbox = soup.find('div',{'class':'listbox'})
|
||||
# <strong>Rated:</strong> M<br /> etc
|
||||
labels = listbox.findAll('strong')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next strong tag.
|
||||
svalue = ""
|
||||
while not isinstance(value,Tag) or value.name != 'strong':
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
|
||||
if 'Rated' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Word count' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Read' in label:
|
||||
self.story.setMetadata('read', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
value=re.sub(r"(\d+)(st|nd|rd|th)",r"\1",value)
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
value=re.sub(r"(\d+)(st|nd|rd|th)",r"\1",value)
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
try:
|
||||
self.story.setMetadata('reviews',
|
||||
stripHTML(soup.find('h2',{'id':'pagetitle'}).
|
||||
findAll('a', href=re.compile(r'^reviews.php'))[1]))
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2013 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return EfictionEstelielDeAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class EfictionEstelielDeAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','eesd')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%B %d, %Y"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'efiction.esteliel.de'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title and author
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
list = soup.find('div', {'class':'listbox'})
|
||||
labelspan=list.find('span',{'class':'label'})
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
labels = list.findAll('b')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next span class='label'
|
||||
svalue = ""
|
||||
while 'Rating' not in unicode(value):
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rating' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Words' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Category' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
if list.find('a', href=re.compile(r"series.php")) != None:
|
||||
for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")):
|
||||
# Find Series name from series URL.
|
||||
series_url = 'http://'+self.host+'/'+series['href']
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
name=seriessoup.find('div', {'id' : 'pagetitle'})
|
||||
name.find('a').extract()
|
||||
self.setSeries(name.text.split(' by[')[0], i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
i=0
|
||||
break
|
||||
i+=1
|
||||
if i == 0:
|
||||
break
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -16,17 +16,16 @@
|
|||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return EFPFanFicNet
|
||||
|
|
@ -38,11 +37,6 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -52,7 +46,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','efp')
|
||||
|
|
@ -64,14 +58,14 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.efpfanfic.net'
|
||||
return 'efpfanfic.net'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://(www\.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -93,11 +87,11 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Invia'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?sid='+self.story.getMetadata('storyId')
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/user.php?sid='+self.story.getMetadata('storyId')
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
d = self.post_request(loginUrl, params)
|
||||
|
||||
if '<a class="menu" href="newaccount.php">' in d : # register for new account link
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
|
|
@ -113,27 +107,19 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
# raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'^viewstory\.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
|
|
@ -142,29 +128,28 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapter selector
|
||||
select = soup.find('select', { 'name' : 'sid' } )
|
||||
|
||||
|
||||
if select is None:
|
||||
# no selector found, so it's a one-chapter story.
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),url))
|
||||
# no selector found, so it's a one-chapter story.
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
else:
|
||||
allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')})
|
||||
allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
|
||||
for o in allOptions:
|
||||
url = u'http://%s/%s' % ( self.getSiteDomain(),
|
||||
url = u'https://%s/%s' % ( self.getSiteDomain(),
|
||||
o['value'])
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
title = stripHTML(o)
|
||||
self.chapterUrls.append((title,url))
|
||||
self.add_chapter(title,url)
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
self.story.setMetadata('language','Italian')
|
||||
|
||||
# normalize story URL to first chapter if later chapter URL was given:
|
||||
url = self.chapterUrls[0][1].replace('&i=1','')
|
||||
url = self.get_chapter(0,'url').replace('&i=1','')
|
||||
logger.debug("Normalizing to URL: "+url)
|
||||
self._setURL(url)
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
|
@ -184,15 +169,15 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
# no storya, but do have authsoup--we're looping on author pages.
|
||||
if authsoup != None:
|
||||
# last author link with offset should be the 'next' link.
|
||||
authurl = u'http://%s/%s' % ( self.getSiteDomain(),
|
||||
authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
|
||||
authurl = u'https://%s/%s' % ( self.getSiteDomain(),
|
||||
authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
|
||||
|
||||
# Need author page for most of the metadata.
|
||||
logger.debug("fetching author page: (%s)"%authurl)
|
||||
authsoup = self.make_soup(self._fetchUrl(authurl))
|
||||
authsoup = self.make_soup(self.get_request(authurl))
|
||||
#print("authsoup:%s"%authsoup)
|
||||
|
||||
storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
|
||||
storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
|
||||
for storya in storyas:
|
||||
#print("======storya:%s"%storya)
|
||||
storyblock = storya.findParent('div',{'class':'storybloc'})
|
||||
|
|
@ -209,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
# Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
|
||||
# Categoria: <a href="categories.php?catid=1&parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&a=">3</a> recensioni</div>
|
||||
|
||||
cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
|
|
@ -273,12 +258,11 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?ssid=\d+&i=1"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
|
||||
|
|
@ -296,7 +280,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'class' : 'storia'})
|
||||
|
||||
|
|
@ -304,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
# remove any header and 'o:p' tags.
|
||||
for tag in div.findAll("head") + div.findAll("o:p"):
|
||||
for tag in div.find_all("head") + div.find_all("o:p"):
|
||||
tag.extract()
|
||||
|
||||
# change any html and body tags to div.
|
||||
for tag in div.findAll("html") + div.findAll("body"):
|
||||
for tag in div.find_all("html") + div.find_all("body"):
|
||||
tag.name='div'
|
||||
|
||||
# remove extra bogus doctype.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -16,17 +16,17 @@
|
|||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return ErosnSapphoSycophantHexComAdapter
|
||||
|
|
@ -38,11 +38,6 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -50,7 +45,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -91,13 +86,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -111,24 +100,16 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
pt = soup.find('div', {'id' : 'pagetitle'})
|
||||
|
|
@ -145,11 +126,10 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('rating', rating)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
|
@ -160,12 +140,12 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
|
||||
|
||||
labels = soup.find_all('span',{'class':'label'})
|
||||
|
||||
value = labels[0].previousSibling
|
||||
svalue = ""
|
||||
while value != None:
|
||||
|
|
@ -175,7 +155,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
svalue += unicode(val)
|
||||
val = val.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
|
||||
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
|
@ -184,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('numWords', value.split(' -')[0])
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
|
||||
for genre in genres:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
|
||||
for warning in warnings:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
|
|
@ -223,9 +203,8 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
# skip 'report this' and 'TOC' links
|
||||
|
|
@ -245,7 +224,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -- coding: utf-8 --
|
||||
# Copyright 2013 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2013 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -18,19 +18,19 @@
|
|||
### Adapted by GComyn - November 26, 2016
|
||||
###
|
||||
####################################################################################################
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import sys
|
||||
import urllib2
|
||||
from bs4 import UnicodeDammit, Comment
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
####################################################################################################
|
||||
def getClass():
|
||||
|
|
@ -42,14 +42,6 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
logger.debug("FanficAuthorsNetAdapter.__init__ - url='{0}'".format(url))
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252",
|
||||
"iso-8859-1"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
|
|
@ -61,8 +53,11 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
#Setting the 'Zone' for each "Site"
|
||||
self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net','')
|
||||
|
||||
# site change .nsns to -nsns
|
||||
self.zone = self.zone.replace('.nsns','-nsns')
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://{0}.{1}/{2}/'.format(
|
||||
self._setURL('https://{0}.{1}/{2}/'.format(
|
||||
self.zone, self.getBaseDomain(), self.story.getMetadata('storyId')))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
|
|
@ -71,10 +66,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %b %y"
|
||||
|
||||
|
||||
################################################################################################
|
||||
def getBaseDomain(self):
|
||||
''' Added because fanficauthors.net does send you to www.fanficauthors.net when
|
||||
''' Added because fanficauthors.net does send you to www.fanficauthors.net when
|
||||
you go to it '''
|
||||
return 'fanficauthors.net'
|
||||
|
||||
|
|
@ -87,7 +82,10 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
|
||||
# need both .nsns(old) and -nsns(new) because it's a domain
|
||||
# change, not just URL change.
|
||||
return ['aaran-st-vines.nsns.fanficauthors.net',
|
||||
'aaran-st-vines-nsns.fanficauthors.net',
|
||||
'abraxan.fanficauthors.net',
|
||||
'bobmin.fanficauthors.net',
|
||||
'canoncansodoff.fanficauthors.net',
|
||||
|
|
@ -103,9 +101,12 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
'jeconais.fanficauthors.net',
|
||||
'kinsfire.fanficauthors.net',
|
||||
'kokopelli.nsns.fanficauthors.net',
|
||||
'kokopelli-nsns.fanficauthors.net',
|
||||
'ladya.nsns.fanficauthors.net',
|
||||
'ladya-nsns.fanficauthors.net',
|
||||
'lorddwar.fanficauthors.net',
|
||||
'mrintel.nsns.fanficauthors.net',
|
||||
'mrintel-nsns.fanficauthors.net',
|
||||
'musings-of-apathy.fanficauthors.net',
|
||||
'ruskbyte.fanficauthors.net',
|
||||
'seelvor.fanficauthors.net',
|
||||
|
|
@ -116,35 +117,43 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
################################################################################################
|
||||
@classmethod
|
||||
def getSiteExampleURLs(self):
|
||||
return ("http://aaran-st-vines.nsns.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://abraxan.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://bobmin.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://canoncansodoff.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://chemprof.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://copperbadge.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://crys.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://deluded-musings.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://draco664.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://fp.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://frenchsession.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://ishtar.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://jbern.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://jeconais.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://kinsfire.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://kokopelli.nsns.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://ladya.nsns.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://lorddwar.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://mrintel.nsns.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://musings-of-apathy.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://ruskbyte.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://seelvor.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://tenhawk.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://viridian.fanficauthors.net/[StoryId]/\n"
|
||||
+ "http://whydoyouneedtoknow.fanficauthors.net/[StoryId]/\n")
|
||||
return ("https://aaran-st-vines-nsns.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://abraxan.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://bobmin.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://canoncansodoff.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://chemprof.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://copperbadge.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://crys.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://deluded-musings.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://draco664.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://fp.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://frenchsession.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://ishtar.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://jbern.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://jeconais.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://kinsfire.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://kokopelli-nsns.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://ladya-nsns.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://lorddwar.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://mrintel-nsns.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://musings-of-apathy.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://ruskbyte.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://seelvor.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://tenhawk.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://viridian.fanficauthors.net/A_Story_Name/ "
|
||||
+ "https://whydoyouneedtoknow.fanficauthors.net/A_Story_Name/ ")
|
||||
|
||||
################################################################################################
|
||||
def getSiteURLPattern(self):
|
||||
return r'http?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
|
||||
## .nsns kept here to match both . and -
|
||||
return r'https?://(aaran-st-vines.nsns|abraxan|bobmin|canoncansodoff|chemprof|copperbadge|crys|deluded-musings|draco664|fp|frenchsession|ishtar|jbern|jeconais|kinsfire|kokopelli.nsns|ladya.nsns|lorddwar|mrintel.nsns|musings-of-apathy|ruskbyte|seelvor|tenhawk|viridian|whydoyouneedtoknow)\.fanficauthors\.net/([a-zA-Z0-9_]+)/'
|
||||
|
||||
@classmethod
|
||||
def get_section_url(cls,url):
|
||||
## only changing .nsns to -nsns and only when part of the
|
||||
## domain.
|
||||
url = url.replace('.nsns.fanficauthors.net','-nsns.fanficauthors.net')
|
||||
return url
|
||||
|
||||
################################################################################################
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
|
@ -152,139 +161,105 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
params={}
|
||||
if self.password:
|
||||
params['username'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['username'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
|
||||
if not params['username']:
|
||||
raise exceptions.FailedToLogin('You need to have your username and pasword set.',params['username'])
|
||||
soup = self.make_soup(self.get_request(url+'index/'))
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url+'index/', params, usecache=False)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist("Code: 404. {0}".format(url))
|
||||
elif e.code == 410:
|
||||
raise exceptions.StoryDoesNotExist("Code: 410. {0}".format(url))
|
||||
elif e.code == 401:
|
||||
self.needToLogin = True
|
||||
data = ''
|
||||
else:
|
||||
raise e
|
||||
|
||||
if "The requested file has not been found" in data:
|
||||
raise exceptions.StoryDoesNotExist(
|
||||
"{0}.{1} says: The requested file has not been found".format(
|
||||
self.zone, self.getBaseDomain()))
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# Find authorid and URL.
|
||||
# There is no place where the author's name is listed,
|
||||
# Find authorid and URL.
|
||||
# There is no place where the author's name is listed,
|
||||
# except for in the image at the top of the page. We have to
|
||||
# work with the url entered to get the Author's Name
|
||||
a = self.zone.split('.')[0]
|
||||
self.story.setMetadata('authorId',a)
|
||||
a = a.replace('-',' ').title()
|
||||
self.story.setMetadata('author',a)
|
||||
self.story.setMetadata('authorUrl','http://{0}/'.format(self.parsedUrl.netloc))
|
||||
|
||||
loginUrl = self.story.getMetadata('authorUrl')+'account/'
|
||||
loginsoup = self.make_soup(self._fetchUrl(loginUrl))
|
||||
if True:
|
||||
# if self.performLogin(loginUrl, loginsoup):
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
self.story.setMetadata('authorUrl','https://{0}/'.format(self.parsedUrl.netloc))
|
||||
|
||||
## Title
|
||||
a = soup.find('h2')
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
## Title
|
||||
a = soup.find('h2')
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
# The published and update dates are with the chapter links...
|
||||
# so we have to get them from there.
|
||||
chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata(
|
||||
'storyId')+'/([a-zA-Z0-9_]+)/'))
|
||||
# Find the chapters:
|
||||
# The published and update dates are with the chapter links...
|
||||
# so we have to get them from there.
|
||||
chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
|
||||
'storyId')+'/([a-zA-Z0-9_]+)/'))
|
||||
|
||||
# Here we are getting the published date. It is the date the first chapter was "updated"
|
||||
updatedate = stripHTML(unicode(chapters[0].parent)).split('Uploaded on:')[1].strip()
|
||||
updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
|
||||
'rd ',' ').replace('th ',' ')
|
||||
self.story.setMetadata('datePublished', makeDate(updatedate, self.dateformat))
|
||||
# Here we are getting the published date. It is the date the first chapter was "updated"
|
||||
updatedate = stripHTML(unicode(chapters[0].parent)).split('Uploaded on:')[1].strip()
|
||||
updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
|
||||
'rd ',' ').replace('th ',' ')
|
||||
self.story.setMetadata('datePublished', makeDate(updatedate, self.dateformat))
|
||||
|
||||
for i, chapter in enumerate(chapters):
|
||||
if '/reviews/' not in chapter['href']:
|
||||
# here we get the update date. We will update this for every chapter,
|
||||
# so we get the last one.
|
||||
updatedate = stripHTML(unicode(chapters[i].parent)).split(
|
||||
'Uploaded on:')[1].strip()
|
||||
updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
|
||||
'rd ',' ').replace('th ',' ')
|
||||
self.story.setMetadata('dateUpdated', makeDate(updatedate, self.dateformat))
|
||||
|
||||
if '::' in stripHTML(unicode(chapter)):
|
||||
chapter_title = stripHTML(unicode(chapter).split('::')[1])
|
||||
else:
|
||||
chapter_title = stripHTML(unicode(chapter))
|
||||
chapter_Url = self.story.getMetadata('authorUrl')+chapter['href'][1:]
|
||||
self.chapterUrls.append((chapter_title, chapter_Url))
|
||||
|
||||
self.story.setMetadata('numChapters', len(self.chapterUrls))
|
||||
genres = ("Drama","Romance")
|
||||
gotgenre = False
|
||||
## Getting the Metadata that is there
|
||||
div = soup.find('div',{'class':'well'})
|
||||
metads = div.findAll('p')[1].get_text().replace('\n','').split(' - ')
|
||||
for metad in metads:
|
||||
metad = metad.strip()
|
||||
if ':' in metad:
|
||||
heading = metad.split(':')[0].strip()
|
||||
text = metad.split(':')[1].strip()
|
||||
if heading == 'Status':
|
||||
self.story.setMetadata('status',text)
|
||||
elif heading == 'Rating':
|
||||
self.story.setMetadata('rating',text)
|
||||
elif heading == 'Word count':
|
||||
self.story.setMetadata('numWords',text)
|
||||
elif heading == 'Genre':
|
||||
self.story.setMetadata('genre',text.replace(',',', ').replace(' ',' '))
|
||||
gotgenre = True
|
||||
# Status: Completed - Rating: Adult Only - Chapters: 19 - Word count: 323,805 - Genre: Post-OotP
|
||||
# Status: In progress - Rating: Adult Only - Chapters: 42 - Word count: 395,991 - Genre: Action/Adventure, Angst, Drama, Romance, Tragedy
|
||||
# Status: Completed - Rating: Everyone - Chapters: 1 - Word count: 876 - Genre: Sorrow
|
||||
# Status: In progress - Rating: Mature - Chapters: 39 - Word count: 314,544 - Genre: Drama - Romance
|
||||
div = soup.find('div',{'class':'well'})
|
||||
# logger.debug(div.find_all('p')[1])
|
||||
metaline = re.sub(r' +',' ',stripHTML(div.find_all('p')[1]).replace('\n',' '))
|
||||
# logger.debug(metaline)
|
||||
match = re.match(r"Status: (?P<status>.+?) - Rating: (?P<rating>.+?) - Chapters: [0-9,]+ - Word count: (?P<numWords>[0-9,]+?) - Genre: ?(?P<genre>.*?)$",metaline)
|
||||
if match:
|
||||
# logger.debug(match.group('status'))
|
||||
# logger.debug(match.group('rating'))
|
||||
# logger.debug(match.group('numWords'))
|
||||
# logger.debug(match.group('genre'))
|
||||
if "Completed" in match.group('status'):
|
||||
self.story.setMetadata('status',"Completed")
|
||||
else:
|
||||
self.story.setMetadata('status',"In-Progress")
|
||||
self.story.setMetadata('rating',match.group('rating'))
|
||||
self.story.setMetadata('numWords',match.group('numWords'))
|
||||
self.story.extendList('genre',re.split(r'[;,-]',match.group('genre')))
|
||||
else:
|
||||
raise exceptions.FailedToDownload("Error parsing metadata: '{0}'".format(url))
|
||||
|
||||
summary = div.find('blockquote').get_text()
|
||||
self.setDescription(url,summary)
|
||||
|
||||
## Raising AdultCheckRequired after collecting chapters gives
|
||||
## a double chapter list. So does genre, but it de-dups
|
||||
## automatically.
|
||||
if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only']
|
||||
and not (self.is_adult or self.getConfig("is_adult")) ):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
for i, chapter in enumerate(chapters):
|
||||
if '/reviews/' not in chapter['href']:
|
||||
# here we get the update date. We will update this for every chapter,
|
||||
# so we get the last one.
|
||||
updatedate = stripHTML(unicode(chapters[i].parent)).split(
|
||||
'Uploaded on:')[1].strip()
|
||||
updatedate = updatedate.replace('st ',' ').replace('nd ',' ').replace(
|
||||
'rd ',' ').replace('th ',' ')
|
||||
self.story.setMetadata('dateUpdated', makeDate(updatedate, self.dateformat))
|
||||
|
||||
if '::' in stripHTML(unicode(chapter)):
|
||||
chapter_title = stripHTML(unicode(chapter).split('::')[1])
|
||||
else:
|
||||
if gotgenre == True:
|
||||
if ',' in metad:
|
||||
for gen in metad.split(','):
|
||||
self.story.addToList('genre',gen.strip())
|
||||
for gen in genres:
|
||||
if metad == gen:
|
||||
self.story.addToList('genre',metad.strip())
|
||||
else:
|
||||
for gen in genres:
|
||||
if metad == gen:
|
||||
self.story.addToList('genre',metad.strip())
|
||||
|
||||
chapter_title = stripHTML(unicode(chapter))
|
||||
chapter_Url = self.story.getMetadata('authorUrl')+chapter['href'][1:]
|
||||
self.add_chapter(chapter_title, chapter_Url)
|
||||
|
||||
summary = div.find('blockquote').get_text()
|
||||
self.setDescription(url,summary)
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
if( self.story.getMetadataRaw('rating') in ['Mature','Adult Only'] and
|
||||
(self.is_adult or self.getConfig("is_adult")) ):
|
||||
addurl = "?bypass=1"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url+addurl))
|
||||
|
||||
story = soup.find('div',{'class':'story'})
|
||||
|
||||
|
||||
if story == None:
|
||||
raise exceptions.FailedToDownload(
|
||||
"Error downloading Chapter: '{0}'! Missing required element!".format(url))
|
||||
|
||||
#Now, there are a lot of extranious tags within the story division.. so we will remove them.
|
||||
for tag in story.findAll('ul',{'class':'pager'}) + story.findAll(
|
||||
'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}):
|
||||
for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
|
||||
'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
|
||||
tag.extract()
|
||||
|
||||
|
||||
return self.utf8FromSoup(url,story)
|
||||
|
|
|
|||
|
|
@ -1,321 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
# In general an 'adapter' needs to do these five things:
|
||||
|
||||
# - 'Register' correctly with the downloader
|
||||
# - Site Login (if needed)
|
||||
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
|
||||
# - Grab the chapter list
|
||||
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
|
||||
# - Grab the chapter texts
|
||||
|
||||
# Search for XXX comments--that's where things are most likely to need changing.
|
||||
|
||||
# This function is called by the downloader in all adapter_*.py files
|
||||
# in this dir to register the adapter class. So it needs to be
|
||||
# updated to reflect the class below it. That, plus getSiteDomain()
|
||||
# take care of 'Registering'.
|
||||
def getClass():
|
||||
return FanficCastleTVNetAdapter # XXX
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','csltv') # XXX
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%b %d, %Y" # XXX
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'fanfic.castletv.net' # XXX
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=3"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# We tried the default and still got a warning, so
|
||||
# let's pull the warning number from the 'continue'
|
||||
# link and reload data.
|
||||
addurl = m.group(1)
|
||||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('div',{'id':'pagetitle'})
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Reviews
|
||||
reviewdata = soup.find('div', {'id' : 'sort'})
|
||||
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
|
||||
self.story.setMetadata('reviews',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = soup.findAll('span',{'class':'label'})
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Summary' in label:
|
||||
## Everything until the next span class='label'
|
||||
svalue = ""
|
||||
while value and 'label' not in defaultGetattr(value,'class'):
|
||||
svalue += unicode(value)
|
||||
value = value.nextSibling
|
||||
self.setDescription(url,svalue)
|
||||
#self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rated' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Word count' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
catstext = [cat.string for cat in cats]
|
||||
for cat in catstext:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
charstext = [char.string for char in chars]
|
||||
for char in charstext:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
## Not all sites use Genre, but there's no harm to
|
||||
## leaving it in. Check to make sure the type_id number
|
||||
## is correct, though--it's site specific.
|
||||
if 'Genre' in label:
|
||||
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
||||
genrestext = [genre.string for genre in genres]
|
||||
self.genre = ', '.join(genrestext)
|
||||
for genre in genrestext:
|
||||
self.story.addToList('genre',genre.string)
|
||||
|
||||
## Not all sites use Warnings, but there's no harm to
|
||||
## leaving it in. Check to make sure the type_id number
|
||||
## is correct, though--it's site specific.
|
||||
if 'Warnings' in label:
|
||||
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
|
||||
warningstext = [warning.string for warning in warnings]
|
||||
self.warning = ', '.join(warningstext)
|
||||
for warning in warningstext:
|
||||
self.story.addToList('warnings',warning.string)
|
||||
|
||||
if 'Completed' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if 'Published' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Updated' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,183 +0,0 @@
|
|||
# coding=utf-8
|
||||
|
||||
import re
|
||||
import urllib2
|
||||
import urlparse
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from .. import exceptions
|
||||
|
||||
|
||||
_SOURCE_CODE_ENCODING = 'utf-8'
|
||||
|
||||
|
||||
def getClass():
|
||||
return FanficHuAdapter
|
||||
|
||||
|
||||
def _get_query_data(url):
|
||||
components = urlparse.urlparse(url)
|
||||
query_data = urlparse.parse_qs(components.query)
|
||||
return dict((key, data[0]) for key, data in query_data.items())
|
||||
|
||||
|
||||
class FanficHuAdapter(BaseSiteAdapter):
|
||||
SITE_ABBREVIATION = 'ffh'
|
||||
SITE_DOMAIN = 'fanfic.hu'
|
||||
SITE_LANGUAGE = 'Hungarian'
|
||||
|
||||
BASE_URL = 'http://' + SITE_DOMAIN + '/merengo/'
|
||||
VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%s'
|
||||
|
||||
DATE_FORMAT = '%m/%d/%Y'
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
query_data = urlparse.parse_qs(self.parsedUrl.query)
|
||||
story_id = query_data['sid'][0]
|
||||
|
||||
self.story.setMetadata('storyId', story_id)
|
||||
self._setURL(self.VIEW_STORY_URL_TEMPLATE % story_id)
|
||||
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
|
||||
self.story.setMetadata('language', self.SITE_LANGUAGE)
|
||||
|
||||
def _customized_fetch_url(self, url, exception=None, parameters=None):
|
||||
if exception:
|
||||
try:
|
||||
data = self._fetchUrl(url, parameters)
|
||||
except urllib2.HTTPError:
|
||||
raise exception(self.url)
|
||||
# Just let self._fetchUrl throw the exception, don't catch and
|
||||
# customize it.
|
||||
else:
|
||||
data = self._fetchUrl(url, parameters)
|
||||
|
||||
return self.make_soup(data)
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return FanficHuAdapter.SITE_DOMAIN
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return cls.VIEW_STORY_URL_TEMPLATE % 1234
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]) + r'\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
soup = self._customized_fetch_url(self.url + '&i=1')
|
||||
|
||||
if soup.title.string.encode(_SOURCE_CODE_ENCODING).strip(' :') == 'írta':
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
||||
chapter_options = soup.find('form', action='viewstory.php').select('option')
|
||||
# Remove redundant "Fejezetek" option
|
||||
chapter_options.pop(0)
|
||||
|
||||
# If there is still more than one entry remove chapter overview entry
|
||||
if len(chapter_options) > 1:
|
||||
chapter_options.pop(0)
|
||||
|
||||
for option in chapter_options:
|
||||
url = urlparse.urljoin(self.url, option['value'])
|
||||
self.chapterUrls.append((option.string, url))
|
||||
|
||||
author_url = urlparse.urljoin(self.BASE_URL, soup.find('a', href=lambda href: href and href.startswith('viewuser.php?uid='))['href'])
|
||||
soup = self._customized_fetch_url(author_url)
|
||||
|
||||
story_id = self.story.getMetadata('storyId')
|
||||
for table in soup('table', {'class': 'mainnav'}):
|
||||
title_anchor = table.find('span', {'class': 'storytitle'}).a
|
||||
href = title_anchor['href']
|
||||
if href.startswith('javascript:'):
|
||||
href = href.rsplit(' ', 1)[1].strip("'")
|
||||
query_data = _get_query_data(href)
|
||||
|
||||
if query_data['sid'] == story_id:
|
||||
break
|
||||
else:
|
||||
# This should never happen, the story must be found on the author's
|
||||
# page.
|
||||
raise exceptions.FailedToDownload(self.url)
|
||||
|
||||
self.story.setMetadata('title', title_anchor.string)
|
||||
|
||||
rows = table('tr')
|
||||
|
||||
anchors = rows[0].div('a')
|
||||
author_anchor = anchors[1]
|
||||
query_data = _get_query_data(author_anchor['href'])
|
||||
self.story.setMetadata('author', author_anchor.string)
|
||||
self.story.setMetadata('authorId', query_data['uid'])
|
||||
self.story.setMetadata('authorUrl', urlparse.urljoin(self.BASE_URL, author_anchor['href']))
|
||||
self.story.setMetadata('reviews', anchors[3].string)
|
||||
|
||||
if self.getConfig('keep_summary_html'):
|
||||
self.story.setMetadata('description', self.utf8FromSoup(author_url, rows[1].td))
|
||||
else:
|
||||
self.story.setMetadata('description', ''.join(rows[1].td(text=True)))
|
||||
|
||||
for row in rows[3:]:
|
||||
index = 0
|
||||
cells = row('td')
|
||||
|
||||
while index < len(cells):
|
||||
cell = cells[index]
|
||||
key = cell.b.string.encode(_SOURCE_CODE_ENCODING).strip(':')
|
||||
try:
|
||||
value = cells[index+1].string.encode(_SOURCE_CODE_ENCODING)
|
||||
except AttributeError:
|
||||
value = None
|
||||
|
||||
if key == 'Kategória':
|
||||
for anchor in cells[index+1]('a'):
|
||||
self.story.addToList('category', anchor.string)
|
||||
|
||||
elif key == 'Szereplõk':
|
||||
if cells[index+1].string:
|
||||
for name in cells[index+1].string.split(', '):
|
||||
self.story.addToList('character', name)
|
||||
|
||||
elif key == 'Korhatár':
|
||||
if value != 'nem korhatáros':
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
elif key == 'Figyelmeztetések':
|
||||
for b_tag in cells[index+1]('b'):
|
||||
self.story.addToList('warnings', b_tag.string)
|
||||
|
||||
elif key == 'Jellemzõk':
|
||||
for genre in cells[index+1].string.split(', '):
|
||||
self.story.addToList('genre', genre)
|
||||
|
||||
elif key == 'Fejezetek':
|
||||
self.story.setMetadata('numChapters', int(value))
|
||||
|
||||
elif key == 'Megjelenés':
|
||||
self.story.setMetadata('datePublished', makeDate(value, self.DATE_FORMAT))
|
||||
|
||||
elif key == 'Frissítés':
|
||||
self.story.setMetadata('dateUpdated', makeDate(value, self.DATE_FORMAT))
|
||||
|
||||
elif key == 'Szavak':
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
elif key == 'Befejezett':
|
||||
self.story.setMetadata('status', 'Completed' if value == 'Nem' else 'In-Progress')
|
||||
|
||||
index += 2
|
||||
|
||||
if self.story.getMetadata('rating') == '18':
|
||||
if not (self.is_adult or self.getConfig('is_adult')):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
def getChapterText(self, url):
|
||||
soup = self._customized_fetch_url(url)
|
||||
story_cell = soup.find('form', action='viewstory.php').parent.parent
|
||||
|
||||
for div in story_cell('div'):
|
||||
div.extract()
|
||||
|
||||
return self.utf8FromSoup(url, story_cell)
|
||||
324
fanficfare/adapters/adapter_fanficsme.py
Normal file
324
fanficfare/adapters/adapter_fanficsme.py
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return FanFicsMeAdapter
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FanFicsMeAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
self.full_work_soup = None
|
||||
self.use_full_work_soup = True
|
||||
|
||||
## All Russian as far as I know.
|
||||
self.story.setMetadata('language','Russian')
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('https://' + self.getSiteDomain() + '/fic'+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ffme')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d.%m.%Y"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'fanfics.me'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://"+cls.getSiteDomain()+"/fic1234 https://"+cls.getSiteDomain()+"/read.php?id=1234 https://"+cls.getSiteDomain()+"/read.php?id=1234&chapter=2"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
# https://fanfics.me/fic137282
|
||||
# https://fanfics.me/read.php?id=137282
|
||||
# https://fanfics.me/read.php?id=137282&chapter=2
|
||||
# https://fanfics.me/download.php?fic=137282&format=epub
|
||||
return r"https?://"+re.escape(self.getSiteDomain())+r"/(fic|read\.php\?id=|download\.php\?fic=)(?P<id>\d+)"
|
||||
|
||||
## Login
|
||||
def needToLoginCheck(self, data):
|
||||
return '<form name="autent" action="https://fanfics.me/autent.php" method="post">' in data
|
||||
|
||||
def performLogin(self, url):
|
||||
'''
|
||||
<form name="autent" action="https://fanfics.me/autent.php" method="post">
|
||||
Имя:<br>
|
||||
<input class="input_3" type="text" name="name" id="name"><br>
|
||||
Пароль:<br>
|
||||
<input class="input_3" type="password" name="pass" id="pass"><br>
|
||||
<input type="checkbox" name="nocookie" id="nocookie" /> <label for="nocookie">Чужой компьютер</label><br>
|
||||
<input class="modern_button" type="submit" value="Войти">
|
||||
<div class="lostpass center"><a href="/index.php?section=lostpass">Забыл пароль</a></div>
|
||||
'''
|
||||
params = {}
|
||||
if self.password:
|
||||
params['name'] = self.username
|
||||
params['pass'] = self.password
|
||||
else:
|
||||
params['name'] = self.getConfig("username")
|
||||
params['pass'] = self.getConfig("password")
|
||||
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/autent.php'
|
||||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['name']))
|
||||
## must need a cookie or something.
|
||||
self.get_request(loginUrl, usecache=False)
|
||||
d = self.post_request(loginUrl, params, usecache=False)
|
||||
|
||||
if self.needToLoginCheck(d):
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['name']))
|
||||
raise exceptions.FailedToLogin(url,params['name'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logger.info("url: "+url)
|
||||
|
||||
data = self.get_request(url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
## restrict meta searches to header.
|
||||
fichead = soup.find('div',class_='FicHead')
|
||||
def get_meta_content(title):
|
||||
val_label = fichead.find('div',string=re.compile(u'^'+title+u':'))
|
||||
if val_label:
|
||||
return val_label.find_next('div')
|
||||
|
||||
## fanfics.me doesn't have separate adult--you have to set
|
||||
## your age to 18+ in your user account
|
||||
## Rating
|
||||
## R, NC-17, PG-13 require login
|
||||
## doesn't: General
|
||||
#('Рейтинг', 'rating', False, False)
|
||||
# val_label = fichead.find('div',string=u'Рейтинг:')
|
||||
# val = stripHTML(val_label.find_next('div'))
|
||||
# logger.debug(val)
|
||||
self.story.setMetadata('rating',stripHTML(get_meta_content(u'Рейтинг')))
|
||||
|
||||
## Need to login for any rating higher than General.
|
||||
if self.story.getMetadataRaw('rating') != 'General' and self.needToLoginCheck(data):
|
||||
self.performLogin(url)
|
||||
# reload after login.
|
||||
data = self.get_request(url,usecache=False)
|
||||
soup = self.make_soup(data)
|
||||
fichead = soup.find('div',class_='FicHead')
|
||||
|
||||
## Title
|
||||
## <h1>Третья сторона <span class="small green">(гет)</span></h1>
|
||||
h = fichead.find('h1')
|
||||
span = h.find('span')
|
||||
## I haven't found a term for what fanfics.me calls this, but
|
||||
## it translates to Get Jen Slash Femslash
|
||||
self.story.addToList('category',stripHTML(span)[1:-1])
|
||||
span.extract()
|
||||
self.story.setMetadata('title',stripHTML(h))
|
||||
|
||||
## author(s):
|
||||
content = get_meta_content(u'Авторы?')
|
||||
if content:
|
||||
alist = content.find_all('a', class_='user')
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('/user')[-1])
|
||||
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
||||
self.story.addToList('author',stripHTML(a))
|
||||
# can be deliberately anonymous.
|
||||
if not alist:
|
||||
self.story.setMetadata('author','Anonymous')
|
||||
self.story.setMetadata('authorUrl','https://'+self.host)
|
||||
self.story.setMetadata('authorId','0')
|
||||
|
||||
# translator(s) in different strings
|
||||
content = get_meta_content(u'Переводчикк?и?')
|
||||
if content:
|
||||
for a in content.find_all('a', class_='user'):
|
||||
self.story.addToList('translatorsId',a['href'].split('/user')[-1])
|
||||
self.story.addToList('translatorsUrl','https://'+self.host+a['href'])
|
||||
self.story.addToList('translators',stripHTML(a))
|
||||
|
||||
# If there are translators, but no authors, copy translators to authors.
|
||||
if self.story.getList('translators') and not self.story.getList('author'):
|
||||
self.story.extendList('authorId',self.story.getList('translatorsId'))
|
||||
self.story.extendList('authorUrl',self.story.getList('translatorsUrl'))
|
||||
self.story.extendList('author',self.story.getList('translators'))
|
||||
|
||||
# beta(s)
|
||||
content = get_meta_content(u'Бета')
|
||||
if content:
|
||||
for a in content.find_all('a', class_='user'):
|
||||
self.story.addToList('betasId',a['href'].split('/user')[-1])
|
||||
self.story.addToList('betasUrl','https://'+self.host+a['href'])
|
||||
self.story.addToList('betas',stripHTML(a))
|
||||
|
||||
content = get_meta_content(u'Фандом')
|
||||
self.story.extendList('fandoms', [ stripHTML(a) for a in
|
||||
fichead.find_all('a',href=re.compile(r'/fandom\d+$')) ] )
|
||||
|
||||
## 'Characters' header has both ships and chars lists
|
||||
content = get_meta_content(u'Персонажи')
|
||||
if content:
|
||||
self.story.extendList('ships', [ stripHTML(a) for a in
|
||||
content.find_all('a',href=re.compile(r'/paring\d+_\d+$')) ] )
|
||||
for ship in self.story.getList('ships'):
|
||||
self.story.extendList('characters', ship.split('/'))
|
||||
self.story.extendList('characters', [ stripHTML(a) for a in
|
||||
content.find_all('a',href=re.compile(r'/character\d+$')) ] )
|
||||
|
||||
self.story.extendList('genre',stripHTML(get_meta_content(u'Жанр')).split(', '))
|
||||
## fanfics.me includes 'AU' and 'OOC' as warnings...
|
||||
content = get_meta_content(u'Предупреждение')
|
||||
if content:
|
||||
self.story.extendList('warnings',stripHTML(content).split(', '))
|
||||
|
||||
content = get_meta_content(u'События')
|
||||
if content:
|
||||
self.story.extendList('events', [ stripHTML(a) for a in
|
||||
content.find_all('a',href=re.compile(r'/find\?keyword=\d+$')) ] )
|
||||
|
||||
## Original work block
|
||||
content = get_meta_content(u'Оригинал')
|
||||
if content:
|
||||
# only going to record URL.
|
||||
titletd = content.find('td',string=u'Ссылка:')
|
||||
self.story.setMetadata('originUrl',stripHTML(titletd.find_next('td')))
|
||||
|
||||
## size block, only saving word count.
|
||||
content = get_meta_content(u'Размер')
|
||||
words = stripHTML(content.find('a'))
|
||||
words = re.sub(r'[^0-9]','',words) # only keep numbers
|
||||
self.story.setMetadata('numWords',words)
|
||||
|
||||
## status by color code
|
||||
statuscolors = {'red':'In-Progress',
|
||||
'green':'Completed',
|
||||
'blue':'Hiatus'}
|
||||
content = get_meta_content(u'Статус')
|
||||
self.story.setMetadata('status',statuscolors[content.span['class'][0]])
|
||||
|
||||
# desc
|
||||
self.setDescription(url,soup.find('div',id='summary_'+self.story.getMetadata('storyId')))
|
||||
|
||||
# cover
|
||||
div = fichead.find('div',class_='FicHead_cover')
|
||||
if div:
|
||||
# get the larger version.
|
||||
self.setCoverImage(self.url,div.img['src'].replace('_200_300',''))
|
||||
|
||||
# dates
|
||||
# <span class="DateUpdate" title="Опубликовано 22.04.2020, изменено 22.04.2020">22.04.2020 - 22.04.2020</span>
|
||||
datespan = soup.find('span',class_='DateUpdate')
|
||||
dates = stripHTML(datespan).split(" - ")
|
||||
self.story.setMetadata('datePublished', makeDate(dates[0], self.dateformat))
|
||||
self.story.setMetadata('dateUpdated', makeDate(dates[1], self.dateformat))
|
||||
|
||||
# series
|
||||
seriesdiv = soup.find('div',id='fic_info_content_serie')
|
||||
if seriesdiv:
|
||||
seriesa = seriesdiv.find('a', href=re.compile(r'/serie\d+$'))
|
||||
i=1
|
||||
for a in seriesdiv.find_all('a', href=re.compile(r'/fic\d+$')):
|
||||
if a['href'] == ('/fic'+self.story.getMetadata('storyId')):
|
||||
self.setSeries(stripHTML(seriesa), i)
|
||||
self.story.setMetadata('seriesUrl','https://'+self.host+seriesa['href'])
|
||||
break
|
||||
i+=1
|
||||
|
||||
|
||||
chapteruls = soup.find_all('ul',class_='FicContents')
|
||||
if chapteruls:
|
||||
for ul in chapteruls:
|
||||
# logger.debug(ul.prettify())
|
||||
for chapter in ul.find_all('li'):
|
||||
a = chapter.find('a')
|
||||
# logger.debug(a.prettify())
|
||||
if a and a.has_attr('href'):
|
||||
# logger.debug(chapter.prettify())
|
||||
self.add_chapter(stripHTML(a),'https://' + self.getSiteDomain() + a['href'])
|
||||
else:
|
||||
self.add_chapter(self.story.getMetadata('title'),
|
||||
'https://' + self.getSiteDomain() +
|
||||
'/read.php?id='+self.story.getMetadata('storyId')+'&chapter=0')
|
||||
|
||||
return
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterTextNum(self, url, index):
|
||||
logger.debug('Getting chapter text for: %s index: %s' % (url,index))
|
||||
m = re.match(r'.*&chapter=(\d+).*',url)
|
||||
if m:
|
||||
index=m.group(1)
|
||||
logger.debug("Using index(%s) from &chapter="%index)
|
||||
|
||||
chapter_div = None
|
||||
if self.use_full_work_soup and self.getConfig("use_view_full_work",True) and self.num_chapters() > 1:
|
||||
logger.debug("USE view_full_work")
|
||||
## Assumed view_adult=true was cookied during metadata
|
||||
if not self.full_work_soup:
|
||||
self.full_work_soup = self.make_soup(self.get_request(
|
||||
'https://' + self.getSiteDomain() + '/read.php?id='+self.story.getMetadata('storyId')))
|
||||
|
||||
whole_dl_soup = self.full_work_soup
|
||||
chapter_div = whole_dl_soup.find('div',{'id':'c%s'%(index)})
|
||||
if not chapter_div:
|
||||
self.use_full_work_soup = False
|
||||
logger.warning("c%s not found in view_full_work--ending use_view_full_work"%(index))
|
||||
if chapter_div == None:
|
||||
whole_dl_soup = self.make_soup(self.get_request(url))
|
||||
chapter_div = whole_dl_soup.find('div',{'id':'c%s'%(index)})
|
||||
if None == chapter_div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,chapter_div)
|
||||
224
fanficfare/adapters/adapter_fanfictalkcom.py
Normal file
224
fanficfare/adapters/adapter_fanfictalkcom.py
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2013 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return FanfictalkComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class FanfictalkComAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ahpfftc')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %b %Y"
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return [cls.getSiteDomain(),'archive.hpfanfictalk.com','fanfictalk.com']
|
||||
|
||||
@classmethod
|
||||
def getConfigSections(cls):
|
||||
"Only needs to be overriden if has additional ini sections."
|
||||
return [cls.getConfigSection(),'archive.hpfanfictalk.com','fanfictalk.com']
|
||||
|
||||
@staticmethod # must be @stgetAcceptDomainsaticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'archive.fanfictalk.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://("+r"|".join([x.replace('.',r'\.') for x in self.getAcceptDomains()])+r")(/archive)?/viewstory\.php\?sid=\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=3"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
## Title and author
|
||||
soup = self.make_soup(data)
|
||||
# logger.debug(soup)
|
||||
|
||||
|
||||
pagetitle = soup.select_one('div#pagetitle')
|
||||
# logger.debug(pagetitle)
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
|
||||
self.story.addToList('authorId',a['href'].split('=')[1])
|
||||
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.addToList('author',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
|
||||
|
||||
# categories
|
||||
for a in soup.select("div#sort a"):
|
||||
self.story.addToList('category',stripHTML(a))
|
||||
|
||||
# this site has two divs with class=gb-50 and no immediate container.
|
||||
gb50s = soup.find_all('div', {'class':'gb-50'})
|
||||
|
||||
def list_from_urls(parent, regex, metadata):
|
||||
urls = parent.find_all('a',href=re.compile(regex))
|
||||
for url in urls:
|
||||
self.story.addToList(metadata,stripHTML(url))
|
||||
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=characters','characters')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=11','ships')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=10','representation')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=7','storytype')
|
||||
list_from_urls(gb50s[0],r'browse.php\?type=class&type_id=14','house')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=8','warnings')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=15','contentwarnings')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=4','genre')
|
||||
list_from_urls(gb50s[1],r'browse.php\?type=class&type_id=13','tropes')
|
||||
|
||||
bq = soup.find('blockquote2')
|
||||
if bq:
|
||||
# blockquote2??? Whatever. But we're changing it to a real tag.
|
||||
bq.name='div'
|
||||
self.setDescription(url,bq)
|
||||
|
||||
# usually use something more precise for label search, but
|
||||
# site doesn't group much.
|
||||
labels = soup.find_all('b')
|
||||
for labelspan in labels:
|
||||
# logger.debug(labelspan)
|
||||
value = labelspan.nextSibling
|
||||
label = stripHTML(labelspan)
|
||||
# logger.debug(value)
|
||||
# logger.debug(label)
|
||||
|
||||
if 'Words:' in label:
|
||||
stripHTML(value)
|
||||
self.story.setMetadata('numWords', stripHTML(value).replace('·',''))
|
||||
|
||||
if 'Published:' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value).replace('·',''), self.dateformat))
|
||||
|
||||
if 'Updated:' in label:
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value).replace('·',''), self.dateformat))
|
||||
|
||||
# Site allows stories to be in several series at once. FFF
|
||||
# isn't thrilled with that, we have series00, series01, etc.
|
||||
# Example:
|
||||
# https://archive.fanfictalk.com/viewstory.php?sid=483
|
||||
|
||||
if self.getConfig("collect_series"):
|
||||
seriesspan = soup.find('span',label='Series')
|
||||
for i, seriesa in enumerate(seriesspan.find_all('a', href=re.compile(r"viewseries\.php\?seriesid=\d+"))):
|
||||
# logger.debug(seriesa)
|
||||
series_name = stripHTML(seriesa)
|
||||
series_url = 'https://'+self.host+'/'+seriesa['href']
|
||||
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
# logger.debug(storyas)
|
||||
j=1
|
||||
found = False
|
||||
for storya in storyas:
|
||||
# logger.debug(storya)
|
||||
## allow for JS links.
|
||||
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in storya['href']:
|
||||
found = True
|
||||
break
|
||||
j+=1
|
||||
if found:
|
||||
series_index = j
|
||||
self.story.setMetadata('series%02d'%i,"%s [%s]"%(series_name,series_index))
|
||||
self.story.setMetadata('series%02dUrl'%i,series_url)
|
||||
if i == 0:
|
||||
self.setSeries(series_name, series_index)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
else:
|
||||
logger.debug("Story URL not found in series (%s) page, not including."%series_url)
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=3"
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % (url+addurl))
|
||||
soup = self.make_soup(self.get_request(url+addurl))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,290 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
# By virtue of being recent and requiring both is_adult and user/pass,
|
||||
# adapter_fanficcastletvnet.py is the best choice for learning to
|
||||
# write adapters--especially for sites that use the eFiction system.
|
||||
# Most sites that have ".../viewstory.php?sid=123" in the story URL
|
||||
# are eFiction.
|
||||
|
||||
# For non-eFiction sites, it can be considerably more complex, but
|
||||
# this is still a good starting point.
|
||||
|
||||
# In general an 'adapter' needs to do these five things:
|
||||
|
||||
# - 'Register' correctly with the downloader
|
||||
# - Site Login (if needed)
|
||||
# - 'Are you adult?' check (if needed--some do one, some the other, some both)
|
||||
# - Grab the chapter list
|
||||
# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
|
||||
# - Grab the chapter texts
|
||||
|
||||
# Search for XXX comments--that's where things are most likely to need changing.
|
||||
|
||||
# This function is called by the downloader in all adapter_*.py files
|
||||
# in this dir to register the adapter class. So it needs to be
|
||||
# updated to reflect the class below it. That, plus getSiteDomain()
|
||||
# take care of 'Registering'.
|
||||
def getClass():
|
||||
return FanfictionJunkiesDeAdapter # XXX
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/efiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ffjde') # XXX
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d/%m/%y" # XXX
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'fanfiction-junkies.de' # XXX
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/efiction/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
if 'Registered Users Only' in data \
|
||||
or 'There is no such account on our website' in data \
|
||||
or "That password doesn't match the one in our database" in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/efiction/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
# Weirdly, different sites use different warning numbers.
|
||||
# If the title search below fails, there's a good chance
|
||||
# you need a different number. print data at that point
|
||||
# and see what the 'click here to continue' url says.
|
||||
addurl = "&ageconsent=ok&warning=1" # XXX
|
||||
else:
|
||||
addurl=""
|
||||
|
||||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
# the title search to troubleshoot.
|
||||
if "For adults only " in data: # XXX
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
pagetitle = soup.find('h4')
|
||||
## Title
|
||||
a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
self.story.setMetadata('title',a.string)
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/efiction/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Reviews
|
||||
reviewdata = soup.find('div', {'id' : 'sort'})
|
||||
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
|
||||
self.story.setMetadata('reviews',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/efiction/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# eFiction sites don't help us out a lot with their meta data
|
||||
# formating, so it's a little ugly.
|
||||
|
||||
# utility method
|
||||
def defaultGetattr(d,k):
|
||||
try:
|
||||
return d[k]
|
||||
except:
|
||||
return ""
|
||||
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
list = soup.find('div', {'class':'listbox'})
|
||||
|
||||
|
||||
labels = list.findAll('b')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = labelspan.string
|
||||
|
||||
if 'Zusammenfassung' in label:
|
||||
self.setDescription(url,value)
|
||||
|
||||
if 'Eingestuft' in label:
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if u'Wörter' in label:
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Kategorie' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Charaktere' in label:
|
||||
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char.string)
|
||||
|
||||
if 'Abgeschlossen' in label:
|
||||
if 'Yes' in value:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
if u'Veröffentlicht' in label:
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
if 'Aktualisiert' in label:
|
||||
# there's a stray [ at the end.
|
||||
#value = value[0:-1]
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
|
||||
|
||||
try:
|
||||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/efiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
|
||||
self.setSeries(series_name, i)
|
||||
self.story.setMetadata('seriesUrl',series_url)
|
||||
break
|
||||
i+=1
|
||||
|
||||
except:
|
||||
# I find it hard to care if the series parsing fails
|
||||
pass
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2016 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,21 +15,31 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from datetime import datetime
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
from urllib import unquote_plus
|
||||
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib.parse import urlparse
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from .base_adapter import BaseSiteAdapter
|
||||
|
||||
ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy", "Friendship", "General",
|
||||
"Horror", "Humor", "Hurt-Comfort", "Mystery", "Parody", "Poetry", "Romance", "Sci-Fi",
|
||||
"Spiritual", "Supernatural", "Suspense", "Tragedy", "Western"]
|
||||
ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy",
|
||||
"Friendship", "General", "Horror", "Humor", "Hurt-Comfort",
|
||||
"Mystery", "Parody", "Poetry", "Romance", "Sci-Fi", "Spiritual",
|
||||
"Supernatural", "Suspense", "Tragedy", "Western"]
|
||||
|
||||
ffnetpluscategories=["+Anima", "Alex + Ada", "Rosario + Vampire", "Blood+",
|
||||
"+C: Sword and Cornett", "Norn9 - ノルン+ノネット",
|
||||
"Haré+Guu/ジャングルはいつもハレのちグゥ", "Lost+Brain",
|
||||
"Wicked + The Divine", "Alex + Ada", "RE: Alistair++",
|
||||
"Tristan + Isolde"]
|
||||
|
||||
class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
|
|
@ -37,27 +47,13 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','ffnet')
|
||||
|
||||
# get storyId from url--url validation guarantees second part is storyId
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
self.set_story_idurl(url)
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/s/"+self.story.getMetadata('storyId')+"/1/")
|
||||
|
||||
# ffnet update emails have the latest chapter URL.
|
||||
# Frequently, when they arrive, not all the servers have the
|
||||
# latest chapter yet and going back to chapter 1 to pull the
|
||||
# chapter list doesn't get the latest. So save and use the
|
||||
# original URL given to pull chapter list & metadata.
|
||||
# Not used by plugin because URL gets normalized first for
|
||||
# eliminating duplicate story urls.
|
||||
self.origurl = url
|
||||
if "https://m." in self.origurl:
|
||||
## accept m(mobile)url, but use www.
|
||||
self.origurl = self.origurl.replace("https://m.","https://www.")
|
||||
|
||||
self.opener.addheaders.append(('Referer',self.origurl))
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.fanfiction.net'
|
||||
|
|
@ -70,24 +66,74 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteExampleURLs(cls):
|
||||
return "https://www.fanfiction.net/s/1234/1/ https://www.fanfiction.net/s/1234/12/ http://www.fanfiction.net/s/1234/1/Story_Title http://m.fanfiction.net/s/1234/1/"
|
||||
|
||||
def set_story_idurl(self,url):
|
||||
parsedUrl = urlparse(url)
|
||||
pathparts = parsedUrl.path.split('/',)
|
||||
self.story.setMetadata('storyId',pathparts[2])
|
||||
self.urltitle='' if len(pathparts)<5 else pathparts[4]
|
||||
# normalized story URL.
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/s/"+self.story.getMetadata('storyId')+"/1/"+self.urltitle)
|
||||
|
||||
## here so getSiteURLPattern and get_section_url(class method) can
|
||||
## both use it. Note adapter_fictionpresscom has one too.
|
||||
@classmethod
|
||||
def _get_site_url_pattern(cls):
|
||||
return r"https?://(www|m)?\.fanfiction\.net/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
|
||||
|
||||
@classmethod
|
||||
def get_section_url(cls,url):
|
||||
## minimal URL used for section names in INI and reject list
|
||||
## for comparison
|
||||
# logger.debug("pre--url:%s"%url)
|
||||
m = re.match(cls._get_site_url_pattern(),url)
|
||||
if m:
|
||||
url = "https://"+cls.getSiteDomain()\
|
||||
+"/s/"+m.group('id')+"/1/"
|
||||
# logger.debug("post-url:%s"%url)
|
||||
return url
|
||||
|
||||
@classmethod
|
||||
def get_url_search(cls,url):
|
||||
regexp = super(getClass(), cls).get_url_search(url)
|
||||
regexp = re.sub(r"^(?P<keep>.*net/s/\d+/\d+/)(?P<urltitle>[^\$]*)?",
|
||||
r"\g<keep>(.*)",regexp)
|
||||
logger.debug(regexp)
|
||||
return regexp
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
|
||||
return self._get_site_url_pattern()
|
||||
|
||||
def _fetchUrl(self,url,parameters=None,extrasleep=1.0,usecache=True):
|
||||
## ffnet(and, I assume, fpcom) tends to fail more if hit too
|
||||
## fast. This is in additional to what ever the
|
||||
## slow_down_sleep_time setting is.
|
||||
return BaseSiteAdapter._fetchUrl(self,url,
|
||||
parameters=parameters,
|
||||
extrasleep=extrasleep,
|
||||
usecache=usecache)
|
||||
## normalized chapter URLs DO contain the story title now, but
|
||||
## normalized to current urltitle in case of title changes.
|
||||
def normalize_chapterurl(self,url):
|
||||
return re.sub(r"https?://(www|m)\.(?P<keep>fanfiction\.net/s/\d+/\d+/).*",
|
||||
r"https://www.\g<keep>",url)+self.urltitle
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
def get_request(self,url,usecache=True):
|
||||
## use super version if not set or isn't a chapter URL with a
|
||||
## title.
|
||||
if( not self.getConfig("try_shortened_title_urls") or
|
||||
not re.match(r"https?://www\.fanfiction\.net/s/\d+/\d+/(?P<title>[^/]+)$", url) ):
|
||||
return super(getClass(), self).get_request(url,usecache)
|
||||
|
||||
## kludgey way to attempt more than one URL variant by
|
||||
## removing title one letter at a time. Note that network and
|
||||
## open_pages_in_browser retries still happen first.
|
||||
titlelen = len(url.split('/')[-1])
|
||||
maxcut = min([4,titlelen])
|
||||
j = 0
|
||||
while j < maxcut: # should actually leave loop either by
|
||||
# return or exception raise.
|
||||
try:
|
||||
useurl = url
|
||||
if j: # j==0, full URL, then remove letters.
|
||||
useurl = url[:-j]
|
||||
return super(getClass(), self).get_request(useurl,usecache)
|
||||
except exceptions.HTTPErrorFFF as fffe:
|
||||
if j >= maxcut or 'Page not found or expired' not in unicode(fffe):
|
||||
raise
|
||||
j = j+1
|
||||
|
||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
|
||||
|
|
@ -97,52 +143,60 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
url = self.origurl
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
#logger.debug("\n===================\n%s\n===================\n"%data)
|
||||
soup = self.make_soup(data)
|
||||
except urllib2.HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
#logger.debug("\n===================\n%s\n===================\n"%data)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if "Unable to locate story" in data:
|
||||
if "Unable to locate story" in data or "Story Not Found" in data:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
|
||||
# some times "Chapter not found...", sometimes "Chapter text not found..."
|
||||
if "not found. Please check to see you are not using an outdated url." in data:
|
||||
# some times "Chapter not found...", sometimes "Chapter text
|
||||
# not found..." or "Story does not have any chapters"
|
||||
if "Please check to see you are not using an outdated url." in data:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
|
||||
|
||||
if "Category for this story has been disabled" in data:
|
||||
raise exceptions.FailedToDownload("FanFiction.Net has removed the category for this story and will no longer serve it.")
|
||||
|
||||
# <link rel="canonical" href="//www.fanfiction.net/s/13551154/100/Haze-Gray">
|
||||
canonicalurl = soup.select_one('link[rel=canonical]')['href']
|
||||
self.set_story_idurl(canonicalurl)
|
||||
|
||||
## ffnet used to have a tendency to send out update notices in
|
||||
## email before all their servers were showing the update on
|
||||
## the first chapter. It generates another server request and
|
||||
## doesn't seem to be needed lately, so now default it to off.
|
||||
try:
|
||||
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
|
||||
# get chapter part of url.
|
||||
except:
|
||||
chapcount = 1
|
||||
have_later_meta = False
|
||||
if self.getConfig('check_next_chapter'):
|
||||
try:
|
||||
## ffnet used to have a tendency to send out update
|
||||
## notices in email before all their servers were
|
||||
## showing the update on the first chapter. It
|
||||
## generates another server request and doesn't seem
|
||||
## to be needed lately, so now default it to off.
|
||||
try:
|
||||
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
|
||||
# get chapter part of url.
|
||||
except:
|
||||
chapcount = 1
|
||||
chapter = url.split('/',)[5]
|
||||
tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount+1)
|
||||
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount+1,
|
||||
self.urltitle)
|
||||
logger.debug('=Trying newer chapter: %s' % tryurl)
|
||||
newdata = self._fetchUrl(tryurl)
|
||||
newdata = self.get_request(tryurl)
|
||||
if "not found. Please check to see you are not using an outdated url." not in newdata \
|
||||
and "This request takes too long to process, it is timed out by the server." not in newdata:
|
||||
logger.debug('=======Found newer chapter: %s' % tryurl)
|
||||
soup = self.make_soup(newdata)
|
||||
except urllib2.HTTPError as e:
|
||||
if e.code == 503:
|
||||
raise e
|
||||
except e:
|
||||
logger.warn("Caught an exception reading URL: %s sleeptime(%s) Exception %s."%(unicode(url),sleeptime,unicode(e)))
|
||||
pass
|
||||
have_later_meta = True
|
||||
except Exception as e:
|
||||
logger.warning("Caught exception in check_next_chapter URL: %s Exception %s."%(unicode(tryurl),unicode(e)))
|
||||
|
||||
if self.getConfig('meta_from_last_chapter') and not have_later_meta and chapcount > 1:
|
||||
tryurl = "https://%s/s/%s/%d/%s"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount,
|
||||
self.urltitle)
|
||||
logger.debug('=Trying last chapter for meta_from_last_chapter: %s' % tryurl)
|
||||
newdata = self.get_request(tryurl)
|
||||
soup = self.make_soup(newdata)
|
||||
have_later_meta = True
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"^/u/\d+"))
|
||||
|
|
@ -157,8 +211,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
## 2) cat1_cat2_Crossover
|
||||
## For 1, use the second link.
|
||||
## For 2, fetch the crossover page and pull the two categories from there.
|
||||
|
||||
categories = soup.find('div',{'id':'pre_story_links'}).findAll('a',{'class':'xcontrast_txt'})
|
||||
pre_links = soup.find('div',{'id':'pre_story_links'})
|
||||
categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
|
||||
#print("xcontrast_txt a:%s"%categories)
|
||||
if len(categories) > 1:
|
||||
# Strangely, the ones with *two* links are the
|
||||
|
|
@ -166,20 +220,17 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
# of Book, Movie, etc.
|
||||
self.story.addToList('category',stripHTML(categories[1]))
|
||||
elif 'Crossover' in categories[0]['href']:
|
||||
caturl = "https://%s%s"%(self.getSiteDomain(),categories[0]['href'])
|
||||
catsoup = self.make_soup(self._fetchUrl(caturl))
|
||||
found = False
|
||||
for a in catsoup.findAll('a',href=re.compile(r"^/crossovers/.+?/\d+/")):
|
||||
self.story.addToList('category',stripHTML(a))
|
||||
found = True
|
||||
if not found:
|
||||
# Fall back. I ran across a story with a Crossver
|
||||
# category link to a broken page once.
|
||||
# http://www.fanfiction.net/s/2622060/1/
|
||||
# Naruto + Harry Potter Crossover
|
||||
logger.info("Fall back category collection")
|
||||
for c in stripHTML(categories[0]).replace(" Crossover","").split(' + '):
|
||||
self.story.addToList('category',c)
|
||||
## turns out there's only a handful of ffnet category's
|
||||
## with '+' in. Keep a list and look for them
|
||||
## specifically instead of looking up the crossover page.
|
||||
crossover_cat = stripHTML(categories[0]).replace(" Crossover","")
|
||||
for pluscat in ffnetpluscategories:
|
||||
if pluscat in crossover_cat:
|
||||
self.story.addToList('category',pluscat)
|
||||
crossover_cat = crossover_cat.replace(pluscat,'')
|
||||
for cat in crossover_cat.split(' + '):
|
||||
if cat:
|
||||
self.story.addToList('category',cat)
|
||||
|
||||
a = soup.find('a', href=re.compile(r'https?://www\.fictionratings\.com/'))
|
||||
rating = a.string
|
||||
|
|
@ -200,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
|
||||
# for b in grayspan.findAll('button'):
|
||||
# for b in grayspan.find_all('button'):
|
||||
# b.extract()
|
||||
metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
|
||||
#logger.debug("metatext:(%s)"%metatext)
|
||||
|
|
@ -210,7 +261,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
metalist = metatext.split(" - ")
|
||||
## Newer BS libraries are discarding whitespace after tags now. :-/
|
||||
metalist = re.split(" ?- ",metatext)
|
||||
#logger.debug("metalist:(%s)"%metalist)
|
||||
|
||||
# Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11
|
||||
|
|
@ -238,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
|
||||
# Published: <span data-xutime='1384358726'>8m ago</span>
|
||||
dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
|
||||
dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
|
||||
if len(dates) > 1 :
|
||||
# updated get set to the same as published upstream if not found.
|
||||
self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
|
||||
|
|
@ -286,42 +338,51 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
# Try the larger image first.
|
||||
cover_url = ""
|
||||
try:
|
||||
img = soup.select('img.lazy.cimage')
|
||||
cover_url=img[0]['data-original']
|
||||
img = soup.select_one('img.lazy.cimage')
|
||||
cover_url=img['data-original']
|
||||
except:
|
||||
img = soup.select('img.cimage')
|
||||
if img:
|
||||
cover_url=img[0]['src']
|
||||
## Nov 2023 - src is always "/static/images/d_60_90.jpg" now
|
||||
## Only take cover if there's data-original
|
||||
## Primary motivator is to prevent unneeded author page hits.
|
||||
pass
|
||||
logger.debug("cover_url:%s"%cover_url)
|
||||
|
||||
authimg_url = ""
|
||||
if cover_url and self.getConfig('skip_author_cover'):
|
||||
authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
if cover_url and self.getConfig('skip_author_cover') and self.getConfig('include_images'):
|
||||
try:
|
||||
img = authsoup.select('img.lazy.cimage')
|
||||
authimg_url=img[0]['data-original']
|
||||
except:
|
||||
img = authsoup.select('img.cimage')
|
||||
if img:
|
||||
authimg_url=img[0]['src']
|
||||
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
try:
|
||||
img = authsoup.select_one('img.lazy.cimage')
|
||||
authimg_url=img['data-original']
|
||||
except:
|
||||
img = authsoup.select_one('img.cimage')
|
||||
if img:
|
||||
authimg_url=img['src']
|
||||
|
||||
logger.debug("authimg_url:%s"%authimg_url)
|
||||
logger.debug("authimg_url:%s"%authimg_url)
|
||||
|
||||
## ffnet uses different sizes on auth & story pages, but same id.
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/150/
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/180/
|
||||
try:
|
||||
cover_id = cover_url.split('/')[4]
|
||||
except:
|
||||
cover_id = None
|
||||
try:
|
||||
authimg_id = authimg_url.split('/')[4]
|
||||
except:
|
||||
authimg_id = None
|
||||
## ffnet uses different sizes on auth & story pages, but same id.
|
||||
## Old URLs:
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/150/
|
||||
## //ffcdn2012t-fictionpressllc.netdna-ssl.com/image/1936929/180/
|
||||
## After Dec 2020 ffnet changes:
|
||||
## /image/6472517/180/
|
||||
## /image/6472517/150/
|
||||
try:
|
||||
cover_id = cover_url.split('/')[-3]
|
||||
except:
|
||||
cover_id = None
|
||||
try:
|
||||
authimg_id = authimg_url.split('/')[-3]
|
||||
except:
|
||||
authimg_id = None
|
||||
|
||||
## don't use cover if it matches the auth image.
|
||||
if cover_id and authimg_id and cover_id == authimg_id:
|
||||
cover_url = None
|
||||
## don't use cover if it matches the auth image.
|
||||
if cover_id and authimg_id and cover_id == authimg_id:
|
||||
logger.debug("skip_author_cover: cover_url matches authimg_url: don't use")
|
||||
cover_url = None
|
||||
except Exception as e:
|
||||
logger.warning("Caught exception in skip_author_cover: %s."%unicode(e))
|
||||
|
||||
if cover_url:
|
||||
self.setCoverImage(url,cover_url)
|
||||
|
|
@ -331,35 +392,40 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
select = soup.find('select', { 'name' : 'chapter' } )
|
||||
|
||||
if select is None:
|
||||
# no selector found, so it's a one-chapter story.
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),url))
|
||||
# no selector found, so it's a one-chapter story.
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
else:
|
||||
allOptions = select.findAll('option')
|
||||
allOptions = select.find_all('option')
|
||||
for o in allOptions:
|
||||
url = u'https://%s/s/%s/%s/' % ( self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
o['value'])
|
||||
## title URL will be put back on chapter URL during
|
||||
## normalize_chapterurl() anyway, but also here for
|
||||
## clarity
|
||||
url = u'https://%s/s/%s/%s/%s' % ( self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
o['value'],
|
||||
self.urltitle)
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
title = u"%s" % o
|
||||
title = re.sub(r'<[^>]+>','',title)
|
||||
self.chapterUrls.append((title,url))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
self.add_chapter(title,url)
|
||||
return
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
## ffnet(and, I assume, fpcom) tends to fail more if hit too
|
||||
## fast. This is in additional to what ever the
|
||||
## slow_down_sleep_time setting is.
|
||||
data = self._fetchUrl(url,extrasleep=4.0)
|
||||
logger.debug('Getting chapter text from: %s' % (url))
|
||||
|
||||
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
|
||||
## title URL was put back on chapter URL during
|
||||
## normalize_chapterurl()
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Please email this error message in full to <a href='mailto:" in data:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
## remove inline ads -- only seen with flaresolverr
|
||||
for adtag in soup.select("div.google-auto-placed"):
|
||||
adtag.decompose()
|
||||
|
||||
div = soup.find('div', {'id' : 'storytextp'})
|
||||
|
||||
if None == div:
|
||||
|
|
|
|||
157
fanficfare/adapters/adapter_fanfictionsfr.py
Normal file
157
fanficfare/adapters/adapter_fanfictionsfr.py
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
import io
|
||||
import logging
|
||||
import re
|
||||
import zipfile
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
from fanficfare.htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def getClass():
|
||||
return FanfictionsFrSiteAdapter
|
||||
|
||||
|
||||
class FanfictionsFrSiteAdapter(BaseSiteAdapter):
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev', 'fanfictionsfr')
|
||||
self.story.setMetadata('langcode','fr')
|
||||
self.story.setMetadata('language','Français')
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
match = re.match(self.getSiteURLPattern(), url)
|
||||
if not match:
|
||||
raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())
|
||||
|
||||
story_id = match.group('id')
|
||||
self.story.setMetadata('storyId', story_id)
|
||||
fandom_name = match.group('fandom')
|
||||
|
||||
self._setURL('https://%s/fanfictions/%s/%s/chapters.html' % (self.getSiteDomain(), fandom_name, story_id))
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.fanfictions.fr'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return 'https://%s/fanfictions/fandom/fanfiction-id/chapters.html' % cls.getSiteDomain()
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r'https?://(?:www\.)?fanfictions\.fr/fanfictions/(?P<fandom>[^/]+)/(?P<id>[^/]+)(/chapters.html)?'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
|
||||
data = self.get_request(self.url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# detect if the fanfiction is 'suspended' (chapters unavailable)
|
||||
alert_div = soup.find('div', id='alertInactiveFic')
|
||||
if alert_div:
|
||||
raise exceptions.FailedToDownload("Failed to download the fanfiction, most likely because it is suspended.")
|
||||
|
||||
title_element = soup.find('h1', itemprop='name')
|
||||
self.story.setMetadata('title', stripHTML(title_element))
|
||||
|
||||
author_div = soup.find('div', itemprop='author')
|
||||
author_name = stripHTML(author_div.a)
|
||||
author_id = author_div.a['href'].split('/')[-1].replace('.html', '')
|
||||
|
||||
self.story.setMetadata('author', author_name)
|
||||
self.story.setMetadata('authorId', author_id)
|
||||
|
||||
published_date_element = soup.find('span', class_='date-distance')
|
||||
published_date_text = published_date_element['data-date']
|
||||
published_date = makeDate(published_date_text, '%Y-%m-%d %H:%M:%S')
|
||||
if published_date:
|
||||
self.story.setMetadata('datePublished', published_date)
|
||||
|
||||
status_element = soup.find('p', title="Statut de la fanfiction").find('span', class_='badge')
|
||||
french_status = stripHTML(status_element)
|
||||
status_translation = {
|
||||
"En cours": "In-Progress",
|
||||
"Terminée": "Completed",
|
||||
"One-shot": "Completed",
|
||||
}
|
||||
self.story.setMetadata('status', status_translation.get(french_status, french_status))
|
||||
|
||||
genre_elements = soup.find('div', title="Format et genres").find_all('span', class_="highlightable")
|
||||
self.story.extendList('genre', [ stripHTML(genre) for genre in genre_elements[1:] ])
|
||||
|
||||
category_elements = soup.find_all('li', class_="breadcrumb-item")
|
||||
self.story.extendList('category', [ stripHTML(category) for category in category_elements[-2].find_all('a') ])
|
||||
|
||||
first_description = soup.find('p', itemprop='abstract')
|
||||
self.setDescription(self.url, first_description)
|
||||
|
||||
chapter_cards = soup.find_all(class_=['card', 'chapter'])
|
||||
|
||||
for chapter_card in chapter_cards:
|
||||
chapter_title_tag = chapter_card.find('h2')
|
||||
if chapter_title_tag:
|
||||
chapter_title = stripHTML(chapter_title_tag)
|
||||
chapter_link = 'https://'+self.getSiteDomain()+chapter_title_tag.find('a')['href']
|
||||
|
||||
# Clean up the chapter title by replacing multiple spaces and newline characters with a single space
|
||||
chapter_title = re.sub(r'\s+', ' ', chapter_title)
|
||||
|
||||
self.add_chapter(chapter_title, chapter_link)
|
||||
|
||||
last_chapter_div = chapter_cards[-1]
|
||||
updated_date_element = last_chapter_div.find('span', class_='date-distance')
|
||||
last_chapter_update_date = updated_date_element['data-date']
|
||||
date = makeDate(last_chapter_update_date, '%Y-%m-%d %H:%M:%S')
|
||||
if date:
|
||||
self.story.setMetadata('dateUpdated', date)
|
||||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
response, redirection_url = self.get_request_redirected(url)
|
||||
|
||||
if "telecharger_pdf.html" in redirection_url:
|
||||
with zipfile.ZipFile(io.BytesIO(response.encode('latin1'))) as z:
|
||||
# Assuming there's only one text file inside the zip
|
||||
file_list = z.namelist()
|
||||
if len(file_list) != 1:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Zip file should contain exactly one text file!" % url)
|
||||
text_filename = file_list[0]
|
||||
with z.open(text_filename) as text_file:
|
||||
# Decode the text file with windows-1252 encoding
|
||||
text = text_file.read().decode('windows-1252')
|
||||
return text.replace("\r\n", "<br>\r\n")
|
||||
else:
|
||||
soup = self.make_soup(response)
|
||||
|
||||
div_content = soup.find('div', id='readarea')
|
||||
if div_content is None:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url, div_content)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2012 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,19 +15,18 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return FanFiktionDeAdapter
|
||||
|
|
@ -39,11 +38,6 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -53,7 +47,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
|
||||
self._setURL('https://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ffde')
|
||||
|
|
@ -69,17 +63,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 http://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name"
|
||||
return "https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1 https://"+cls.getSiteDomain()+"/s/46ccbef30000616306614050/1/story-name"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
return r"https?"+re.escape("://"+self.getSiteDomain()+"/s/")+r"\w+(/\d+)?"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -103,10 +90,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
params['a'] = 'l'
|
||||
params['submit'] = 'Login...'
|
||||
|
||||
loginUrl = 'https://ssl.fanfiktion.de/'
|
||||
loginUrl = 'https://www.fanfiktion.de/'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['nickname']))
|
||||
soup = self.make_soup(self._postUrl(loginUrl,params))
|
||||
soup = self.make_soup(self.post_request(loginUrl,params))
|
||||
if not soup.find('a', title='Logout'):
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['nickname']))
|
||||
|
|
@ -121,27 +108,19 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
|
||||
if "Uhr ist diese Geschichte nur nach einer" in data:
|
||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
# print data
|
||||
# logger.debug(data)
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
## Title
|
||||
a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+"/"))
|
||||
|
|
@ -151,41 +130,69 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
head = soup.find('div', {'class' : 'story-left'})
|
||||
a = head.find('a')
|
||||
self.story.setMetadata('authorId',a['href'].split('/')[2])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.find('select').findAll('option'):
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value']))
|
||||
for chapter in soup.find('select').find_all('option'):
|
||||
self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
## title="Wörter" failed with max_zalgo:1
|
||||
self.story.setMetadata('numWords',stripHTML(soup.find("span",{'class':"fa-keyboard"}).parent).replace('.','')) # 1.234 = 1,234
|
||||
self.story.setMetadata('language','German')
|
||||
|
||||
self.story.setMetadata('datePublished', makeDate(stripHTML(head.find('span',title='erstellt').parent), self.dateformat))
|
||||
self.story.setMetadata('dateUpdated', makeDate(stripHTML(head.find('span',title='aktualisiert').parent), self.dateformat))
|
||||
|
||||
# second colspan=3 td in head.
|
||||
## Genre now shares a line with rating.
|
||||
genres=stripHTML(head.find('span',class_='fa-angle-right').next_sibling)
|
||||
self.story.extendList('genre',genres[:genres.index('/')].split(', '))
|
||||
self.story.extendList('genre',genres[:genres.index(' / ')].split(', '))
|
||||
self.story.setMetadata('rating', genres[genres.index(' / ')+3:])
|
||||
|
||||
if head.find('span',title='Fertiggestellt'):
|
||||
# self.story.addToList('category',stripHTML(soup.find('span',id='ffcbox-story-topic-1')).split('/')[2].strip())
|
||||
for a in soup.find('span',id='ffcbox-story-topic-1').find_all('a',href=re.compile(r'/c/')):
|
||||
cat = stripHTML(a)
|
||||
if cat != 'Fanfiction':
|
||||
self.story.addToList('category',cat)
|
||||
|
||||
for span in soup.find_all('span',class_='badge-character'):
|
||||
self.story.addToList('characters',stripHTML(span))
|
||||
|
||||
try:
|
||||
self.story.setMetadata('native_status', head.find_all('span',{'class':'titled-icon'})[3]['title'])
|
||||
except e:
|
||||
logger.debug("Failed to find native status:%s"%e)
|
||||
|
||||
if head.find('span',title='fertiggestellt'):
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
elif head.find('span',title='pausiert'):
|
||||
self.story.setMetadata('status', 'Paused')
|
||||
elif head.find('span',title='abgebrochen'):
|
||||
self.story.setMetadata('status', 'Cancelled')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
#find metadata on the author's page
|
||||
asoup = self.make_soup(self._fetchUrl("http://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
||||
tr=asoup.findAll('tr')
|
||||
for i in range(1,len(tr)):
|
||||
a = tr[i].find('a')
|
||||
if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
|
||||
break
|
||||
self.setDescription(url,a['onmouseover'].split("', '")[1])
|
||||
## Get description
|
||||
descdiv = soup.select_one('div#story-summary-inline div')
|
||||
if descdiv:
|
||||
if 'center' in descdiv['class']:
|
||||
del descdiv['class']
|
||||
self.setDescription(url,descdiv)
|
||||
|
||||
# #find metadata on the author's page
|
||||
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
||||
# tr=asoup.find_all('tr')
|
||||
# for i in range(1,len(tr)):
|
||||
# a = tr[i].find('a')
|
||||
# if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
|
||||
# break
|
||||
|
||||
# td = tr[i].find_all('td')
|
||||
# self.story.addToList('category',stripHTML(td[2]))
|
||||
# self.story.setMetadata('rating', stripHTML(td[5]))
|
||||
# self.story.setMetadata('numWords', stripHTML(td[6]))
|
||||
|
||||
|
||||
td = tr[i].findAll('td')
|
||||
self.story.addToList('category',stripHTML(td[2]))
|
||||
self.story.setMetadata('rating', stripHTML(td[5]))
|
||||
self.story.setMetadata('numWords', stripHTML(td[6]))
|
||||
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
|
|
@ -194,10 +201,10 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
logger.debug('Getting chapter text from: %s' % url)
|
||||
time.sleep(0.5) ## ffde has "floodlock" protection
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'storytext'})
|
||||
for a in div.findAll('script'):
|
||||
for a in div.find_all('script'):
|
||||
a.extract()
|
||||
|
||||
if None == div:
|
||||
|
|
|
|||
|
|
@ -1,57 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import re
|
||||
from base_efiction_adapter import BaseEfictionAdapter
|
||||
|
||||
class FHSArchiveComAdapter(BaseEfictionAdapter):
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'fhsarchive.com'
|
||||
|
||||
@classmethod
|
||||
def getPathToArchive(self):
|
||||
return '/autoarchive'
|
||||
|
||||
@classmethod
|
||||
def getSiteAbbrev(self):
|
||||
return 'fhsa'
|
||||
|
||||
@classmethod
|
||||
def getDateFormat(self):
|
||||
return "%m/%d/%y"
|
||||
|
||||
def handleMetadataPair(self, key, value):
|
||||
if key == 'Warnings':
|
||||
for val in re.split("\s*,\s*", value):
|
||||
if value == 'None':
|
||||
return
|
||||
else:
|
||||
# toss numbers only.
|
||||
self.story.addToList('warnings', filter(lambda x : not x.isdigit() , val))
|
||||
|
||||
# elif 'Categories' in key:
|
||||
# for val in re.split("\s*>\s*", value):
|
||||
# self.story.addToList('category', val)
|
||||
else:
|
||||
super(FHSArchiveComAdapter, self).handleMetadataPair(key, value)
|
||||
|
||||
def getClass():
|
||||
return FHSArchiveComAdapter
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,19 +15,20 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
import datetime
|
||||
from __future__ import absolute_import,unicode_literals
|
||||
# import datetime
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import json
|
||||
import re
|
||||
import urllib2
|
||||
from .. import translit
|
||||
# from .. import translit
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
from .. import exceptions# as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
|
||||
def getClass():
|
||||
|
|
@ -41,11 +42,6 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
self.password = ""
|
||||
self.is_adult=False
|
||||
|
|
@ -62,34 +58,42 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d %m %Y"
|
||||
self.dateformat = u"%d %m %Y г., %H:%M"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'www.ficbook.net'
|
||||
return 'ficbook.net'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content"
|
||||
return "https://"+cls.getSiteDomain()+"/readfic/12345 https://"+cls.getSiteDomain()+"/readfic/93626/246417#part_content https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82 https://"+cls.getSiteDomain()+"/readfic/578de1cd-a8b4-7ff1-aa49-750426508b82/94793742#part_content"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"\d+"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/readfic/")+r"[\d\-a-zA-Z]+"
|
||||
|
||||
def performLogin(self,url,data):
|
||||
params = {}
|
||||
if self.password:
|
||||
params['login'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['login'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
|
||||
logger.debug("Try to login in as (%s)" % params['login'])
|
||||
d = self.post_request('https://' + self.getSiteDomain() + '/login_check_static',params,usecache=False)
|
||||
|
||||
if 'Войти используя аккаунт на сайте' in d:
|
||||
raise exceptions.FailedToLogin(url,params['login'])
|
||||
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
def extractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
url=self.url
|
||||
logger.debug("URL: "+url)
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
adult_div = soup.find('div',id='adultCoverWarning')
|
||||
|
|
@ -98,11 +102,12 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
adult_div.extract()
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
||||
|
||||
## Title
|
||||
a = soup.find('section',{'class':'chapter-info'}).find('h1')
|
||||
try:
|
||||
a = soup.find('section',{'class':'chapter-info'}).find('h1')
|
||||
except AttributeError:
|
||||
raise exceptions.FailedToDownload("Error collecting meta: %s! Missing required element!" % url)
|
||||
# kill '+' marks if present.
|
||||
sup = a.find('sup')
|
||||
if sup:
|
||||
|
|
@ -112,42 +117,12 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# Find authorid and URL from... author url.
|
||||
# assume first avatar-nickname -- there can be a second marked 'beta'.
|
||||
a = soup.find('a',{'class':'avatar-nickname'})
|
||||
a = soup.find('a',{'class':'creator-username'})
|
||||
self.story.setMetadata('authorId',a.text) # Author's name is unique
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
|
||||
self.story.setMetadata('author',a.text)
|
||||
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
|
||||
|
||||
# Find the chapters:
|
||||
chapters = soup.find('ul', {'class' : 'table-of-contents'})
|
||||
if chapters != None:
|
||||
chapters=chapters.findAll('a', href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+"/\d+#part_content$"))
|
||||
self.story.setMetadata('numChapters',len(chapters))
|
||||
for x in range(0,len(chapters)):
|
||||
chapter=chapters[x]
|
||||
churl='https://'+self.host+chapter['href']
|
||||
self.chapterUrls.append((stripHTML(chapter),churl))
|
||||
if x == 0:
|
||||
pubdate = translit.translit(stripHTML(chapter.parent.find('span')))
|
||||
# pubdate = translit.translit(stripHTML(self.make_soup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span')))
|
||||
if x == len(chapters)-1:
|
||||
update = translit.translit(stripHTML(chapter.parent.find('span')))
|
||||
# update = translit.translit(stripHTML(self.make_soup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span')))
|
||||
else:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),url))
|
||||
self.story.setMetadata('numChapters',1)
|
||||
pubdate=translit.translit(stripHTML(soup.find('div',{'class':'title-area'}).find('span')))
|
||||
update=pubdate
|
||||
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
|
||||
if not ',' in pubdate:
|
||||
pubdate=datetime.date.today().strftime(self.dateformat)
|
||||
if not ',' in update:
|
||||
update=datetime.date.today().strftime(self.dateformat)
|
||||
pubdate=pubdate.split(',')[0]
|
||||
update=update.split(',')[0]
|
||||
|
||||
fullmon = {"yanvarya":"01", u"января":"01",
|
||||
"fievralya":"02", u"февраля":"02",
|
||||
"marta":"03", u"марта":"03",
|
||||
|
|
@ -161,44 +136,68 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
"noyabrya":"11", u"ноября":"11",
|
||||
"diekabrya":"12", u"декабря":"12" }
|
||||
|
||||
for (name,num) in fullmon.items():
|
||||
if name in pubdate:
|
||||
pubdate = pubdate.replace(name,num)
|
||||
if name in update:
|
||||
update = update.replace(name,num)
|
||||
# Find the chapters:
|
||||
pubdate = None
|
||||
chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
|
||||
if chapters is not None:
|
||||
for chapdiv in chapters.find_all('li', {'class':'part'}):
|
||||
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
|
||||
churl='https://'+self.host+chapter['href']
|
||||
|
||||
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
|
||||
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
|
||||
# Find the chapter dates.
|
||||
date_str = chapdiv.find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
|
||||
for month_name, month_num in fullmon.items():
|
||||
date_str = date_str.replace(month_name, month_num)
|
||||
chapterdate = makeDate(date_str,self.dateformat)
|
||||
self.add_chapter(chapter,churl,
|
||||
{'date':chapterdate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format",self.dateformat)))})
|
||||
|
||||
if pubdate is None and chapterdate:
|
||||
pubdate = chapterdate
|
||||
update = chapterdate
|
||||
else:
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
date_str = soup.find('div', {'class' : 'part-date'}).find('span', {'title': True})['title'].replace(u"\u202fг. в", "")
|
||||
for month_name, month_num in fullmon.items():
|
||||
date_str = date_str.replace(month_name, month_num)
|
||||
pubdate = update = makeDate(date_str,self.dateformat)
|
||||
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
|
||||
self.story.setMetadata('dateUpdated', update)
|
||||
self.story.setMetadata('datePublished', pubdate)
|
||||
self.story.setMetadata('language','Russian')
|
||||
|
||||
## after site change, I don't see word count anywhere.
|
||||
# pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
|
||||
# pr='https://'+self.host+pr['href']
|
||||
# pr = self.make_soup(self._fetchUrl(pr))
|
||||
# pr=pr.findAll('div', {'class' : 'part_text'})
|
||||
# i=0
|
||||
# for part in pr:
|
||||
# i=i+len(stripHTML(part).split(' '))
|
||||
# self.story.setMetadata('numWords', unicode(i))
|
||||
dlinfo = soup.select_one('header.d-flex.flex-column.gap-12.word-break')
|
||||
|
||||
series_label = dlinfo.select_one('div.description.word-break').find('strong', string='Серия:')
|
||||
logger.debug('Series: %s'%str(series_label))
|
||||
if series_label:
|
||||
series_div = series_label.find_next_sibling("div")
|
||||
# No accurate series number as for that, additional request needs to be made
|
||||
self.setSeries(stripHTML(series_div.a), 1)
|
||||
self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))
|
||||
|
||||
dlinfo = soup.find('dl',{'class':'info'})
|
||||
|
||||
i=0
|
||||
fandoms = dlinfo.find('dd').findAll('a', href=re.compile(r'/fanfiction/\w+'))
|
||||
fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
|
||||
for fandom in fandoms:
|
||||
self.story.addToList('category',fandom.string)
|
||||
i=i+1
|
||||
if i > 1:
|
||||
self.story.addToList('genre', u'Кроссовер')
|
||||
|
||||
for genre in dlinfo.findAll('a',href=re.compile(r'/genres/')):
|
||||
self.story.addToList('genre',stripHTML(genre))
|
||||
tags = soup.find('div',{'class':'tags'})
|
||||
if tags:
|
||||
for genre in tags.find_all('a',href=re.compile(r'/tags/')):
|
||||
self.story.addToList('genre',stripHTML(genre))
|
||||
|
||||
ratingdt = dlinfo.find('dt',text='Рейтинг:')
|
||||
self.story.setMetadata('rating', stripHTML(ratingdt.next_sibling))
|
||||
|
||||
# meta=table.findAll('a', href=re.compile(r'/ratings/'))
|
||||
logger.debug("category: (%s)"%self.story.getMetadata('category'))
|
||||
logger.debug("genre: (%s)"%self.story.getMetadata('genre'))
|
||||
|
||||
ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
|
||||
self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))
|
||||
|
||||
# meta=table.find_all('a', href=re.compile(r'/ratings/'))
|
||||
# i=0
|
||||
# for m in meta:
|
||||
# if i == 0:
|
||||
|
|
@ -209,39 +208,186 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
# i=2
|
||||
# self.story.addToList('genre', m.find('b').text)
|
||||
# elif i == 2:
|
||||
# self.story.addToList('warnings', m.find('b').text)
|
||||
# self.story.addToList('warnings', m.find('b').text)
|
||||
|
||||
if dlinfo.find('span', {'style' : 'color: green'}):
|
||||
if dlinfo.find('div', {'class':'badge-status-finished'}):
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
|
||||
tags = dlinfo.findAll('dt')
|
||||
for tag in tags:
|
||||
label = translit.translit(tag.text)
|
||||
if 'Piersonazhi:' in label or u'Персонажи:' in label:
|
||||
chars=stripHTML(tag.next_sibling).split(', ')
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char)
|
||||
break
|
||||
|
||||
summary=soup.find('div', {'class' : 'urlize'})
|
||||
self.setDescription(url,summary)
|
||||
#self.story.setMetadata('description', summary.text)
|
||||
try:
|
||||
self.story.setMetadata('universe', stripHTML(dlinfo.find('a', href=re.compile('/fandom_universe/'))))
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
paircharsdt = soup.find('strong',string='Пэйринг и персонажи:')
|
||||
# site keeps both ships and indiv chars in /pairings/ links.
|
||||
if paircharsdt:
|
||||
for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):
|
||||
if 'pairing-highlight' in paira['class']:
|
||||
self.story.addToList('ships',stripHTML(paira))
|
||||
chars=stripHTML(paira).split('/')
|
||||
for char in chars:
|
||||
self.story.addToList('characters',char)
|
||||
else:
|
||||
self.story.addToList('characters',stripHTML(paira))
|
||||
|
||||
summary=soup.find('div', itemprop='description')
|
||||
if summary:
|
||||
# Fix for the text not displaying properly
|
||||
summary['class'].append('part_text')
|
||||
self.setDescription(url,summary)
|
||||
#self.story.setMetadata('description', summary.text)
|
||||
|
||||
stats = soup.find('div', {'class':'hat-actions-container'})
|
||||
targetdata = stats.find_all('span', {'class' : 'main-info'})
|
||||
for data in targetdata:
|
||||
svg_class = data.find('svg')['class'][1] if data.find('svg') else None
|
||||
value = int(stripHTML(data)) if stripHTML(data).isdigit() else 0
|
||||
|
||||
if svg_class == 'ic_thumbs-up' and value > 0:
|
||||
self.story.setMetadata('likes', value)
|
||||
#logger.debug("likes: (%s)"%self.story.getMetadata('likes'))
|
||||
elif svg_class == 'ic_bubble-dark' and value > 0:
|
||||
self.story.setMetadata('reviews', value)
|
||||
#logger.debug("reviews: (%s)"%self.story.getMetadata('reviews'))
|
||||
elif svg_class == 'ic_bookmark' and value > 0:
|
||||
self.story.setMetadata('numCollections', value)
|
||||
logger.debug("numCollections: (%s)"%self.story.getMetadata('numCollections'))
|
||||
|
||||
# Grab the amount of pages and words
|
||||
targetpages = soup.find('strong',string='Размер:').find_next('div')
|
||||
if targetpages:
|
||||
targetpages_text = re.sub(r"(?<!\,)\s| ", "", targetpages.text, flags=re.UNICODE | re.MULTILINE)
|
||||
|
||||
pages_raw = re.search(r'(\d+)(?:страницы|страниц)', targetpages_text, re.UNICODE)
|
||||
pages = int(pages_raw.group(1))
|
||||
if pages > 0:
|
||||
self.story.setMetadata('pages', pages)
|
||||
logger.debug("pages: (%s)"%self.story.getMetadata('pages'))
|
||||
|
||||
numWords_raw = re.search(r"(\d+)(?:слова|слов)", targetpages_text, re.UNICODE)
|
||||
numWords = int(numWords_raw.group(1))
|
||||
if numWords > 0:
|
||||
self.story.setMetadata('numWords', numWords)
|
||||
logger.debug("numWords: (%s)"%self.story.getMetadata('numWords'))
|
||||
|
||||
# Grab FBN Category
|
||||
class_tag = soup.select_one('div[class^="badge-with-icon direction"]').find('span', {'class' : 'badge-text'}).text
|
||||
if class_tag:
|
||||
self.story.setMetadata('classification',class_tag)
|
||||
#logger.debug("classification: (%s)"%self.story.getMetadata('classification'))
|
||||
|
||||
# Find dedication.
|
||||
ded = soup.find('div', {'class' : 'js-public-beta-dedication'})
|
||||
if ded:
|
||||
ded['class'].append('part_text')
|
||||
self.story.setMetadata('dedication',ded)
|
||||
|
||||
# Find author comment
|
||||
comm = soup.find('div', {'class' : 'js-public-beta-author-comment'})
|
||||
if comm:
|
||||
comm['class'].append('part_text')
|
||||
self.story.setMetadata('authorcomment',comm)
|
||||
|
||||
follows = stats.find('fanfic-follow-button')[':follow-count']
|
||||
if int(follows) > 0:
|
||||
self.story.setMetadata('follows', int(follows))
|
||||
logger.debug("follows: (%s)"%self.story.getMetadata('follows'))
|
||||
|
||||
# Grab the amount of awards
|
||||
numAwards = 0
|
||||
try:
|
||||
awards = soup.find('fanfic-reward-list')[':initial-fic-rewards-list']
|
||||
award_list = json.loads(awards)
|
||||
numAwards = int(len(award_list))
|
||||
# Grab the awards, but if multiple awards have the same name, only one will be kept; only an issue with hundreds of them.
|
||||
self.story.extendList('awards', [str(award['user_text']) for award in award_list])
|
||||
#logger.debug("awards (%s)"%self.story.getMetadata('awards'))
|
||||
except (TypeError, KeyError):
|
||||
logger.debug("Could not grab the awards")
|
||||
|
||||
if numAwards > 0:
|
||||
self.story.setMetadata('numAwards', numAwards)
|
||||
logger.debug("Num Awards (%s)"%self.story.getMetadata('numAwards'))
|
||||
|
||||
if get_cover:
|
||||
cover = soup.find('fanfic-cover', {'class':"jsVueComponent"})
|
||||
if cover is not None:
|
||||
self.setCoverImage(url,cover['src-original'])
|
||||
|
||||
def replace_formatting(self, tag):
    """Rebuild *tag* with its newlines turned into ``<br/>`` elements.

    Operates on the tag's plain text because manipulating text nodes
    directly in BS4 is awkward, and stripHTML() discards whitespace
    around inline tags such as <i>.
    """
    name = tag.name
    text = tag.get_text().replace("\n", "<br/>")
    rebuilt = self.make_soup("<%s>%s</%s>" % (name, text, name))
    return rebuilt.find(name)
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
    """Fetch one ficbook chapter and return its cleaned-up HTML.

    Resolves leftover duplicate old/new lines from a diff merge: keeps the
    single get_request() fetch and the single set of ``is None`` fallback
    checks.  Optionally converts the site's white-space:pre-wrap text into
    real ``<br/>`` markup and attaches author head/foot notes.
    """
    logger.debug('Getting chapter text from: %s' % url)

    soup = self.make_soup(self.get_request(url))

    # Chapter body lives in one of several containers depending on the
    # site's A/B state; try them in order.
    chapter = soup.find('div', {'class' : 'public_beta'})
    if chapter is None:
        chapter = soup.find('div', {'id' : 'content'})
    if chapter is None: ## still needed?
        chapter = soup.find('div', {'class' : 'public_beta_disabled'})
    if chapter is None:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

    ## ficbook uses weird CSS white-space: pre-wrap; for
    ## paragraphing.  Doesn't work with txt output
    if 'part_text' in chapter['class'] and self.getConfig('replace_text_formatting'):
        ## copy classes, except part_text
        divclasses = chapter['class']
        divclasses.remove('part_text')
        chapter = self.replace_formatting(chapter)
        chapter['class'] = divclasses

    exclude_notes = self.getConfigList('exclude_notes')
    if 'headnotes' not in exclude_notes:
        # Find the headnote
        head_note = soup.select_one("div.part-comment-top div.js-public-beta-comment-before")
        if head_note:
            # Create the structure for the headnote
            head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
            head_b_tag = soup.new_tag('b')
            head_b_tag.string = 'Примечания:'
            if 'text-preline' in head_note['class'] and self.getConfig('replace_text_formatting'):
                head_blockquote_tag = self.replace_formatting(head_note)
                head_blockquote_tag.name = 'blockquote'
            else:
                head_blockquote_tag = soup.new_tag('blockquote')
                head_blockquote_tag.string = stripHTML(head_note)
            head_notes_div_tag.append(head_b_tag)
            head_notes_div_tag.append(head_blockquote_tag)
            # Prepend the headnotes to the chapter, <hr> to mimic the site
            chapter.insert(0, head_notes_div_tag)
            chapter.insert(1, soup.new_tag('hr'))

    if 'footnotes' not in exclude_notes:
        # Find the endnote
        end_note = soup.select_one("div.part-comment-bottom div.js-public-beta-comment-after")
        if end_note:
            # Create the structure for the footnote
            end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
            end_b_tag = soup.new_tag('b')
            end_b_tag.string = 'Примечания:'
            if 'text-preline' in end_note['class'] and self.getConfig('replace_text_formatting'):
                end_blockquote_tag = self.replace_formatting(end_note)
                end_blockquote_tag.name = 'blockquote'
            else:
                end_blockquote_tag = soup.new_tag('blockquote')
                end_blockquote_tag.string = stripHTML(end_note)
            end_notes_div_tag.append(end_b_tag)
            end_notes_div_tag.append(end_blockquote_tag)
            # Append the endnotes to the chapter, <hr> to mimic the site
            chapter.append(soup.new_tag('hr'))
            chapter.append(end_notes_div_tag)

    return self.utf8FromSoup(url, chapter)
|
||||
|
|
|
|||
|
|
@ -1,292 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Software: eFiction
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import sys
|
||||
|
||||
from bs4.element import Comment
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
    # Adapter factory hook used by fanficfare's adapter discovery.
    return HPFanficArchiveComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
# Class name has to be unique.  Our convention is camel case the
# sitename with Adapter at the end.  www is skipped.
class HPFanficArchiveComAdapter(BaseSiteAdapter):
    """eFiction adapter for www.ficsite.com (legacy py2 codebase).

    Changes from the original: `except X, e` py2-only syntax replaced with
    `as e` (valid on py2.6+ as well), bare `except:` narrowed to
    `except Exception:`, regex patterns made raw strings, identity
    comparisons use `is None`.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252",
                       "utf8", "iso-8859-1"]
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ficsite')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'www.ficsite.com'

    @classmethod
    def getSiteExampleURLs(cls):
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        """Return True when the fetched page demands authentication."""
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        """Log in to the site; raises FailedToLogin on rejection."""
        params = {}

        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False
        else:
            return True

    # I've added this because there are several warnings
    # that are used by this site.
    def getWarning(self, data):
        """Map the page's adult-content warning text to the URL fragment
        needed to bypass it, or False when no warning is present."""
        if "This story contains adult subject matter that may include coarse language, violence, and mild sexual content of a graphical nature. Reader discretion is requested. Thank you." in data:
            return '&ageconsent=ok&warning=5'
        elif "This story contains graphical material of an adult nature and a same sex primary relationship. Please do not read if this is not to your taste. Thank you." in data:
            return '&warning=7'
        elif "This story contains graphical material of an adult nature. Reader discretion is requested. Thank you." in data:
            return '&warning=6'
        else:
            return False

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):

        if (self.is_adult or self.getConfig("is_adult")):
            addurl = '&index=1&ageconsent=ok&warning=5'
        else:
            addurl='&index=1'

        # index=1 makes sure we see the story chapter index.  Some
        # sites skip that for one-chapter stories.
        url = self.url+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        # Refetch with the warning-bypass fragment when an adult warning
        # page was served instead of the story.
        warning = self.getWarning(data)
        if warning != False:
            data = self._fetchUrl(url+warning)

        # NOTE: 'adminstrators' misspelling matches the site's own text.
        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        elif "This story contains adult subject matter that may include coarse language, violence, and mild sexual content of a graphical nature. Reader discretion is requested. Thank you." in data:
            raise exceptions.AccessDenied(self.getSiteDomain()+" says: This story contains adult subject matter that may include coarse language, violence, and mild sexual content of a graphical nature. Reader discretion is requested. Thank you.")
        elif "This story contains graphical material of an adult nature and a same sex primary relationship. Please do not read if this is not to your taste. Thank you." in data:
            raise exceptions.AccessDenied(self.getSiteDomain()+" says: This story contains graphical material of an adult nature and a same sex primary relationship. Please do not read if this is not to your taste. Thank you.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        # Now go hunting for all the meta data and the chapter list.

        ## Title and Author Div
        div = soup.find('div',{'id':'pagetitle'})
        ## Title
        a = div.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = div.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: tolerant attribute lookup on navigable nodes,
        # which may be plain strings without a ['class'].
        def defaultGetattr(d,k):
            try:
                return d[k]
            except Exception:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            # Collect everything between this label and the next into one
            # markup string.
            val = labelspan.nextSibling
            value = unicode('')
            while val and not 'label' in defaultGetattr(val,'class'):
                if not isinstance(val,Comment):
                    value += unicode(val)
                val = val.nextSibling
            label = labelspan.string

            if 'Summary' in label:
                self.setDescription(url,value)

            if 'Rated' in label:
                self.story.setMetadata('rating', stripHTML(value))

            if 'Word count' in label:
                self.story.setMetadata('numWords', stripHTML(value))

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Pairing' in label:
                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                for ship in ships:
                    self.story.addToList('ships',ship.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in stripHTML(value):
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1

        except Exception:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return the story div's HTML."""
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,div)
|
||||
225
fanficfare/adapters/adapter_fictionalleyarchiveorg.py
Normal file
225
fanficfare/adapters/adapter_fictionalleyarchiveorg.py
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2021 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','fa')
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
# normalized story URL.
|
||||
url = "https://"+self.getSiteDomain()+"/authors/"+m.group('auth')+"/"+m.group('id')+".html"
|
||||
self._setURL(url)
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%m/%d/%Y"
|
||||
|
||||
def _setURL(self,url):
|
||||
# logger.debug("set URL:%s"%url)
|
||||
super(FictionAlleyArchiveOrgSiteAdapter, self)._setURL(url)
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('authorId',m.group('auth'))
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.fictionalley-archive.org'
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return ['www.fictionalley-archive.org',
|
||||
'www.fictionalley.org']
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://"+cls.getSiteDomain()+"/authors/drt/DA.html https://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"
|
||||
|
||||
@classmethod
|
||||
def getURLDomain(cls):
|
||||
return 'https://' + cls.getSiteDomain()
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
# http://www.fictionalley-archive.org/authors/drt/DA.html
|
||||
# http://www.fictionalley-archive.org/authors/drt/JOTP01a.html
|
||||
return r"https?://www.fictionalley(-archive)?.org/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
## could be either chapter list page or one-shot text page.
|
||||
logger.debug("URL: "+self.url)
|
||||
|
||||
(data,rurl) = self.get_request_redirected(self.url)
|
||||
if rurl != self.url:
|
||||
self._setURL(rurl)
|
||||
logger.debug("set to redirected url:%s"%self.url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# If chapter list page, get the first chapter to look for adult check
|
||||
chapterlinklist = soup.select('h5.mb-1 > a')
|
||||
# logger.debug(chapterlinklist)
|
||||
|
||||
if not chapterlinklist:
|
||||
# no chapter list, it's either a chapter URL or a single chapter story
|
||||
# <nav aria-label="Chapter Navigation">
|
||||
# <a class="page-link" href="/authors/mz_xxo/HPATOTFI.html">Index</a>
|
||||
storya = soup.select_one('nav[aria-label="Chapter Navigation"] a')
|
||||
# logger.debug(storya)
|
||||
if storya:
|
||||
## multi chapter story
|
||||
self._setURL(self.getURLDomain()+storya['href'])
|
||||
logger.debug("Normalizing to URL: "+self.url)
|
||||
# ## title's right there...
|
||||
# self.story.setMetadata('title',stripHTML(storya))
|
||||
data = self.get_request(self.url)
|
||||
soup = self.make_soup(data)
|
||||
chapterlinklist = soup.select('h5.mb-1 > a')
|
||||
# logger.debug(chapterlinklist)
|
||||
else:
|
||||
## single chapter story.
|
||||
# logger.debug("Single chapter story")
|
||||
pass
|
||||
|
||||
self.story.setMetadata('title',stripHTML(soup.select_one('h1')))
|
||||
|
||||
## authorid already set.
|
||||
## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1>
|
||||
authora=soup.select_one('h1 + h3 > a')
|
||||
self.story.setMetadata('author',stripHTML(authora))
|
||||
self.story.setMetadata('authorUrl',self.getURLDomain()+authora['href'])
|
||||
|
||||
if chapterlinklist:
|
||||
# Find the chapters:
|
||||
for chapter in chapterlinklist:
|
||||
listitem = chapter.parent.parent.parent
|
||||
# logger.debug(listitem)
|
||||
# date
|
||||
date = stripHTML(listitem.select_one('small.text-nowrap'))
|
||||
chapterDate = makeDate(date,self.dateformat)
|
||||
wordshits = listitem.select('span.font-weight-normal')
|
||||
chap_data = {
|
||||
'date':chapterDate.strftime(self.getConfig("datechapter_format",self.getConfig("datePublished_format","%Y-%m-%d"))),
|
||||
'words':stripHTML(wordshits[0]),
|
||||
'hits':stripHTML(wordshits[1]),
|
||||
'summary':stripHTML(listitem.select_one('p.my-2')),
|
||||
}
|
||||
# logger.debug(chap_data)
|
||||
self.add_chapter(chapter,self.getURLDomain()+chapter['href'], chap_data)
|
||||
else:
|
||||
self.add_chapter(self.story.getMetadata('title'),self.url)
|
||||
|
||||
cardbody = soup.select_one('div.card-body')
|
||||
|
||||
searchs_to_meta = (
|
||||
# sitetype, ffftype, islist
|
||||
('Rating', 'rating', False),
|
||||
('House', 'house', True),
|
||||
('Character', 'characters', True),
|
||||
('Genre', 'genre', True),
|
||||
('Era', 'era', True),
|
||||
('Spoiler', 'spoilers', True),
|
||||
('Ship', 'ships', True),
|
||||
)
|
||||
for (sitetype,ffftype, islist) in searchs_to_meta:
|
||||
# logger.debug((sitetype,ffftype, islist))
|
||||
tags = cardbody.select('a[href^="/stories?Include.%s"]'%sitetype)
|
||||
# logger.debug(tags)
|
||||
if tags:
|
||||
if islist:
|
||||
self.story.extendList(ffftype, [ stripHTML(a) for a in tags ])
|
||||
else:
|
||||
self.story.setMetadata(ffftype, stripHTML(tags[0]))
|
||||
|
||||
|
||||
# Published: 09/26/2003 Updated: 04/13/2004 Words: 14,268 Chapters: 5 Hits: 743
|
||||
badgeinfos = cardbody.select('div.badge-info')
|
||||
# logger.debug(badgeinfos)
|
||||
for badge in badgeinfos:
|
||||
txt = stripHTML(badge)
|
||||
(key,val)=txt.split(':')
|
||||
# logger.debug((key,val))
|
||||
if key in ( 'Published', 'Updated'):
|
||||
date = makeDate(val,self.dateformat)
|
||||
self.story.setMetadata('date'+key,date)
|
||||
elif key in ('Hits'):
|
||||
self.story.setMetadata(key.lower(),val)
|
||||
elif key == 'Words':
|
||||
self.story.setMetadata('numWords',val)
|
||||
|
||||
summary = soup.find('dt',string='Story Summary:')
|
||||
if summary:
|
||||
summary = summary.find_next_sibling('dd')
|
||||
summary.name='div'
|
||||
self.setDescription(self.url,summary)
|
||||
|
||||
return
|
||||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# this may be a brittle way to get the chapter text.
|
||||
# Site doesn't give a lot of hints.
|
||||
chaptext = soup.select_one('main#content div:not([class])')
|
||||
|
||||
# not sure how, but we can get html, etc tags still in some
|
||||
# stories. That breaks later updates because it confuses
|
||||
# epubutils.py
|
||||
# Yes, this still applies to fictionalley-archive.
|
||||
|
||||
for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
|
||||
tag.extract()
|
||||
|
||||
for tag in chaptext.find_all('body') + chaptext.find_all('html'):
|
||||
tag.name = 'div'
|
||||
|
||||
if self.getConfig('include_author_notes'):
|
||||
row = chaptext.find_previous_sibling('div',class_='row')
|
||||
logger.debug(row)
|
||||
andt = row.find('dt',string="Author's Note:")
|
||||
logger.debug(andt)
|
||||
if andt:
|
||||
chaptext.insert(0,andt.parent.extract())
|
||||
# post notes aren't as structured(?)
|
||||
for div in chaptext.find_next_siblings('div',class_='row'):
|
||||
chaptext.append(div.extract())
|
||||
|
||||
# logger.debug(chaptext)
|
||||
return self.utf8FromSoup(url,chaptext)
|
||||
|
||||
def getClass():
|
||||
return FictionAlleyArchiveOrgSiteAdapter
|
||||
|
|
@ -1,244 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','fa')
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.is_adult=False
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('authorId',m.group('auth'))
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL(url)
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.fictionalley.org'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/authors/drt/DA.html http://"+cls.getSiteDomain()+"/authors/drt/JOTP01a.html"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
# http://www.fictionalley.org/authors/drt/DA.html
|
||||
# http://www.fictionalley.org/authors/drt/JOTP01a.html
|
||||
return re.escape("http://"+self.getSiteDomain())+"/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"
|
||||
|
||||
def _postFetchWithIAmOld(self,url):
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
params={'iamold':'Yes',
|
||||
'action':'ageanswer'}
|
||||
logger.info("Attempting to get cookie for %s" % url)
|
||||
## posting on list doesn't work, but doesn't hurt, either.
|
||||
data = self._postUrl(url,params)
|
||||
else:
|
||||
data = self._fetchUrl(url)
|
||||
return data
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
## could be either chapter list page or one-shot text page.
|
||||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._postFetchWithIAmOld(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
chapterdata = data
|
||||
# If chapter list page, get the first chapter to look for adult check
|
||||
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
|
||||
if chapterlinklist:
|
||||
chapterdata = self._postFetchWithIAmOld(chapterlinklist[0]['href'])
|
||||
|
||||
if "Are you over seventeen years old" in chapterdata:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if not chapterlinklist:
|
||||
# no chapter list, chapter URL: change to list link.
|
||||
# second a tag inside div breadcrumbs
|
||||
storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
|
||||
self._setURL(storya['href'])
|
||||
url=self.url
|
||||
logger.debug("Normalizing to URL: "+url)
|
||||
## title's right there...
|
||||
self.story.setMetadata('title',stripHTML(storya))
|
||||
data = self._fetchUrl(url)
|
||||
soup = self.make_soup(data)
|
||||
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
|
||||
else:
|
||||
## still need title from somewhere. If chapterlinklist,
|
||||
## then chapterdata contains a chapter, find title the
|
||||
## same way.
|
||||
chapsoup = self.make_soup(chapterdata)
|
||||
storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
|
||||
self.story.setMetadata('title',stripHTML(storya))
|
||||
del chapsoup
|
||||
|
||||
del chapterdata
|
||||
|
||||
## authorid already set.
|
||||
## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1>
|
||||
authora=soup.find('h1',{'class':'title'}).find('a')
|
||||
self.story.setMetadata('author',authora.string)
|
||||
self.story.setMetadata('authorUrl',authora['href'])
|
||||
|
||||
if len(chapterlinklist) == 1:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),chapterlinklist[0]['href']))
|
||||
else:
|
||||
# Find the chapters:
|
||||
for chapter in chapterlinklist:
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),chapter['href']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
## Go scrape the rest of the metadata from the author's page.
|
||||
data = self._fetchUrl(self.story.getMetadata('authorUrl'))
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
|
||||
# [Rid] The Magical Hottiez</a> by <a class = "pen_name" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/">Aafro Man Ziegod</a> </small></dt>
|
||||
# <dd><small class = "storyinfo"><a href = "http://www.fictionalley.org/ratings.html" target = "_new">Rating:</a> PG-13 - Spoilers: PS/SS, CoS, PoA, GoF, QTTA, FB - 4264 hits - 5060 words<br />
|
||||
# Genre: Humor, Romance - Main character(s): None - Ships: None - Era: Multiple Eras<br /></small>
|
||||
# Chaos ensues after Witch Weekly, seeking to increase readers, decides to create a boyband out of five seemingly talentless wizards: Harry Potter, Draco Malfoy, Ron Weasley, Neville Longbottom, and Oliver "Toss Your Knickers Here" Wood.<br />
|
||||
# <small class = "storyinfo">Published: June 3, 2002 (between Goblet of Fire and Order of Phoenix) - Updated: June 3, 2002</small>
|
||||
# </dd></dl>
|
||||
|
||||
storya = soup.find('a',{'href':self.story.getMetadata('storyUrl')})
|
||||
storydd = storya.findNext('dd')
|
||||
|
||||
# Rating: PG - Spoilers: None - 2525 hits - 736 words
|
||||
# Genre: Humor - Main character(s): H, R - Ships: None - Era: Multiple Eras
|
||||
# Harry and Ron are back at it again! They reeeeeeally don't want to be back, because they know what's awaiting them. "VH1 Goes Inside..." is back! Why? 'Cos there are soooo many more couples left to pick on.
|
||||
# Published: September 25, 2004 (between Order of Phoenix and Half-Blood Prince) - Updated: September 25, 2004
|
||||
|
||||
## change to text and regexp find.
|
||||
metastr = stripHTML(storydd).replace('\n',' ').replace('\t',' ')
|
||||
|
||||
m = re.match(r".*?Rating: (.+?) -.*?",metastr)
|
||||
if m:
|
||||
self.story.setMetadata('rating', m.group(1))
|
||||
|
||||
m = re.match(r".*?Genre: (.+?) -.*?",metastr)
|
||||
if m:
|
||||
for g in m.group(1).split(','):
|
||||
self.story.addToList('genre',g)
|
||||
|
||||
m = re.match(r".*?Published: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
|
||||
if m:
|
||||
self.story.setMetadata('datePublished',makeDate(m.group(1), "%B %d, %Y"))
|
||||
|
||||
m = re.match(r".*?Updated: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
|
||||
if m:
|
||||
self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%B %d, %Y"))
|
||||
|
||||
m = re.match(r".*? (\d+) words Genre.*?",metastr)
|
||||
if m:
|
||||
self.story.setMetadata('numWords', m.group(1))
|
||||
|
||||
for small in storydd.findAll('small'):
|
||||
small.extract() ## removes the <small> tags, leaving only the summary.
|
||||
storydd.name = 'div' ## change tag name else Calibre treats it oddly.
|
||||
self.setDescription(url,storydd)
|
||||
#self.story.setMetadata('description',stripHTML(storydd))
|
||||
|
||||
return
|
||||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# find <!-- headerend --> & <!-- footerstart --> and
|
||||
# replaced with matching div pair for easier parsing.
|
||||
# Yes, it's an evil kludge, but what can ya do? Using
|
||||
# something other than div prevents soup from pairing
|
||||
# our div with poor html inside the story text.
|
||||
crazy = "crazytagstringnobodywouldstumbleonaccidently"
|
||||
data = data.replace('<!-- headerend -->','<'+crazy+' id="storytext">').replace('<!-- footerstart -->','</'+crazy+'>')
|
||||
|
||||
# problems with some stories confusing Soup. This is a nasty
|
||||
# hack, but it works.
|
||||
data = data[data.index('<'+crazy+''):]
|
||||
# ditto with extra crap at the end.
|
||||
data = data[:data.index('</'+crazy+'>')+len('</'+crazy+'>')]
|
||||
|
||||
soup = self.make_soup(data)
|
||||
body = soup.findAll('body') ## some stories use a nested body and body
|
||||
## tag, in which case we don't
|
||||
## need crazytagstringnobodywouldstumbleonaccidently
|
||||
## and use the second one instead.
|
||||
if len(body)>1:
|
||||
text = body[1]
|
||||
text.name='div' # force to be a div to avoid multiple body tags.
|
||||
else:
|
||||
text = soup.find(crazy, {'id' : 'storytext'})
|
||||
text.name='div' # change to div tag.
|
||||
|
||||
if not data or not text:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
# not sure how, but we can get html, etc tags still in some
|
||||
# stories. That breaks later updates because it confuses
|
||||
# epubutils.py
|
||||
for tag in text.findAll('head'):
|
||||
tag.extract()
|
||||
|
||||
for tag in text.findAll('body') + text.findAll('html'):
|
||||
tag.name = 'div'
|
||||
|
||||
return self.utf8FromSoup(url,text)
|
||||
|
||||
def getClass():
|
||||
return FictionAlleyOrgSiteAdapter
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 FanFicFare team
|
||||
# Copyright 2022 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,15 +15,103 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
ampfandoms = ["A Falcone & Driscoll Investigation",
|
||||
"Alias Smith & Jones",
|
||||
"Atelier Escha & Logy",
|
||||
"Austin & Ally",
|
||||
"Baby & Me/赤ちゃんと僕",
|
||||
"Barney & Friends",
|
||||
"Between Love & Goodbye",
|
||||
"Beyond Good & Evil",
|
||||
"Bill & Ted's Excellent Adventure/Bogus Journey",
|
||||
"BLACK & WHITE",
|
||||
"Bonnie & Clyde",
|
||||
"Brandy & Mr. Whiskers",
|
||||
"Brothers & Sisters",
|
||||
"Bucket & Skinner's Epic Adventures",
|
||||
"Calvin & Hobbes",
|
||||
"Cats & Dogs",
|
||||
"Command & Conquer",
|
||||
"Devil & Devil",
|
||||
"Dharma & Greg",
|
||||
"Dicky & Dawn",
|
||||
"Drake & Josh",
|
||||
"Edgar & Ellen",
|
||||
"Franklin & Bash",
|
||||
"Gabby Duran & The Unsittables",
|
||||
"Girls und Panzer/ガールズ&パンツァー",
|
||||
"Gnomeo & Juliet",
|
||||
"Grim Adventures of Billy & Mandy",
|
||||
"Half & Half/ハーフ・アンド・ハーフ",
|
||||
"Hansel & Gretel",
|
||||
"Hatfields & McCoys",
|
||||
"High & Low - The Story of S.W.O.R.D.",
|
||||
"Home & Away",
|
||||
"Hudson & Rex",
|
||||
"Huntik: Secrets & Seekers",
|
||||
"Imagine Me & You",
|
||||
"Jekyll & Hyde",
|
||||
"Jonathan Strange & Mr. Norrell",
|
||||
"Knight's & Magic/ナイツ&マジック",
|
||||
"Law & Order: Los Angeles",
|
||||
"Law & Order: Organized Crime",
|
||||
"Lilo & Stitch",
|
||||
"Locke & Key",
|
||||
"Lockwood & Co.",
|
||||
"Lost & Found Music Studios",
|
||||
"Lu & Og",
|
||||
"Me & My Brothers",
|
||||
"Melissa & Joey",
|
||||
"Mickey Mouse & Friends",
|
||||
"Mike & Molly",
|
||||
"Mike, Lu & Og",
|
||||
"Miraculous: Tales of Ladybug & Cat Noir",
|
||||
"Mork & Mindy",
|
||||
"Mount&Blade",
|
||||
"Mr. & Mrs. Smith",
|
||||
"Mr. Peabody & Sherman",
|
||||
"Muhyo & Roji",
|
||||
"Nicky, Ricky, Dicky & Dawn",
|
||||
"Oliver & Company",
|
||||
"Ozzy & Drix",
|
||||
"Panty & Stocking with Garterbelt/パンティ&ストッキングwithガーターベルト",
|
||||
"Penryn & the End of Days",
|
||||
"Prep & Landing",
|
||||
"Prince & Hero/王子とヒーロー",
|
||||
"Prince & Me",
|
||||
"Puzzle & Dragons",
|
||||
"Ren & Stimpy Show",
|
||||
"Rizzoli & Isles",
|
||||
"Romeo & Juliet",
|
||||
"Rosemary & Thyme",
|
||||
"Sam & Cat",
|
||||
"Sam & Max",
|
||||
"Sapphire & Steel",
|
||||
"Scott & Bailey",
|
||||
"Shakespeare & Hathaway: Private Investigators",
|
||||
"Soul Nomad & the World Eaters",
|
||||
"Superman & Lois",
|
||||
"Tiger & Bunny/タイガー&バニー",
|
||||
"Trains & Automobiles",
|
||||
"Upin & Ipin",
|
||||
"Wallace & Gromit",
|
||||
"Witch & Wizard",
|
||||
"Wolverine & the X-Men",
|
||||
"Yotsuba&!/よつばと!",
|
||||
"Young & Hungry",
|
||||
]
|
||||
|
||||
|
||||
class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
|
|
@ -31,16 +119,32 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','fichunt')
|
||||
|
||||
# get storyId from url--url validation guarantees second part is storyId
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("http://"+self.getSiteDomain()\
|
||||
+"/read/"+self.story.getMetadata('storyId')+"/1")
|
||||
## new types:
|
||||
## https://fictionhunt.com/stories/7edm248/the-last-of-his-kind/chapters/1
|
||||
## https://fictionhunt.com/stories/89kzg4z/the-last-of-his-kind-new
|
||||
## old type:
|
||||
## http://fictionhunt.com/read/12411643/1
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
# logger.debug(m.groupdict())
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
if m.group('type') == "stories": # newer URL
|
||||
# normalized story URL.
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/stories/"+self.story.getMetadata('storyId')+"/"+ (m.group('title') or ""))
|
||||
else:
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/read/"+self.story.getMetadata('storyId')+"/1")
|
||||
# logger.debug(self.url)
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%d-%m-%Y"
|
||||
self.dateformat = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
|
|
@ -48,17 +152,55 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://fictionhunt.com/read/1234/1"
|
||||
return "https://fictionhunt.com/stories/1a1a1a/story-title http://fictionhunt.com/read/1234/1"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"http://(www.)?fictionhunt.com/read/\d+(/\d+)?(/|/[^/]+)?/?$"
|
||||
## https://fictionhunt.com/stories/7edm248/the-last-of-his-kind/chapters/1
|
||||
## https://fictionhunt.com/stories/89kzg4z/the-last-of-his-kind-new
|
||||
## http://fictionhunt.com/read/12411643/1
|
||||
return r"https?://(www.)?fictionhunt.com/(?P<type>read|stories)/(?P<id>[0-9a-z]+)(/(?P<title>[^/]+))?(/|/[^/]+)*/?$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
def needToLoginCheck(self, data):
|
||||
## FH is apparently reporting "Story has been removed" for all
|
||||
## chapters when not logged in now.
|
||||
if 'https://fictionhunt.com/login' in data:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['identifier'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['identifier'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['remember'] = 'on'
|
||||
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/login'
|
||||
|
||||
if not params['identifier']:
|
||||
logger.info("This site requires login.")
|
||||
raise exceptions.FailedToLogin(url,params['identifier'])
|
||||
|
||||
## need to pull empty login page first to get authenticity_token
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['identifier']))
|
||||
soup = self.make_soup(self.get_request(loginUrl,usecache=False))
|
||||
params['_token']=soup.find('input', {'name':'_token'})['value']
|
||||
|
||||
d = self.post_request(loginUrl, params, usecache=False)
|
||||
# logger.debug(d)
|
||||
|
||||
if self.needToLoginCheck(d):
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['identifier']))
|
||||
raise exceptions.FailedToLogin(url,params['identifier'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
|
||||
|
|
@ -66,80 +208,132 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
# metadata and chapter list
|
||||
|
||||
url = self.url
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.meta)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
## As per #784, site isn't requiring login anymore.
|
||||
## Login check commented since we've seen it toggle before.
|
||||
# if self.needToLoginCheck(data):
|
||||
# self.performLogin(url)
|
||||
# data = self.get_request(url,usecache=False)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
## detect old storyUrl, switch to new storyUrl:
|
||||
canonlink = soup.find('link',rel='canonical')
|
||||
if canonlink:
|
||||
# logger.debug(canonlink)
|
||||
canonlink = re.sub(r"/chapters/\d+","",canonlink['href'])
|
||||
# logger.debug(canonlink)
|
||||
self._setURL(canonlink)
|
||||
url = self.url
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
else:
|
||||
# in case title changed
|
||||
self._setURL(soup.select_one("div.Story__details a")['href'])
|
||||
url = self.url
|
||||
|
||||
self.story.setMetadata('title',stripHTML(soup.find('div',{'class':'title'})).strip())
|
||||
# logger.debug(data)
|
||||
self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))
|
||||
|
||||
self.setDescription(url,'<i>(Story descriptions not available on fictionhunt.com)</i>')
|
||||
summhead = soup.find('h5',string='Summary')
|
||||
self.setDescription(url,summhead.find_next('div'))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
# fictionhunt doesn't have author pages, use ffnet original author link.
|
||||
a = soup.find('a', href=re.compile(r"fanfiction.net/u/\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('/')[-1])
|
||||
self.story.setMetadata('authorUrl','https://www.fanfiction.net/u/'+self.story.getMetadata('authorId'))
|
||||
self.story.setMetadata('author',a.string)
|
||||
## author:
|
||||
autha = soup.find('div',{'class':'StoryContents__meta'}).find('a') # first a in StoryContents__meta
|
||||
self.story.setMetadata('authorId',autha['href'].split('/')[4])
|
||||
self.story.setMetadata('authorUrl',autha['href'])
|
||||
self.story.setMetadata('author',autha.string)
|
||||
|
||||
updlab = soup.find('label',string='Last Updated:')
|
||||
if updlab:
|
||||
update = updlab.find_next('time')['datetime']
|
||||
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
|
||||
|
||||
publab = soup.find('label',string='Published:')
|
||||
if publab:
|
||||
pubdate = publab.find_next('time')['datetime']
|
||||
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
|
||||
|
||||
## need author page for some metadata.
|
||||
authsoup = None
|
||||
authpagea = autha
|
||||
authstorya = None
|
||||
|
||||
## Rating and exact word count doesn't appear on the summary
|
||||
## page, try to get from author page.
|
||||
|
||||
## find story url, might need to spin through author's pages.
|
||||
while authpagea and not authstorya:
|
||||
authsoup = self.make_soup(self.get_request(authpagea['href']))
|
||||
authpagea = authsoup.find('a',{'rel':'next'})
|
||||
# CSS selectors don't allow : or / unquoted, which
|
||||
# BS4(and dependencies) didn't used to enforce.
|
||||
authstorya = authsoup.select_one('h4.Story__item-title a[href="%s"]'%self.url)
|
||||
|
||||
if not authstorya:
|
||||
raise exceptions.FailedToDownload("Error finding %s on author page(s)" % self.url)
|
||||
|
||||
meta = authstorya.find_parent('li').find('div',class_='Story__meta-info')
|
||||
meta=meta.text.split()
|
||||
self.story.setMetadata('numWords',meta[meta.index('words')-1])
|
||||
self.story.setMetadata('rating',meta[meta.index('Rating:')+1])
|
||||
# logger.debug(meta)
|
||||
|
||||
# Find original ffnet URL
|
||||
a = soup.find('a', href=re.compile(r"fanfiction.net/s/\d+"))
|
||||
a = soup.find('a', string="Source")
|
||||
self.story.setMetadata('origin',stripHTML(a))
|
||||
self.story.setMetadata('originUrl',a['href'])
|
||||
|
||||
# Fleur D. & Harry P. & Hermione G. & Susan B. - Words: 42,848 - Rated: M - English - None - Chapters: 9 - Reviews: 248 - Updated: 21-09-2016 - Published: 16-05-2015 - by Elven Sorcerer (FFN)
|
||||
# None - Words: 13,087 - Rated: M - English - Romance & Supernatural - Chapters: 3 - Reviews: 5 - Updated: 21-09-2016 - Published: 20-09-2016
|
||||
# Harry P. & OC - Words: 10,910 - Rated: M - English - None - Chapters: 5 - Reviews: 6 - Updated: 21-09-2016 - Published: 11-09-2016
|
||||
# Dudley D. & Harry P. & Nagini & Vernon D. - Words: 4,328 - Rated: K+ - English - None - Chapters: 2 - Updated: 21-09-2016 - Published: 20-09-2016 -
|
||||
details = soup.find('div',{'class':'details'})
|
||||
|
||||
detail_re = \
|
||||
r'(?P<characters>.+) - Words: (?P<numWords>[0-9,]+) - Rated: (?P<rating>[a-zA-Z\\+]+) - (?P<language>.+) - (?P<genre>.+)'+ \
|
||||
r' - Chapters: (?P<numChapters>[0-9,]+)( - Reviews: (?P<reviews>[0-9,]+))? - Updated: (?P<dateUpdated>[0-9-]+)'+ \
|
||||
r' - Published: (?P<datePublished>[0-9-]+)(?P<completed> - Complete)?'
|
||||
|
||||
details_dict = re.match(detail_re,stripHTML(details)).groupdict()
|
||||
|
||||
# lists
|
||||
for meta in ('characters','genre'):
|
||||
if details_dict[meta] != 'None':
|
||||
self.story.extendList(meta,details_dict[meta].split(' & '))
|
||||
|
||||
# scalars
|
||||
for meta in ('numWords','numChapters','rating','language','reviews'):
|
||||
self.story.setMetadata(meta,details_dict[meta])
|
||||
|
||||
# dates
|
||||
for meta in ('datePublished','dateUpdated'):
|
||||
self.story.setMetadata(meta, makeDate(details_dict[meta], self.dateformat))
|
||||
|
||||
# status
|
||||
if details_dict['completed']:
|
||||
datesdiv = soup.find('div',{'class':'dates'})
|
||||
if stripHTML(datesdiv.find('label')) == 'Completed' : # first label is status.
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
# It's assumed that the number of chapters is correct.
|
||||
# There's no complete list of chapters, so the only
|
||||
# alternative is to get the num of chaps from the last
|
||||
# indiated chapter list instead.
|
||||
for i in range(1,1+int(self.story.getMetadata('numChapters'))):
|
||||
self.chapterUrls.append(("Chapter "+unicode(i),"http://"+self.getSiteDomain()\
|
||||
+"/read/"+self.story.getMetadata('storyId')+"/%s"%i))
|
||||
for a in soup.select("div.genres a"):
|
||||
self.story.addToList('genre',stripHTML(a))
|
||||
|
||||
for a in soup.select("section.characters li.Tags__item a"):
|
||||
self.story.addToList('characters',stripHTML(a))
|
||||
|
||||
for a in soup.select('a[href*="pairings="]'):
|
||||
self.story.addToList('ships',stripHTML(a).replace("+","/"))
|
||||
|
||||
for a in soup.select('div.Story__type a[href*="fandoms="]'):
|
||||
# logger.debug(a)
|
||||
fandomstr=stripHTML(a).replace(' Fanfiction','').strip()
|
||||
# logger.debug("'%s'"%fandomstr)
|
||||
## haven't thought of a better way to detect and *not*
|
||||
## split on fandoms with a '&' in them.
|
||||
for ampfandom in ampfandoms:
|
||||
if ampfandom in fandomstr:
|
||||
self.story.addToList('category',ampfandom)
|
||||
fandomstr = fandomstr.replace(ampfandom,'')
|
||||
for fandom in fandomstr.split('&'):
|
||||
if fandom:
|
||||
self.story.addToList('category',fandom)
|
||||
|
||||
## Currently no 'Original' stories on the site, but does list
|
||||
## it as a search type. Set extratags: and uncomment this if
|
||||
## and when.
|
||||
# if self.story.getList('category'):
|
||||
# self.story.addToList('category', 'FanFiction')
|
||||
# else:
|
||||
# self.story.addToList('category', 'Original')
|
||||
|
||||
for chapli in soup.select('ul.StoryContents__chapters li'):
|
||||
self.add_chapter(stripHTML(chapli.select_one('span.chapter-title')),chapli.select_one('a')['href'])
|
||||
|
||||
if self.num_chapters() == 0:
|
||||
raise exceptions.FailedToDownload("Story at %s has no chapters." % self.url)
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
div = soup.find('div', {'class' : 'text'})
|
||||
div = soup.find('div', {'class' : 'StoryChapter__text'})
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
||||
|
|
|
|||
594
fanficfare/adapters/adapter_fictionlive.py
Normal file
594
fanficfare/adapters/adapter_fictionlive.py
Normal file
|
|
@ -0,0 +1,594 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#### Hazel's fiction.live fanficfare adapter
|
||||
# what an *adventure* this was. fiction.live is an angular web3.0 app that does async background stuff everywhere.
|
||||
# they're not kidding about it being live.
|
||||
# can I wrangle it's stories into books for offline reading? yes I 98% can!
|
||||
|
||||
### won't support, because they aren't part of the text
|
||||
# chat, threads, chat replies on vote options
|
||||
|
||||
### can't support because wtf this is a book
|
||||
# music / audio embeds
|
||||
# per-user achivement tracking with fancy achievement-get animations
|
||||
# story scripting (shows script tags visible in the text, not computed values or input fields)
|
||||
|
||||
import re
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
import itertools
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# __package__ = 'fanficfare.adapters' # fixes dev issues with unknown package base
|
||||
|
||||
from .base_adapter import BaseSiteAdapter
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
from ..six import ensure_text
|
||||
|
||||
def getClass():
|
||||
return FictionLiveAdapter
|
||||
|
||||
class FictionLiveAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.story.setMetadata('siteabbrev','flive')
|
||||
self.story_id = self.parsedUrl.path.split('/')[3]
|
||||
self.story.setMetadata('storyId', self.story_id)
|
||||
|
||||
self.chapter_id_to_api = {}
|
||||
|
||||
# normalize URL. omits title in the url
|
||||
self._setURL("https://fiction.live/stories//{s_id}".format(s_id = self.story_id));
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return "fiction.live"
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return ["fiction.live", "beta.fiction.live"] # I still remember anonkun, but the domain has now lapsed
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
# I'd like to thank regex101.com for helping me screw this up less
|
||||
return r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/([a-zA-Z0-9\-]+)(/(home)?)?$"
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return ("https://fiction.live/stories/Example-Story-Title/17CharacterIDhere/home "
|
||||
+"https://fiction.live/stories/Example-Story-With-Long-ID/-20CharacterIDisHere "
|
||||
+"https://fiction.live/Sci-fi/Example-Story-With-URL-Genre/17CharacterIDhere/ "
|
||||
+"https://fiction.live/stories/Example-Story-With-UUID/00000000-0000-4000-0000-000000000000/")
|
||||
|
||||
@classmethod
|
||||
def get_section_url(cls,url):
|
||||
## minimal URL used for section names in INI and reject list
|
||||
## for comparison
|
||||
# logger.debug("pre--url:%s"%url)
|
||||
url = re.sub(r"https?://(beta\.)?fiction\.live/[^/]*/[^/]*/(?P<id>[a-zA-Z0-9\-]+)(/(home)?)?$",r'https://fiction.live/stories//\g<id>',url)
|
||||
# logger.debug("post-url:%s"%url)
|
||||
return url
|
||||
|
||||
def parse_timestamp(self, timestamp):
|
||||
# fiction.live date format is unix-epoch milliseconds. not a good fit for fanficfare's makeDate.
|
||||
# doesn't use a timezone object and returns tz-naive datetimes. I *think* I can leave the rest to fanficfare
|
||||
return datetime.fromtimestamp(timestamp / 1000.0, None)
|
||||
|
||||
def img_url_trans(self,imgurl):
|
||||
"Apparently site changed cdn URLs for images more than once."
|
||||
# logger.debug("pre--imgurl:%s"%imgurl)
|
||||
imgurl = re.sub(r'(\w+)\.cloudfront\.net',r'cdn6.fiction.live/file/fictionlive',imgurl)
|
||||
imgurl = re.sub(r'www\.filepicker\.io/api/file/(\w+)',r'cdn4.fiction.live/fp/\1',imgurl)
|
||||
imgurl = re.sub(r'cdn[34].fiction.live/(.+)',r'cdn6.fiction.live/file/fictionlive/\1',imgurl)
|
||||
# logger.debug("post-imgurl:%s"%imgurl)
|
||||
return imgurl
|
||||
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
||||
metadata_url = "https://fiction.live/api/node/{s_id}/"
|
||||
response = self.get_request(metadata_url.format(s_id = self.story_id))
|
||||
|
||||
if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
|
||||
raise exceptions.StoryDoesNotExist("Empty response for " + self.url)
|
||||
|
||||
data = json.loads(response)
|
||||
|
||||
## get metadata for multi route chapters
|
||||
if 'multiRoute' in data and data['multiRoute'] == True:
|
||||
route_metadata_url = "https://fiction.live/api/anonkun/routes/{s_id}/"
|
||||
response = self.get_request(route_metadata_url.format(s_id = self.story_id))
|
||||
|
||||
if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
|
||||
raise exceptions.StoryDoesNotExist("Empty response for " + self.url)
|
||||
|
||||
data["route_metadata"] = json.loads(response)
|
||||
|
||||
self.extract_metadata(data, get_cover)
|
||||
self.add_chapters(data)
|
||||
|
||||
def extract_metadata(self, data, get_cover):
|
||||
# on one hand, we've got nicely-formatted JSON and can just index into the thing we want, no parsing needed.
|
||||
# on the other, nearly *everything* in this api is optional. found that out the hard way.
|
||||
|
||||
# not optional
|
||||
self.story.setMetadata('title', stripHTML(data['t']))
|
||||
|
||||
# stories have ut, rt, ct, and cht. fairly sure that ut = update time and rt = release time.
|
||||
# ct is 'creation time' and everything in the api has it -- you can create stories and edit before publishing
|
||||
# cht is *chunktime* -- newest story chunk added.
|
||||
# ut for update time includes other kinds of update -- threads, chat etc
|
||||
# ct <= rt <= cht <= ut
|
||||
self.story.setMetadata("dateUpdated", self.parse_timestamp(data['cht']))
|
||||
self.story.setMetadata("datePublished", self.parse_timestamp(data['rt']))
|
||||
|
||||
self.most_recent_chunk = data['cht'] if 'cht' in data else 9999999999999998
|
||||
|
||||
# nearly everything optional from here out
|
||||
|
||||
if 'storyStatus' in data:
|
||||
status_translate = {'active': "In-Progress", 'finished': "Completed"} # fiction.live to fanficfare
|
||||
status = data['storyStatus']
|
||||
self.story.setMetadata('status', status_translate.get(status, status.title()))
|
||||
elif 'complete' in data:
|
||||
if data['complete'] == True:
|
||||
self.story.setMetadata('status', "Completed")
|
||||
else:
|
||||
self.story.setMetadata('status', "In-Progress")
|
||||
else:
|
||||
self.story.setMetadata('status', "In-Progress")
|
||||
|
||||
if 'contentRating' in data:
|
||||
self.story.setMetadata('rating', data['contentRating'])
|
||||
elif 'tAge' in data:
|
||||
self.story.setMetadata('rating', data['tAge'])
|
||||
else:
|
||||
self.story.setMetadata('rating', "teen")
|
||||
|
||||
if 'w' in data: self.story.setMetadata('numWords', data['w'])
|
||||
if 'likeCount' in data: self.story.setMetadata('likes', data['likeCount'])
|
||||
if 'rInput' in data: self.story.setMetadata('reader_input', data['rInput'].title())
|
||||
|
||||
summary = stripHTML(data['d']) if 'd' in data else ""
|
||||
firstblock = data['b'].strip() if 'b' in data else ""
|
||||
self.setDescription(self.url, summary if not firstblock else summary + "\n<br />\n" + firstblock)
|
||||
|
||||
tags = data['ta'] if 'ta' in data else []
|
||||
|
||||
if (self.story.getMetadataRaw('rating') in {"nsfw", "adult"} or 'smut' in tags) and \
|
||||
not (self.is_adult or self.getConfig("is_adult")):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
show_spoiler_tags = self.getConfig('show_spoiler_tags')
|
||||
spoiler_tags = data['spoilerTags'] if 'spoilerTags' in data else []
|
||||
for tag in tags:
|
||||
if show_spoiler_tags or not tag in spoiler_tags:
|
||||
self.story.addToList('tags', tag)
|
||||
|
||||
authors = data['u'] # non-optional
|
||||
if len(authors) > 1:
|
||||
for author in data['u']:
|
||||
if '_id' in author and author['n']: # some stories have spurious co-authors (may have been fixed?)
|
||||
self.story.addToList('author', author['n'])
|
||||
self.story.addToList('authorUrl', "https://fiction.live/user/" + author['n'] + "/")
|
||||
self.story.addToList('authorId', author['_id'])
|
||||
else: # TODO: can avoid this?
|
||||
author = authors[0]
|
||||
self.story.setMetadata('author', author['n'])
|
||||
self.story.setMetadata('authorUrl', "https://fiction.live/user/" + author['n'] + "/")
|
||||
self.story.setMetadata('authorId', author['_id'])
|
||||
|
||||
if 'isLive' in data and data['isLive']:
|
||||
self.story.setMetadata('live', "Now! (at time of download)")
|
||||
elif 'nextLive' in data and data['nextLive']:
|
||||
# formatted to match site, not other fanficfare timestamps
|
||||
next_live_time = self.parse_timestamp(data['nextLive'])
|
||||
self.story.setMetadata('live', next_live_time)
|
||||
|
||||
show_nsfw_cover_images = self.getConfig('show_nsfw_cover_images')
|
||||
nsfw_cover = data['nsfwCover'] if 'nsfwCover' in data else False
|
||||
if get_cover and 'i' in data:
|
||||
if show_nsfw_cover_images or not nsfw_cover:
|
||||
coverUrl = data['i'][0]
|
||||
self.setCoverImage(self.url, coverUrl)
|
||||
|
||||
# gonna need these later for adding details to achievement-granting links in the text
|
||||
try:
|
||||
self.achievements = data['achievements']['achievements']
|
||||
except KeyError:
|
||||
self.achievements = []
|
||||
|
||||
def add_chapters(self, data):
|
||||
|
||||
## chapter urls are for the api. they return json and aren't user-navigatable, or the same as on the website
|
||||
chunkrange_url = "https://fiction.live/api/anonkun/chapters/{s_id}/{start}/{end}/"
|
||||
|
||||
## api url to get content of a multi route chapter. requires only the route id and no timestamps
|
||||
route_chunkrange_url = "https://fiction.live/api/anonkun/route/{c_id}/chapters"
|
||||
|
||||
def add_chapter_url(title, bounds):
|
||||
"Adds a chapter url based on the start/end chunk-range timestamps."
|
||||
start, end = bounds
|
||||
end -= 1
|
||||
chapter_url = chunkrange_url.format(s_id = data['_id'], start = start, end = end)
|
||||
self.add_chapter(title, chapter_url)
|
||||
|
||||
def add_route_chapter_url(title, route_id):
|
||||
"Adds a route chapter url based on the route id."
|
||||
chapter_url = route_chunkrange_url.format(c_id = route_id)
|
||||
self.add_chapter(title, chapter_url)
|
||||
|
||||
def pair(iterable):
|
||||
"[1,2,3,4] -> [(1, 2), (2, 3), (3, 4)]"
|
||||
a, b = itertools.tee(iterable, 2)
|
||||
next(b, None)
|
||||
return list(zip(a, b))
|
||||
|
||||
def map_chap_ids_to_api(chapter_ids, route_ids, times):
|
||||
for index, bounds in enumerate(times):
|
||||
start, end = bounds
|
||||
end -= 1
|
||||
chapter_url = chunkrange_url.format(s_id = data['_id'], start = start, end = end)
|
||||
self.chapter_id_to_api[chapter_ids[index]] = chapter_url
|
||||
|
||||
for route_id in route_ids:
|
||||
chapter_url = route_chunkrange_url.format(c_id = route_id)
|
||||
self.chapter_id_to_api[route_id] = chapter_url
|
||||
|
||||
## first thing to do is seperate out the appendices
|
||||
appendices, maintext, routes = [], [], []
|
||||
chapters = data['bm'] if 'bm' in data else []
|
||||
|
||||
## not all stories use multiple routes. Those that do have a route id and a title for each route
|
||||
if 'route_metadata' in data and data['route_metadata']:
|
||||
for r in data['route_metadata']:
|
||||
# checking if route title even exists or is None, since most things in the api are optional
|
||||
if 't' in r and r['t'] is not None:
|
||||
title = r['t']
|
||||
else:
|
||||
title = ""
|
||||
routes.append({"id": r['_id'], "title": title})
|
||||
|
||||
for c in chapters:
|
||||
appendices.append(c) if c['title'].startswith('#special') else maintext.append(c)
|
||||
|
||||
## main-text chapter extraction processing. *should* now handle all the edge cases.
|
||||
## relies on fanficfare ignoring empty chapters!
|
||||
|
||||
titles = ["Home"] + [c['title'] for c in maintext]
|
||||
chapter_ids = ['home'] + [c['id'] for c in maintext]
|
||||
times = [data['ct']] + [c['ct'] for c in maintext] + [self.most_recent_chunk + 2] # need to be 1 over, and add_url etc does -1
|
||||
times = pair(times)
|
||||
|
||||
if self.getConfig('include_appendices', True): # Add appendices after main text if desired
|
||||
titles = titles + ["Appendix: " + a['title'][9:] for a in appendices]
|
||||
chapter_ids = chapter_ids + [a['id'] for a in appendices]
|
||||
times = times + [(a['ct'], a['ct'] + 2) for a in appendices]
|
||||
|
||||
route_ids = [r['id'] for r in routes]
|
||||
|
||||
map_chap_ids_to_api(chapter_ids, route_ids, times) # Map chapter ids to API URLs for use when comparing the two
|
||||
|
||||
# doesn't actually run without the call to list.
|
||||
list(map(add_chapter_url, titles, times))
|
||||
|
||||
for r in routes: # add route at the end, after appendices
|
||||
route_id = r['id'] # to get route chapter content, the route id is needed, not the timestamp
|
||||
chapter_title = "Route: " + r['title'] # 'Route: ' at beginning of name, since it's a multiroute chapter
|
||||
add_route_chapter_url(chapter_title, route_id)
|
||||
|
||||
def getChapterText(self, url):
|
||||
|
||||
chunk_handler = {
|
||||
"choice" : self.format_choice,
|
||||
"readerPost" : self.format_readerposts,
|
||||
"chapter" : self.format_chapter
|
||||
}
|
||||
|
||||
response = self.get_request(url)
|
||||
data = json.loads(response)
|
||||
|
||||
if data == []:
|
||||
return ""
|
||||
# and *now* we can assume there's at least one chunk in the data -- chapters can be totally empty.
|
||||
|
||||
# are we trying to read an appendix? check the first chunk to find out.
|
||||
getting_appendix = len(data) == 1 and 't' in data[0] and data[0]['t'].startswith("#special")
|
||||
|
||||
text = ""
|
||||
|
||||
for count, chunk in enumerate(data):
|
||||
|
||||
# logger.debug(count) # pollutes the debug log, shows which chunk crashed the handler
|
||||
|
||||
text += "<div>" # chapter chunks aren't always well-delimited in their contents
|
||||
|
||||
# appendix chunks are mixed in with other things
|
||||
if not getting_appendix and 't' in chunk and chunk['t'].startswith("#special"): # t = title = bookmark
|
||||
continue
|
||||
|
||||
handler = chunk_handler.get(chunk['nt'], self.format_unknown) # nt = node type
|
||||
text += handler(chunk)
|
||||
|
||||
show_timestamps = self.getConfig('show_timestamps')
|
||||
if show_timestamps and 'ct' in chunk:
|
||||
#logger.debug("Adding timestamp for chunk...")
|
||||
timestamp = ensure_text(self.parse_timestamp(chunk['ct']).strftime("%x -- %X"))
|
||||
text += '<div class="ut">' + timestamp + '</div>'
|
||||
|
||||
text += "</div><br />\n"
|
||||
|
||||
## soup to repair the most egregious HTML errors.
|
||||
return self.utf8FromSoup(url,self.make_soup(text))
|
||||
|
||||
### everything from here out is chunk data handling.
|
||||
|
||||
def format_chapter(self, chunk):
|
||||
"""Handles any formatting in the chapter body text for text chapters.
|
||||
In the 'default case' where we're getting boring chapter-chunk body text, just calls utf8fromSoup
|
||||
and returns the text as is on the website."""
|
||||
|
||||
soup = self.make_soup(chunk['b'] if 'b' in chunk else "")
|
||||
|
||||
if self.getConfig('legend_spoilers',True):
|
||||
soup = self.add_spoiler_legends(soup)
|
||||
|
||||
if self.achievements:
|
||||
soup = self.append_achievments(soup)
|
||||
|
||||
return str(soup)
|
||||
|
||||
def add_spoiler_legends(self, soup):
|
||||
# find spoiler links and change link-anchor block to legend block
|
||||
spoilers = soup.find_all('a', class_="tydai-spoiler")
|
||||
for link_tag in spoilers:
|
||||
link_tag.name = 'fieldset'
|
||||
legend = soup.new_tag('legend')
|
||||
legend.string = "Spoiler"
|
||||
link_tag.insert(0, legend)
|
||||
return soup
|
||||
|
||||
def fictionlive_normalize(self, string):
|
||||
# might be able to use this to preserve titles in normalized urls, if the scheme is the same
|
||||
|
||||
# BUG: in achivement ids these are all replaced, but I *don't* know that the list is complete.
|
||||
# should be rare, thankfully. *most* authors don't use any funny characters in the achievment's *ID*
|
||||
special_chars = "\"\\,.!?+=/[](){}<>_'@#$%^&*~`;:|" # not the hyphen, which is used to represent spaces
|
||||
|
||||
return string.lower().replace(" ", "-").translate({ord(x) : None for x in special_chars})
|
||||
|
||||
def append_achievments(self, soup):
|
||||
# achivements are present in the text as a kind of link, and you get the shiny popup by clicking them.
|
||||
achievement_links = soup.find_all('a', class_="tydai-achievement")
|
||||
|
||||
achieved_ids = []
|
||||
for link_tag in achievement_links:
|
||||
# these are not only prepended by a unicode lightning-bolt, but also format clearly as a link
|
||||
# should use .u css selector -- part of output_css defaults? or just let replace_tags_with_spans do it?
|
||||
new_u = soup.new_tag('u')
|
||||
new_u.string = link_tag.text # copy out the link text into a new element
|
||||
# html entities for improved compatability with AZW3 conversion
|
||||
link_tag.string = "⚡" # then overwrite
|
||||
link_tag.insert(1, new_u)
|
||||
|
||||
## while we've got the achievment links, get the ids from the link
|
||||
a_id = link_tag['data-id']
|
||||
a_id = self.fictionlive_normalize(a_id)
|
||||
|
||||
achieved_ids.append(a_id)
|
||||
|
||||
if achieved_ids:
|
||||
logger.debug("achievements (this chunk): " + ", ".join(achieved_ids))
|
||||
|
||||
# can't replicate the animated shiny announcement popup, so have an end-of-chunk announcement instead
|
||||
# TODO: achievement images -- does anyone use them?
|
||||
a_source = "<br />\n<fieldset><legend>⚡ Achievement obtained!</legend>\n<h4>{}</h4>\n{}</fieldset>\n"
|
||||
|
||||
for a_id in achieved_ids:
|
||||
if a_id in self.achievements:
|
||||
a_title = self.achievements[a_id]['t'] if 't' in self.achievements[a_id] else a_id.title()
|
||||
a_text = self.achievements[a_id]['d'] if 'd' in self.achievements[a_id] else ""
|
||||
soup.append(self.make_soup(a_source.format(a_title, a_text)))
|
||||
else:
|
||||
a_title = a_id.title()
|
||||
error = "<br />\n<fieldset><legend>Error: Achievement not found.</legend>Couldn't find '{}'. Ask the story author to check if the achievment exists."
|
||||
soup.append(self.make_soup(error.format(a_title)))
|
||||
|
||||
return soup
|
||||
|
||||
def count_votes(self, chunk):
|
||||
"""So, fiction.live's api doesn't return the counted votes you see on the website.
|
||||
After all, it needs to allow for things like revoking a vote,
|
||||
with the count live and updated in realtime on your client.
|
||||
So instead we get the raw vote-data, but have to count it ourselves."""
|
||||
|
||||
# optional.
|
||||
choices = chunk['choices'] if 'choices' in chunk else []
|
||||
|
||||
def counter(votes):
|
||||
output = [0] * len(choices)
|
||||
for vote in votes.values():
|
||||
## votes are either a single option-index or a list of option-indicies, depending on the choice type
|
||||
if 'multiple' in chunk and chunk['multiple'] == False:
|
||||
vote = [vote] # normalize to list
|
||||
for v in vote:
|
||||
# v should only be int, but there is at least one story where some unrelated string was returned,
|
||||
# so let's just ignore non-int values here
|
||||
if not isinstance(v, int):
|
||||
continue
|
||||
if 0 <= v < len(choices):
|
||||
output[v] += 1
|
||||
return output
|
||||
|
||||
# I believe that verified is always a subset of all votes, but that's not enforced here
|
||||
total_votes = counter(chunk['votes'] if 'votes' in chunk else {})
|
||||
verified_votes = counter(chunk['userVotes'] if 'userVotes' in chunk else {})
|
||||
|
||||
# Choices can link to route chapters, where the index of the choice in list 'choices' is a key in the
|
||||
# 'routes' dict and the dict value is the route id.
|
||||
# That route id is needed for the url to create the internal link from the choice to the route chapter.
|
||||
routes = chunk['routes'] if 'routes' in chunk else {}
|
||||
if choices and len(routes) > 0:
|
||||
altered_choices = []
|
||||
for i, choice in enumerate(choices):
|
||||
choice_index = str(i)
|
||||
if choice_index in routes.keys():
|
||||
route_chunkrange_url = "https://fiction.live/api/anonkun/route/{c_id}/chapters"
|
||||
route_url = route_chunkrange_url.format(c_id=routes[choice_index])
|
||||
choice_link = "<a data-orighref='" + route_url + "' >" + choice + "</a>"
|
||||
altered_choices.append(choice_link)
|
||||
else:
|
||||
altered_choices.append(choice)
|
||||
choices = altered_choices
|
||||
|
||||
return zip(choices, verified_votes, total_votes)
|
||||
|
||||
def format_choice(self, chunk):
|
||||
|
||||
options = self.count_votes(chunk)
|
||||
|
||||
# crossed-out writeins. authors can censor user-written choices, and (optionally) offer a reason.
|
||||
x_outs = [int(x) for x in chunk['xOut']] if 'xOut' in chunk else []
|
||||
x_reasons = chunk['xOutReasons'] if 'xOutReasons' in chunk else {}
|
||||
|
||||
closed = "closed" if 'closed' in chunk else "open" # BUG: check on reopened votes
|
||||
|
||||
num_voters = len(chunk['votes']) if 'votes' in chunk else 0
|
||||
|
||||
vote_title = chunk['b'] if 'b' in chunk else "Choices"
|
||||
|
||||
output = ""
|
||||
# start with the header
|
||||
output += u"<h4><span>" + vote_title + " — <small>Voting " + closed
|
||||
output += u" — " + str(num_voters) + " voters</small></span></h4>\n"
|
||||
|
||||
# we've got everything needed to build the html for our vote table.
|
||||
output += "<table class=\"voteblock\">\n"
|
||||
|
||||
# filter out the crossed-out options, which display last
|
||||
crossed = []
|
||||
for index, (choice_text, verified_votes, total_votes) in enumerate(options):
|
||||
if index in x_outs:
|
||||
crossed.append((index, choice_text, verified_votes, total_votes))
|
||||
else:
|
||||
output += "<tr class=\"choiceitem\"><td>" + str(choice_text) + "</td><td class=\"votecount\">"
|
||||
if verified_votes > 0:
|
||||
output += "★" + str(verified_votes) + "/"
|
||||
output += str(total_votes)+ " </td></tr>\n"
|
||||
|
||||
# crossed out options are: displayed last, struckthrough, smaller, with the reason below, and no vote count.
|
||||
# also greyed out, but that's a bit much.
|
||||
for index, choice_text, _, _ in crossed:
|
||||
if choice_text == "permanentlyRemoved":
|
||||
continue
|
||||
else:
|
||||
x_reason = x_reasons[str(index)] if str(index) in x_reasons else ""
|
||||
output += "<tr class=\"choiceitem\"><td colspan=\"2\"><small><strike>" \
|
||||
+ str(choice_text) + "</strike><br>" + str(x_reason) + "</small></td></tr>"
|
||||
|
||||
output += "</table>\n"
|
||||
|
||||
return output
|
||||
|
||||
def format_readerposts(self, chunk):
|
||||
|
||||
closed = "Closed" if 'closed' in chunk else "Open"
|
||||
|
||||
posts = chunk['votes'] if 'votes' in chunk else {}
|
||||
dice = chunk['dice'] if 'dice' in chunk else {}
|
||||
|
||||
# now matches the site and does *not* include dicerolls as posts!
|
||||
num_votes = str(len(posts)) + " posts" if len(posts) != 0 else "be the first to post."
|
||||
|
||||
posts_title = chunk['b'] if 'b' in chunk else "Reader Posts"
|
||||
|
||||
output = ""
|
||||
output += u"<h4><span>" + posts_title + " — <small> Posting " + closed
|
||||
output += u" — " + num_votes + "</small></span></h4>\n"
|
||||
|
||||
## so. a voter can roll with their post. these rolls are in a seperate dict, but have the **same uid**.
|
||||
## they're then formatted with the roll above the writein for that user.
|
||||
## I *think* that formatting roll-only before writein-only posts is correct, but tbh, it's hard to tell.
|
||||
## writeins are usually opened by the author for posts or rolls, not both at once.
|
||||
## people tend to only mix the two by accident.
|
||||
if dice != {}:
|
||||
for uid, roll in dice.items():
|
||||
output += '<div class="choiceitem">'
|
||||
if roll: # optional. just because there's a list entry for it doesn't mean it has a value!
|
||||
output += '<div class="dice">' + str(roll) + '</div>\n'
|
||||
if uid in posts:
|
||||
post = posts[uid]
|
||||
if post:
|
||||
output += str(post)
|
||||
del posts[uid] # it's handled here with the roll instead of later
|
||||
output += '</div>'
|
||||
|
||||
for post in posts.values():
|
||||
if post:
|
||||
output += '<div class="choiceitem">' + str(post) + '</div>\n'
|
||||
|
||||
return output
|
||||
|
||||
def normalize_chapterurl(self, url):
|
||||
if url.startswith(r'https://fiction.live/api/anonkun/chapters'):
|
||||
return url
|
||||
|
||||
pattern = None
|
||||
|
||||
if url.startswith(r'https://fiction.live/api/anonkun/route'):
|
||||
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/routes/([a-zA-Z0-9]+)"
|
||||
elif url.startswith(r'https://fiction.live/'):
|
||||
pattern = r"https?://(?:beta\.)?fiction\.live/[^/]*/[^/]*/[a-zA-Z0-9]+/[^/]*(/[a-zA-Z0-9]+|home)"
|
||||
# regex101 rocks
|
||||
|
||||
if not pattern:
|
||||
return url
|
||||
|
||||
match = re.match(pattern, url)
|
||||
if not match:
|
||||
return url
|
||||
|
||||
chapter_id = match.group(1)
|
||||
|
||||
if chapter_id.startswith('/'):
|
||||
chapter_id = chapter_id[1:]
|
||||
|
||||
if chapter_id and chapter_id in self.chapter_id_to_api:
|
||||
return self.chapter_id_to_api[chapter_id]
|
||||
|
||||
return url
|
||||
|
||||
def format_unknown(self, chunk):
|
||||
raise NotImplementedError("Unknown chunk type ({}) in fiction.live story.".format(chunk))
|
||||
|
||||
# in future, I'd like to handle audio embeds somehow. but they're not availble to add to stories right now.
|
||||
# pretty sure they'll just format as a link (with a special tydai-audio class) and should be easier than achievements
|
||||
|
||||
# TODO: verify that show_timestamps is working, check times!
|
||||
|
||||
# TODO: find a story that uses achievement images and implement them?
|
||||
|
||||
### known bugs:
|
||||
|
||||
# TODO: support chapter urls for single-chapter / chapter-range downloads
|
||||
# complicated -- urls for getChapterText are API urls generated by add_chapters, not the public/website ones
|
||||
# in particular, may need more API reversing to figure out how to get the *end* of the chunk range
|
||||
# find in 'bm' in the metadata?
|
||||
|
|
@ -1,8 +1,12 @@
|
|||
from __future__ import absolute_import
|
||||
import re
|
||||
import urllib2
|
||||
import urlparse
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves.urllib import parse as urlparse
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
|
||||
def getClass():
|
||||
|
|
@ -19,7 +23,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
SITE_ABBREVIATION = 'fmt'
|
||||
SITE_DOMAIN = 'fictionmania.tv'
|
||||
|
||||
BASE_URL = 'http://' + SITE_DOMAIN + '/stories/'
|
||||
BASE_URL = 'https://' + SITE_DOMAIN + '/stories/'
|
||||
READ_TEXT_STORY_URL_TEMPLATE = BASE_URL + 'readtextstory.html?storyID=%s'
|
||||
DETAILS_URL_TEMPLATE = BASE_URL + 'details.html?storyID=%s'
|
||||
|
||||
|
|
@ -36,23 +40,6 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
|
||||
self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
|
||||
|
||||
# Always single chapters, probably should use the Anthology feature to
|
||||
# merge chapters of a story
|
||||
self.story.setMetadata('numChapters', 1)
|
||||
|
||||
def _customized_fetch_url(self, url, exception=None, parameters=None):
|
||||
if exception:
|
||||
try:
|
||||
data = self._fetchUrl(url, parameters)
|
||||
except urllib2.HTTPError:
|
||||
raise exception(self.url)
|
||||
# Just let self._fetchUrl throw the exception, don't catch and
|
||||
# customize it.
|
||||
else:
|
||||
data = self._fetchUrl(url, parameters)
|
||||
|
||||
return self.make_soup(data)
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return FictionManiaTVAdapter.SITE_DOMAIN
|
||||
|
|
@ -62,11 +49,11 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
return cls.READ_TEXT_STORY_URL_TEMPLATE % 1234
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return 'https?' + re.escape(self.BASE_URL[len('http'):]) + '(readtextstory|readxstory|details)\.html\?storyID=\d+$'
|
||||
return r'https?' + re.escape(self.BASE_URL[len('https'):]) + r'(readtextstory|readhtmlstory|readxstory|details)\.html\?storyID=\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')
|
||||
soup = self._customized_fetch_url(url)
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
keep_summary_html = self.getConfig('keep_summary_html')
|
||||
for row in soup.find('table')('tr'):
|
||||
|
|
@ -79,7 +66,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
|
||||
if key == 'Title':
|
||||
self.story.setMetadata('title', value)
|
||||
self.chapterUrls.append((value, self.url))
|
||||
self.add_chapter(value, self.url)
|
||||
|
||||
elif key == 'File Name':
|
||||
self.story.setMetadata('fileName', value)
|
||||
|
|
@ -119,7 +106,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('rating', value)
|
||||
|
||||
elif key == 'Complete':
|
||||
self.story.setMetadata('status', 'Completed' if value == 'Complete' else 'In-Progress')
|
||||
self.story.setMetadata('status', 'Completed' if value == 'yes' else 'In-Progress')
|
||||
|
||||
elif key == 'Categories':
|
||||
for element in cells[1]('a'):
|
||||
|
|
@ -149,20 +136,78 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('readings', value)
|
||||
|
||||
def getChapterText(self, url):
|
||||
soup = self._customized_fetch_url(url)
|
||||
element = soup.find('pre')
|
||||
element.name = 'div'
|
||||
if self.getConfig("download_text_version",False):
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
element = soup.find('pre')
|
||||
element.name = 'div'
|
||||
|
||||
# The story's content is contained in a <pre> tag, probably taken 1:1
|
||||
# from the source text file. A simple replacement of all newline
|
||||
# characters with a break line tag should take care of formatting.
|
||||
# The story's content is contained in a <pre> tag, probably taken 1:1
|
||||
# from the source text file. A simple replacement of all newline
|
||||
# characters with a break line tag should take care of formatting.
|
||||
|
||||
# While wrapping in paragraphs would be possible, it's too much work,
|
||||
# I'd rather display the story 1:1 like it was found in the pre tag.
|
||||
content = unicode(element)
|
||||
content = content.replace('\n', '<br/>')
|
||||
# While wrapping in paragraphs would be possible, it's too much work,
|
||||
# I'd rather display the story 1:1 like it was found in the pre tag.
|
||||
content = unicode(element)
|
||||
content = content.replace('\n', '<br/>')
|
||||
|
||||
if self.getConfig('non_breaking_spaces'):
|
||||
return content.replace(' ', ' ')
|
||||
if self.getConfig('non_breaking_spaces'):
|
||||
return content.replace(' ', ' ')
|
||||
|
||||
return content
|
||||
## Normally, getChapterText should use self.utf8FromSoup(),
|
||||
## but this is converting from plain(ish) text. -- JM
|
||||
return content
|
||||
|
||||
else:
|
||||
|
||||
# try SWI (story with images) version first
|
||||
# <div style="margin-left:10ex;margin-right:10ex">
|
||||
## fetching SWI version now instead of text.
|
||||
htmlurl = url.replace('readtextstory','readhtmlstory')
|
||||
## Used to find by style, but it's inconsistent now. we've seen:
|
||||
## margin-left:10ex;margin-right:10ex
|
||||
## margin-right: 5%; margin-left: 5%
|
||||
## margin-left:5%; margin-right:5%
|
||||
## margin-left:5%; margin-right:5%; background: white
|
||||
## And there's some without a <div> tag (or an unclosed div)
|
||||
## Only the comments appear to be consistent.
|
||||
beginmarker='<!--Read or display the file-->'
|
||||
endmarker='''<hr size=1 noshade>
|
||||
<!--review add read, top and bottom-->
|
||||
'''
|
||||
data = self.get_request(htmlurl)
|
||||
try:
|
||||
## if both markers are found, assume whatever is in between
|
||||
## is the chapter text.
|
||||
soup = self.make_soup(data[data.index(beginmarker):data.index(endmarker)])
|
||||
return self.utf8FromSoup(htmlurl,soup)
|
||||
except Exception as e:
|
||||
# logger.debug(e)
|
||||
# logger.debug(soup)
|
||||
logger.debug("Story With Images(SWI) not found, falling back to HTML.")
|
||||
|
||||
## fetching html version now instead of text.
|
||||
## Note that html and SWI pages are *not* formatted the same.
|
||||
soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
|
||||
# logger.debug(soup)
|
||||
|
||||
# remove first hr and everything before
|
||||
remove = soup.find('hr')
|
||||
# logger.debug(remove)
|
||||
for tag in remove.find_previous_siblings():
|
||||
tag.extract()
|
||||
remove.extract()
|
||||
|
||||
# remove trailing hr, parent tags and everything after.
|
||||
remove = soup.find('hr',size='1') # <center><hr size=1>
|
||||
if remove.parent.name == 'center':
|
||||
## can also be directly in body without <center>
|
||||
remove = remove.parent
|
||||
# logger.debug(remove)
|
||||
for tag in remove.find_next_siblings():
|
||||
tag.extract()
|
||||
remove.extract()
|
||||
|
||||
content = soup.find('body')
|
||||
content.name='div'
|
||||
|
||||
return self.utf8FromSoup(url,content)
|
||||
|
|
|
|||
|
|
@ -1,194 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2013 Fanficdownloader team, 2015 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
import json
|
||||
|
||||
|
||||
#from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
class FictionPadSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','fpad')
|
||||
self.dateformat = "%Y-%m-%dT%H:%M:%SZ"
|
||||
self.is_adult=False
|
||||
self.username = None
|
||||
self.password = None
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("https://"+self.getSiteDomain()
|
||||
+"/author/"+m.group('author')
|
||||
+"/stories/"+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'fictionpad.com'
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "https://fictionpad.com/author/Author/stories/1234/Some-Title"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
# http://fictionpad.com/author/Serdd/stories/4275
|
||||
return r"http(s)?://(www\.)?fictionpad\.com/author/(?P<author>[^/]+)/stories/(?P<id>\d+)"
|
||||
|
||||
# <form method="post" action="/signin">
|
||||
# <input name="authenticity_token" type="hidden" value="u+cfdXh46dRnwVnSlmE2B2BFmHgu760paqgBG6KQeos=" />
|
||||
# <input type="hidden" name="remember" value="1">
|
||||
# <strong class="help-start text-center">or with FictionPad</strong>
|
||||
# <label class="control-label hidden-placeholder">Pseudonym or Email Address</label>
|
||||
# <input name="login" class="input-block-level" type="text" placeholder="Pseudonym or Email Address" maxlength="50" required autofocus>
|
||||
# <label class="control-label hidden-placeholder">Password</label>
|
||||
# <input name="password" class="input-block-level" type="password" placeholder="Password" minlength="6" required>
|
||||
# <button type="submit" class="btn btn-primary btn-block">Sign In</button>
|
||||
# <p class="help-end">
|
||||
# <a href="/passwordreset">Forgot your password?</a>
|
||||
# </p>
|
||||
# </form>
|
||||
def performLogin(self):
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
params['login'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['login'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['remember'] = '1'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/signin'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['login']))
|
||||
|
||||
## need to pull empty login page first to get authenticity_token
|
||||
soup = self.make_soup(self._fetchUrl(loginUrl))
|
||||
params['authenticity_token']=soup.find('input', {'name':'authenticity_token'})['value']
|
||||
|
||||
data = self._postUrl(loginUrl, params)
|
||||
|
||||
if "Invalid email/pseudonym and password combination." in data:
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['login']))
|
||||
raise exceptions.FailedToLogin(loginUrl,params['login'])
|
||||
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
# fetch the chapter. From that we will get almost all the
|
||||
# metadata and chapter list
|
||||
|
||||
url=self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
if "This is a mature story. Please sign in to read it." in data:
|
||||
self.performLogin()
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
find = "wordyarn.config.page = "
|
||||
data = data[data.index(find)+len(find):]
|
||||
data = data[:data.index("</script>")]
|
||||
data = data[:data.rindex(";")]
|
||||
data = data.replace('tables:','"tables":')
|
||||
tables = json.loads(data)['tables']
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
# looks like only one author per story allowed.
|
||||
author = tables['users'][0]
|
||||
story = tables['stories'][0]
|
||||
story_ver = tables['story_versions'][0]
|
||||
logger.debug("story:%s"%story)
|
||||
|
||||
self.story.setMetadata('authorId',author['id'])
|
||||
self.story.setMetadata('author',author['display_name'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/author/'+author['display_name']+'/stories')
|
||||
|
||||
self.story.setMetadata('title',story_ver['title'])
|
||||
self.setDescription(url,story_ver['description'])
|
||||
|
||||
if not ('assets/story_versions/covers' in story_ver['profile_image_url@2x']):
|
||||
self.setCoverImage(url,story_ver['profile_image_url@2x'])
|
||||
|
||||
self.story.setMetadata('datePublished',makeDate(story['published_at'], self.dateformat))
|
||||
self.story.setMetadata('dateUpdated',makeDate(story['published_at'], self.dateformat))
|
||||
|
||||
self.story.setMetadata('followers',story['followers_count'])
|
||||
self.story.setMetadata('comments',story['comments_count'])
|
||||
self.story.setMetadata('views',story['views_count'])
|
||||
self.story.setMetadata('likes',int(story['likes'])) # no idea why they floated these.
|
||||
if 'dislikes' in story:
|
||||
self.story.setMetadata('dislikes',int(story['dislikes']))
|
||||
|
||||
if story_ver['is_complete']:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
self.story.setMetadata('rating', story_ver['maturity_level'])
|
||||
self.story.setMetadata('numWords', unicode(story_ver['word_count']))
|
||||
|
||||
for i in tables['fandoms']:
|
||||
self.story.addToList('category',i['name'])
|
||||
|
||||
for i in tables['genres']:
|
||||
self.story.addToList('genre',i['name'])
|
||||
|
||||
for i in tables['characters']:
|
||||
self.story.addToList('characters',i['name'])
|
||||
|
||||
for c in tables['chapters']:
|
||||
chtitle = "Chapter %d"%c['number']
|
||||
if c['title']:
|
||||
chtitle += " - %s"%c['title']
|
||||
self.chapterUrls.append((chtitle,c['body_url']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
if not url:
|
||||
data = u"<em>This chapter has no text.</em>"
|
||||
else:
|
||||
data = self._fetchUrl(url)
|
||||
soup = self.make_soup(u"<div id='story'>"+data+u"</div>")
|
||||
return self.utf8FromSoup(url,soup)
|
||||
|
||||
def getClass():
|
||||
return FictionPadSiteAdapter
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,15 +15,15 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
|
||||
# py2 vs py3 transition
|
||||
|
||||
## They're from the same people and pretty much identical.
|
||||
from adapter_fanfictionnet import FanFictionNetSiteAdapter
|
||||
from .adapter_fanfictionnet import FanFictionNetSiteAdapter
|
||||
|
||||
class FictionPressComSiteAdapter(FanFictionNetSiteAdapter):
|
||||
|
||||
|
|
@ -43,8 +43,15 @@ class FictionPressComSiteAdapter(FanFictionNetSiteAdapter):
|
|||
def getSiteExampleURLs(cls):
|
||||
return "https://www.fictionpress.com/s/1234/1/ https://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title http://m.fictionpress.com/s/1234/1/"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://(www|m)?\.fictionpress\.com/s/\d+(/\d+)?(/|/[a-zA-Z0-9_-]+)?/?$"
|
||||
@classmethod
|
||||
def _get_site_url_pattern(cls):
|
||||
return r"https?://(www|m)?\.fictionpress\.com/s/(?P<id>\d+)(/\d+)?(/(?P<title>[^/]+))?/?$"
|
||||
|
||||
## normalized chapter URLs DO contain the story title now, but
|
||||
## normalized to current urltitle in case of title changes.
|
||||
def normalize_chapterurl(self,url):
|
||||
return re.sub(r"https?://(www|m)\.(?P<keep>fictionpress\.com/s/\d+/\d+/).*",
|
||||
r"https://www.\g<keep>",url)+self.urltitle
|
||||
|
||||
def getClass():
|
||||
return FictionPressComSiteAdapter
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,18 +15,18 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
import httplib, urllib
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
class FicwadComSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
|
|
@ -46,10 +46,10 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://ficwad.com/story/1234"
|
||||
return "https://ficwad.com/story/1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape(r"http://"+self.getSiteDomain())+"/story/\d+?$"
|
||||
return r"https?:"+re.escape(r"//"+self.getSiteDomain())+r"/story/\d+?$"
|
||||
|
||||
def performLogin(self,url):
|
||||
params = {}
|
||||
|
|
@ -61,12 +61,13 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
params['username'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/account/login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['username']))
|
||||
d = self._postUrl(loginUrl,params,usecache=False)
|
||||
d = self.post_request(loginUrl,params,usecache=False)
|
||||
|
||||
if "Login attempt failed..." in d:
|
||||
if "Login attempt failed..." in d or \
|
||||
'<div id="error">Please enter your username and password.</div>' in d:
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['username']))
|
||||
raise exceptions.FailedToLogin(url,params['username'])
|
||||
|
|
@ -74,13 +75,6 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
return True
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
# fetch the chapter. From that we will get almost all the
|
||||
|
|
@ -89,58 +83,45 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
# non-existent/removed story urls get thrown to the front page.
|
||||
if "<h4>Featured Story</h4>" in data:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
soup = self.make_soup(data)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
# non-existent/removed story urls get thrown to the front page.
|
||||
if "<h4>Featured Story</h4>" in data:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# if blocked, attempt login.
|
||||
if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
|
||||
if self.performLogin(url): # performLogin raises
|
||||
# FailedToLogin if it fails.
|
||||
soup = self.make_soup(self._fetchUrl(url,usecache=False))
|
||||
soup = self.make_soup(self.get_request(url,usecache=False))
|
||||
|
||||
divstory = soup.find('div',id='story')
|
||||
storya = divstory.find('a',href=re.compile("^/story/\d+$"))
|
||||
storya = divstory.find('a',href=re.compile(r"^/story/\d+$"))
|
||||
if storya : # if there's a story link in the divstory header, this is a chapter page.
|
||||
# normalize story URL on chapter list.
|
||||
self.story.setMetadata('storyId',storya['href'].split('/',)[2])
|
||||
url = "http://"+self.getSiteDomain()+storya['href']
|
||||
url = "https://"+self.getSiteDomain()+storya['href']
|
||||
logger.debug("Normalizing to URL: "+url)
|
||||
self._setURL(url)
|
||||
try:
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
# if blocked, attempt login.
|
||||
if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
|
||||
if self.performLogin(url): # performLogin raises
|
||||
# FailedToLogin if it fails.
|
||||
soup = self.make_soup(self._fetchUrl(url,usecache=False))
|
||||
soup = self.make_soup(self.get_request(url,usecache=False))
|
||||
|
||||
# title - first h4 tag will be title.
|
||||
titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
|
||||
self.story.setMetadata('title', stripHTML(titleh4.a))
|
||||
|
||||
if 'Deleted story' in self.story.getMetadata('title'):
|
||||
if 'Deleted story' in self.story.getMetadataRaw('title'):
|
||||
raise exceptions.StoryDoesNotExist("This story was deleted. %s"%self.url)
|
||||
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = soup.find('span',{'class':'author'}).find('a', href=re.compile(r"^/a/"))
|
||||
self.story.setMetadata('authorId',a['href'].split('/')[2])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# description
|
||||
|
|
@ -149,14 +130,14 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
#self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
|
||||
|
||||
# most of the meta data is here:
|
||||
metap = storydiv.find("p",{"class":"meta"})
|
||||
metap = storydiv.find("div",{"class":"meta"})
|
||||
self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)
|
||||
|
||||
# warnings
|
||||
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
|
||||
spanreq = metap.find("span",{"class":"story-warnings"})
|
||||
if spanreq: # can be no warnings.
|
||||
for a in spanreq.findAll("a"):
|
||||
for a in spanreq.find_all("a"):
|
||||
self.story.addToList('warnings',a['title'])
|
||||
|
||||
## perhaps not the most efficient way to parse this, using
|
||||
|
|
@ -168,7 +149,9 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
if m:
|
||||
self.story.setMetadata('rating', m.group(1))
|
||||
|
||||
m = re.match(r".*?Genres: (.+?) -.*?",metastr)
|
||||
## Genre appears even if list is empty. But there are a
|
||||
## limited number of genres allowed by the site.
|
||||
m = re.match(r".*?Genres: ((?:(?:Angst|Crossover|Drama|Erotica|Fantasy|Horror|Humor|Parody|Romance|Sci-fi)(?:,)?)+) -.*?",metastr)
|
||||
if m:
|
||||
for g in m.group(1).split(','):
|
||||
self.story.addToList('genre',g)
|
||||
|
|
@ -202,27 +185,24 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
storylistul = soup.find('ul',{'class':'storylist'})
|
||||
if not storylistul:
|
||||
# no list found, so it's a one-chapter story.
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),url))
|
||||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
else:
|
||||
chapterlistlis = storylistul.findAll('li')
|
||||
chapterlistlis = storylistul.find_all('li')
|
||||
for chapterli in chapterlistlis:
|
||||
if "blocked" in chapterli['class']:
|
||||
# paranoia check. We should already be logged in by now.
|
||||
raise exceptions.FailedToLogin(url,self.username)
|
||||
else:
|
||||
#print "chapterli.h4.a (%s)"%chapterli.h4.a
|
||||
self.chapterUrls.append((chapterli.h4.a.string,
|
||||
u'http://%s%s'%(self.getSiteDomain(),
|
||||
chapterli.h4.a['href'])))
|
||||
#print "self.chapterUrls:%s"%self.chapterUrls
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
self.add_chapter(chapterli.h4.a.string,
|
||||
u'https://%s%s'%(self.getSiteDomain(),
|
||||
chapterli.h4.a['href']))
|
||||
return
|
||||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
span = soup.find('div', {'id' : 'storytext'})
|
||||
|
||||
|
|
@ -233,4 +213,3 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getClass():
|
||||
return FicwadComSiteAdapter
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2020 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,20 +15,22 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import time
|
||||
from datetime import date
|
||||
from datetime import timedelta
|
||||
from datetime import date, datetime
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import cookielib as cl
|
||||
import json
|
||||
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
from ..six.moves import http_cookiejar as cl
|
||||
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return FimFictionNetSiteAdapter
|
||||
|
|
@ -39,11 +41,12 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev','fimficnet')
|
||||
self.story.setMetadata('storyId', self.parsedUrl.path.split('/',)[2])
|
||||
self._setURL("http://"+self.getSiteDomain()+"/story/"+self.story.getMetadata('storyId')+"/")
|
||||
self._setURL("https://"+self.getSiteDomain()+"/story/"+self.story.getMetadata('storyId')+"/")
|
||||
self.is_adult = False
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
# FYI, not the only format used in this file.
|
||||
self.dateformat = "%d %b %Y"
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -57,18 +60,11 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://www.fimfiction.net/story/1234/story-title-here http://www.fimfiction.net/story/1234/ http://www.fimfiction.com/story/1234/1/ http://mobile.fimfiction.net/story/1234/1/story-title-here/chapter-title-here"
|
||||
return "https://www.fimfiction.net/story/1234/story-title-here https://www.fimfiction.net/story/1234/ https://www.fimfiction.com/story/1234/1/ https://mobile.fimfiction.net/story/1234/1/story-title-here/chapter-title-here"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
def set_adult_cookie(self):
|
||||
cookie = cl.Cookie(version=0, name='view_mature', value='true',
|
||||
port=None, port_specified=False,
|
||||
|
|
@ -81,27 +77,57 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
comment_url=None,
|
||||
rest={'HttpOnly': None},
|
||||
rfc2109=False)
|
||||
self.get_cookiejar().set_cookie(cookie)
|
||||
self.get_configuration().get_cookiejar().set_cookie(cookie)
|
||||
|
||||
def performLogin(self, url):
|
||||
params = {}
|
||||
if self.password:
|
||||
params['username'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
params['username'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['keep_logged_in'] = '1'
|
||||
|
||||
if params['username'] and params['password']:
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/ajax/login'
|
||||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['username']))
|
||||
d = self.post_request(loginUrl, params)
|
||||
if "signing_key" not in d :
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['username']))
|
||||
raise exceptions.FailedToLogin(url,params['username'])
|
||||
|
||||
def make_soup(self,data):
|
||||
soup = super(FimFictionNetSiteAdapter, self).make_soup(data)
|
||||
for img in soup.select('img.lazy-img, img.user_image'):
|
||||
## FimF has started a 'camo' mechanism for images that
|
||||
## gets block by CF. attr data-source is original source.
|
||||
if img.has_attr('data-source'):
|
||||
img['src'] = img['data-source']
|
||||
elif img.has_attr('data-src'):
|
||||
img['src'] = img['data-src']
|
||||
return soup
|
||||
|
||||
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
self.set_adult_cookie()
|
||||
|
||||
## Only needed with password protected stories, which you have
|
||||
## to have logged into in the website using this account.
|
||||
if self.getConfig("always_login"):
|
||||
self.performLogin(self.url)
|
||||
|
||||
##---------------------------------------------------------------------------------------------------
|
||||
## Get the story's title page. Check if it exists.
|
||||
|
||||
try:
|
||||
# don't use cache if manual is_adult--should only happen
|
||||
# if it's an adult story and they don't have is_adult in ini.
|
||||
data = self.do_fix_blockquotes(self._fetchUrl(self.url,
|
||||
usecache=(not self.is_adult)))
|
||||
soup = self.make_soup(data)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
# don't use cache if manual is_adult--should only happen
|
||||
# if it's an adult story and they don't have is_adult in ini.
|
||||
data = self.do_fix_blockquotes(self.get_request(self.url,
|
||||
usecache=(not self.is_adult)))
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -109,18 +135,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
if "This story has been marked as having adult content. Please click below to confirm you are of legal age to view adult material in your country." in data:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if self.password:
|
||||
params = {}
|
||||
params['password'] = self.password
|
||||
data = self._postUrl(self.url, params)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if not (soup.find('form', {'id' : 'password_form'}) == None):
|
||||
if self.getConfig('fail_on_password'):
|
||||
raise exceptions.FailedToDownload("%s requires story password and fail_on_password is true."%self.url)
|
||||
else:
|
||||
raise exceptions.FailedToLogin(self.url,"Story requires individual password",passwdonly=True)
|
||||
|
||||
##----------------------------------------------------------------------------------------------------
|
||||
## Extract metadata
|
||||
|
||||
|
|
@ -131,11 +145,14 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('title',stripHTML(title))
|
||||
|
||||
# Author
|
||||
author = storyContentBox.find('div', {'class':'author'}).find('a')
|
||||
author = soup.find('div', {'class':'info-container'}).find('a')
|
||||
self.story.setMetadata("author", stripHTML(author))
|
||||
#No longer seems to be a way to access Fimfiction's internal author ID
|
||||
self.story.setMetadata("authorId", self.story.getMetadata("author"))
|
||||
self.story.setMetadata("authorUrl", "http://%s/user/%s" % (self.getSiteDomain(), stripHTML(author)))
|
||||
# /user/288866/Stryker-Shadowpony-Blade
|
||||
self.story.setMetadata("authorId", author['href'].split('/')[2])
|
||||
self.story.setMetadata("authorUrl", "https://%s/user/%s/%s" % (self.getSiteDomain(),
|
||||
self.story.getMetadata('authorId'),
|
||||
# meta entry author can be changed by the user.
|
||||
stripHTML(author)))
|
||||
|
||||
#Rating text is replaced with full words for historical compatibility after the site changed
|
||||
#on 2014-10-27
|
||||
|
|
@ -144,10 +161,9 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata("rating", rating)
|
||||
|
||||
# Chapters
|
||||
for chapter in storyContentBox.find_all('a',{'class':'chapter_link'}):
|
||||
self.chapterUrls.append((stripHTML(chapter), 'http://'+self.host+chapter['href']))
|
||||
for chapter in soup.find('ul',{'class':'chapters'}).find_all('a',{'class':'chapter-title'}):
|
||||
self.add_chapter(chapter, 'https://'+self.host+chapter['href'])
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
# Status
|
||||
# In the case of Fimfiction, possible statuses are 'Completed', 'Incomplete', 'On Hiatus' and 'Cancelled'
|
||||
|
|
@ -158,51 +174,53 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
status = status.replace("Incomplete", "In-Progress").replace("Complete", "Completed")
|
||||
self.story.setMetadata("status", status)
|
||||
|
||||
# Genres and Warnings
|
||||
# warnings were folded into general categories in the 2014-10-27 site update
|
||||
categories = storyContentBox.find_all('a', {'class':re.compile(r'.*\bstory_category\b.*')})
|
||||
for category in categories:
|
||||
category = stripHTML(category)
|
||||
if category == "Gore" or category == "Sex":
|
||||
self.story.addToList('warnings', category)
|
||||
else:
|
||||
self.story.addToList('genre', category)
|
||||
|
||||
# Word count
|
||||
wordCountText = stripHTML(storyContentBox.find('li', {'class':'bottom'}).find('div', {'class':'word_count'}))
|
||||
wordCountText = stripHTML(storyContentBox.find('div', {'class':'chapters-footer'}).find('div', {'class':'word_count'}))
|
||||
self.story.setMetadata("numWords", re.sub(r'[^0-9]', '', wordCountText))
|
||||
|
||||
# Cover image
|
||||
storyImage = storyContentBox.find('div', {'class':'story_image'})
|
||||
if storyImage:
|
||||
coverurl = storyImage.find('a')['href']
|
||||
if coverurl.startswith('//'): # fix for img urls missing 'http:'
|
||||
coverurl = "http:"+coverurl
|
||||
if get_cover:
|
||||
# try setting from href, if fails, try using the img src
|
||||
if self.setCoverImage(self.url,coverurl)[0] == "failedtoload":
|
||||
img = storyImage.find('img')
|
||||
# try src, then data-src, then leave None.
|
||||
coverurl = img.get('src',img.get('data-src',None))
|
||||
if coverurl:
|
||||
self.setCoverImage(self.url,coverurl)
|
||||
if get_cover:
|
||||
storyImage = soup.select_one('div.story_container__story_image img')
|
||||
if storyImage:
|
||||
coverurl = storyImage['data-fullsize']
|
||||
# try setting from data-fullsize, if fails, try using data-src
|
||||
cover_set = self.setCoverImage(self.url,coverurl)[0]
|
||||
if not cover_set or cover_set.startswith("failedtoload"):
|
||||
coverurl = storyImage['src']
|
||||
self.setCoverImage(self.url,coverurl)
|
||||
|
||||
coverSource = storyImage.find('a', {'class':'source'})
|
||||
if coverSource:
|
||||
self.story.setMetadata('coverSourceUrl', coverSource['href'])
|
||||
#There's no text associated with the cover source link, so just
|
||||
#reuse the URL. Makes it clear it's an external link leading
|
||||
#outside of the fanfic site, at least.
|
||||
self.story.setMetadata('coverSource', coverSource['href'])
|
||||
coverSource = storyImage.parent.find('a', {'class':'source'})
|
||||
if coverSource:
|
||||
self.story.setMetadata('coverSourceUrl', coverSource['href'])
|
||||
# There's no text associated with the cover source
|
||||
# link, so just reuse the URL. Makes it clear it's
|
||||
# an external link leading outside of the fanfic
|
||||
# site, at least.
|
||||
self.story.setMetadata('coverSource', coverSource['href'])
|
||||
|
||||
# fimf has started including extra stuff inside the description div.
|
||||
descdivstr = u"%s"%storyContentBox.find("div", {"class":"description"})
|
||||
hrstr=u"<hr/>"
|
||||
descdivstr = u'<div class="description">'+descdivstr[descdivstr.index(hrstr)+len(hrstr):]
|
||||
# specifically, the prequel link
|
||||
description = storyContentBox.find("span", {"class":"description-text"})
|
||||
description.name='div' # change to div, technically, spans
|
||||
# aren't supposed to contain <p>'s.
|
||||
descdivstr = u"%s"%description # string, but not stripHTML'ed
|
||||
#The link to the prequel is embedded in the description text, so erring
|
||||
#on the side of caution and wrapping this whole thing in a try block.
|
||||
#If anything goes wrong this probably wasn't a valid prequel link.
|
||||
try:
|
||||
if "This story is a sequel to" in stripHTML(description):
|
||||
link = description.find('a') # assume first link.
|
||||
self.story.setMetadata("prequelUrl", 'https://'+self.host+link["href"])
|
||||
self.story.setMetadata("prequel", stripHTML(link))
|
||||
if not self.getConfig('keep_prequel_in_description',False):
|
||||
hrstr=u"<hr/>"
|
||||
descdivstr = u'<div class="description">'+descdivstr[descdivstr.index(hrstr)+len(hrstr):]
|
||||
except:
|
||||
logger.info("Prequel parsing failed...")
|
||||
self.setDescription(self.url,descdivstr)
|
||||
|
||||
# Find the newest and oldest chapter dates
|
||||
storyData = storyContentBox.find('div', {'class':'story_data'})
|
||||
storyData = storyContentBox.find('ul', {'class':'chapters'})
|
||||
oldestChapter = None
|
||||
newestChapter = None
|
||||
self.newestChapterNum = None # save for comparing during update.
|
||||
|
|
@ -230,7 +248,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# Date published
|
||||
# falls back to oldest chapter date for stories that haven't been officially published yet
|
||||
pubdatetag = storyContentBox.find('span', {'class':'date_approved'})
|
||||
pubdatetag = storyContentBox.find('span', {'class':'approved-date'})
|
||||
if pubdatetag is None:
|
||||
if oldestChapter is None:
|
||||
#this will only be true when updating metadata for stories that have 0 chapters
|
||||
|
|
@ -240,16 +258,25 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
self.story.setMetadata("datePublished", oldestChapter)
|
||||
else:
|
||||
pubDate = self.ordinal_date_string_to_date(pubdatetag('span')[1].text)
|
||||
pubDate = self.date_span_tag_to_date(pubdatetag)
|
||||
self.story.setMetadata("datePublished", pubDate)
|
||||
|
||||
# Characters
|
||||
chars = storyContentBox.find("div", {"class":"extra_story_data"})
|
||||
for character in chars.find_all("a", {"class":"character_icon"}):
|
||||
self.story.addToList("characters", character['title'])
|
||||
tags = storyContentBox.find("ul", {"class":"story-tags"})
|
||||
for character in tags.find_all("a", {"class":"tag-character"}):
|
||||
self.story.addToList("characters", stripHTML(character))
|
||||
for genre in tags.find_all("a", {"class":"tag-genre"}):
|
||||
self.story.addToList("genre", stripHTML(genre))
|
||||
for series in tags.find_all("a", {"class":"tag-series"}):
|
||||
#using 'fandoms' as the identifier to standardize with archiveofourown.org
|
||||
self.story.addToList("fandoms", stripHTML(series))
|
||||
for warning in tags.find_all("a", {"class":"tag-warning"}):
|
||||
self.story.addToList("warnings", stripHTML(warning))
|
||||
for content in tags.find_all("a", {"class":"tag-content"}):
|
||||
self.story.addToList("content", stripHTML(content))
|
||||
|
||||
# Likes and dislikes
|
||||
storyToolbar = soup.find('div', {'class':'story-toolbar'})
|
||||
storyToolbar = soup.find('div', {'class':'story-top-toolbar'})
|
||||
likes = storyToolbar.find('span', {'class':'likes'})
|
||||
if not likes is None:
|
||||
self.story.setMetadata("likes", stripHTML(likes))
|
||||
|
|
@ -259,8 +286,9 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# Highest view for a chapter and total views
|
||||
viewSpan = storyToolbar.find('span', {'title':re.compile(r'.*\btotal views\b.*')})
|
||||
self.story.setMetadata("views", re.sub(r'[^0-9]', '', stripHTML(viewSpan)))
|
||||
self.story.setMetadata("total_views", re.sub(r'[^0-9]', '', viewSpan['title']))
|
||||
viewResults = re.search(r'([0-9]*) views \/ ([0-9]*)', viewSpan['title'].replace(',',''))
|
||||
self.story.setMetadata("views", viewResults.group(1))
|
||||
self.story.setMetadata("total_views", viewResults.group(2))
|
||||
|
||||
# Comment count
|
||||
commentSpan = storyToolbar.find('span', {'title':re.compile(r'.*\bcomments\b.*')})
|
||||
|
|
@ -270,59 +298,68 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
descriptionMeta = soup.find('meta', {'property':'og:description'})
|
||||
self.story.setMetadata("short_description", stripHTML(descriptionMeta['content']))
|
||||
|
||||
#groups
|
||||
if soup.find('button', {'id':'button-view-all-groups'}):
|
||||
groupResponse = self._fetchUrl("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
|
||||
groupData = json.loads(groupResponse)
|
||||
groupList = self.make_soup(groupData["content"])
|
||||
else:
|
||||
# groups.
|
||||
# If there are more than X groups, there's a 'Show all' button
|
||||
# that calls for a JSON containing HTML with the full list.
|
||||
# But it doesn't work reliably with FlareSolverr.
|
||||
groupList = None
|
||||
groupButton = soup.find('button', {'data-click':'showAll'})
|
||||
if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
|
||||
try:
|
||||
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
|
||||
groupData = json.loads(groupResponse)
|
||||
groupList = self.make_soup(groupData["content"])
|
||||
except Exception as e:
|
||||
logger.warning("Collecting 'groups' (AKA 'Featured In') from JSON failed:%s"%e)
|
||||
logger.warning("Only 'groups' initially shown on the page will be collected.")
|
||||
logger.warning("This is a known issue with JSON and FlareSolverr. See #1122")
|
||||
|
||||
if not groupList:
|
||||
groupList = soup.find('ul', {'id':'story-groups-list'})
|
||||
|
||||
if not (groupList == None):
|
||||
for groupName in groupList.find_all('a'):
|
||||
self.story.addToList("groupsUrl", 'http://'+self.host+groupName["href"])
|
||||
self.story.addToList("groups",stripHTML(groupName).replace(',', ';'))
|
||||
if groupList:
|
||||
for groupContent in groupList.find_all('a'):
|
||||
self.story.addToList("groupsUrl", 'https://'+self.host+groupContent["href"])
|
||||
groupName = groupContent.find('span', {"class":"group-name"})
|
||||
if groupName != None:
|
||||
self.story.addToList("groups",stripHTML(groupName).replace(',', ';'))
|
||||
else:
|
||||
self.story.addToList("groups",stripHTML(groupContent).replace(',', ';'))
|
||||
|
||||
#sequels
|
||||
for header in soup.find_all('h1', {'class':'header-stories'}):
|
||||
# I don't know why using text=re.compile with find() wouldn't work, but it didn't.
|
||||
# I don't know why using string=re.compile with find() wouldn't work, but it didn't.
|
||||
if header.text.startswith('Sequels'):
|
||||
sequelContainer = header.parent
|
||||
for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
|
||||
self.story.addToList("sequelsUrl", 'http://'+self.host+sequel["href"])
|
||||
self.story.addToList("sequelsUrl", 'https://'+self.host+sequel["href"])
|
||||
self.story.addToList("sequels", stripHTML(sequel).replace(',', ';'))
|
||||
|
||||
#author last login
|
||||
userPageHeader = soup.find('div', {'class':re.compile(r'\buser-page-header\b')})
|
||||
userPageHeader = soup.find('div', {'class':'user-page-header'})
|
||||
if not userPageHeader == None:
|
||||
infoContainer = userPageHeader.find('div', {'class':re.compile(r'\binfo-container\b')})
|
||||
infoContainer = userPageHeader.find('ul', {'class':'mini-info-box'})
|
||||
listItems = infoContainer.find_all('li')
|
||||
lastLoginString = stripHTML(listItems[1])
|
||||
lastLogin = None
|
||||
if "online" in lastLoginString:
|
||||
lastLogin = date.today()
|
||||
elif "offline" in lastLoginString:
|
||||
#this regex extracts the number of weeks and the number of days from the last login string.
|
||||
#durations under a day are ignored.
|
||||
#group 1 is weeks, group 2 is days
|
||||
durationGroups = re.match(r"(?:[^0-9]*(\d+?)w)?[^0-9]*(?:(\d+?)d)?", lastLoginString)
|
||||
lastLogin = date.today() - timedelta(days=int(durationGroups.group(2) or 0), weeks=int(durationGroups.group(1) or 0))
|
||||
lastLogin = self.date_span_tag_to_date(listItems[1])
|
||||
self.story.setMetadata("authorLastLogin", lastLogin)
|
||||
|
||||
#The link to the prequel is embedded in the description text, so erring
|
||||
#on the side of caution and wrapping this whole thing in a try block.
|
||||
#If anything goes wrong this probably wasn't a valid prequel link.
|
||||
try:
|
||||
description = soup.find('div', {'class':'description'})
|
||||
firstHR = description.find("hr")
|
||||
nextSib = firstHR.nextSibling
|
||||
if "This story is a sequel to" in nextSib.string:
|
||||
link = nextSib.nextSibling
|
||||
if link.name == "a":
|
||||
self.story.setMetadata("prequelUrl", 'http://'+self.host+link["href"])
|
||||
self.story.setMetadata("prequel", stripHTML(link))
|
||||
except:
|
||||
pass
|
||||
def date_span_tag_to_date(self, containingtag):
|
||||
## <span data-time="1435421997" title="Saturday 27th of June 2015 @4:19pm">Jun 27th, 2015</span>
|
||||
## No timezone adjustment is done.
|
||||
span = containingtag.find('span',{'data-time':re.compile(r'^\d+$')})
|
||||
if span != None:
|
||||
return datetime.fromtimestamp(float(span['data-time']))
|
||||
## Sometimes, for reasons that are unclear, data-time is not present. Parse the date out of the title instead.
|
||||
else:
|
||||
span = containingtag.find('span', title=True)
|
||||
dateRegex = re.search('([a-zA-Z ]+)([0-9]+)(st of|th of|nd of|rd of)([a-zA-Z ]+[0-9]+)', span['title'])
|
||||
dateString = dateRegex.group(2) + dateRegex.group(4)
|
||||
return makeDate(dateString, "%d %B %Y")
|
||||
|
||||
def ordinal_date_string_to_date(self, datestring):
|
||||
datestripped=re.sub(r"(\d+)(st|nd|rd|th)", r"\1", datestring.strip())
|
||||
|
|
@ -346,21 +383,58 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
if not (soup.find('form', {'id' : 'password_form'}) == None):
|
||||
if self.password:
|
||||
params = {}
|
||||
params['password'] = self.password
|
||||
data = self._postUrl(url, params)
|
||||
else:
|
||||
logger.error("Chapter %s needed password but no password was present" % url)
|
||||
|
||||
data = self.do_fix_blockquotes(data)
|
||||
|
||||
soup = self.make_soup(data).find('div', {'class' : 'chapter_content'})
|
||||
if soup == None:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
if self.getConfig("include_author_notes",True):
|
||||
soup = self.make_soup(data).find_all('div', {'class':re.compile(r'(.*\bauthors-note\b.*|.*\bchapter-body\b.*)')})
|
||||
if soup == None:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
chapter_divs = [unicode(div) for div in soup]
|
||||
soup = self.make_soup(" ".join(chapter_divs))
|
||||
else:
|
||||
soup = self.make_soup(data).find('div', {'id' : 'chapter-body'})
|
||||
if soup == None:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,soup)
|
||||
|
||||
def before_get_urls_from_page(self,url,normalize):
|
||||
## Unlike most that show the links to 'adult' stories, but protect
|
||||
## them, FimF doesn't even show them if not logged in.
|
||||
# data = self.get_request(url)
|
||||
if self.getConfig("is_adult"):
|
||||
self.set_adult_cookie()
|
||||
|
||||
def get_urls_from_page(self,url,normalize):
|
||||
iterate = self.getConfig('scrape_bookshelf', default=False)
|
||||
if not re.search(r'fimfiction\.net/bookshelf/(?P<listid>.+?)/',url) or iterate == 'legacy':
|
||||
return super().get_urls_from_page(url,normalize)
|
||||
|
||||
self.before_get_urls_from_page(url,normalize)
|
||||
|
||||
final_urls = list()
|
||||
while True:
|
||||
data = self.get_request(url,usecache=True)
|
||||
soup = self.make_soup(data)
|
||||
paginator = soup.select_one('div.paginator-container > div.page_list > ul').find_all('li')
|
||||
logger.debug("Paginator: " + str(len(paginator)))
|
||||
stories_container = soup.select_one('div.content > div.two-columns > div.left').find_all('article', recursive=False)
|
||||
x = 0
|
||||
logger.debug("Container "+str(len(stories_container)))
|
||||
for story_raw in stories_container:
|
||||
x += 1
|
||||
story_url = story_raw.select_one('div.story_content_box > header.title > div > a.story_name').get('href')
|
||||
url_story = ('https://' + self.getSiteDomain() + story_url)
|
||||
#logger.debug(url_story)
|
||||
final_urls.append(url_story)
|
||||
logger.debug("Discovered %s new stories."%str(x))
|
||||
|
||||
next_button = paginator[-1].select_one('a')
|
||||
logger.debug("Next button: " + next_button.get_text())
|
||||
if next_button.get_text() or not iterate:
|
||||
return {'urllist': final_urls}
|
||||
url = ('https://' + self.getSiteDomain() + next_button.get('href'))
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -17,14 +17,14 @@
|
|||
####################################################################################################
|
||||
# Adapted by GComyn - December 10, 2016
|
||||
####################################################################################################
|
||||
from __future__ import absolute_import
|
||||
''' This adapter will download the stories from the www.fireflyfans.net forum pages '''
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import urllib2
|
||||
# py2 vs py3 transition
|
||||
from ..six import text_type as unicode
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
|
@ -43,12 +43,6 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
|||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
self.story.setMetadata('siteabbrev', 'fffans')
|
||||
self.decode = ["Windows-1252",
|
||||
"utf8",
|
||||
"iso-8859-1"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
self.is_adult = False
|
||||
|
||||
# get storyId from url--url validation guarantees query is only
|
||||
|
|
@ -83,19 +77,12 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.debug("URL: " + url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
data = self.get_request(url)
|
||||
|
||||
if 'Something bad happened, but hell if I know what it is.' in data:
|
||||
raise exceptions.StoryDoesNotExist(
|
||||
'{0} says: GORAMIT!!! SOMETHING WENT WRONG! Something bad happened, but hell if I know what it is.'.format(self.url))
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# Title
|
||||
|
|
@ -106,6 +93,9 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
a = soup.find('a', href=re.compile(r"profileshow.aspx\?u="))
|
||||
self.story.setMetadata('authorId', a['href'].split('=')[1])
|
||||
if not self.story.getMetadata('authorId'):
|
||||
logger.warning("Site authorUrl missing authorId, using SiteMissingAuthorId")
|
||||
self.story.setMetadata('authorId', 'SiteMissingAuthorId')
|
||||
self.story.setMetadata('authorUrl', 'http://' +
|
||||
self.host + '/' + a['href'])
|
||||
self.story.setMetadata('author', a.string)
|
||||
|
|
@ -114,9 +104,8 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
|||
# way to determine if there are other chapters to the same story, so you have
|
||||
# to download them one at a time yourself. I'm also setting the status to
|
||||
# complete
|
||||
self.chapterUrls.append((self.story.getMetadata('title'), self.url))
|
||||
self.story.setMetadata('numChapters', 1)
|
||||
self.story.setMetadata('status', 'Complete')
|
||||
self.add_chapter(self.story.getMetadata('title'), self.url)
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
|
||||
## some stories do not have a summary listed, so I'm setting it here.
|
||||
summary = soup.find('span', {'id': 'MainContent_txtItemDescription'})
|
||||
|
|
@ -137,7 +126,7 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
|||
# which is usualy FireFly on this site, but I'm going to get them
|
||||
# anyway.a
|
||||
category = soup.find('span', {'id': 'MainContent_txtItemDetails'})
|
||||
category = stripHTML(str(category).replace(b"\xc2\xa0", ' '))
|
||||
category = stripHTML(unicode(category).replace(u"\xa0", u' '))
|
||||
metad = category.split(' ')
|
||||
for meta in metad:
|
||||
if ":" in meta:
|
||||
|
|
|
|||
|
|
@ -1,329 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2016 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
####################################################################################################
|
||||
### Adapted by GComyn on December 15, 2016
|
||||
###=================================================================================================
|
||||
### I ran this through a linter, and formatted it as per the suggestions, hence some of the lines
|
||||
### are "chopped"
|
||||
###=================================================================================================
|
||||
### I have started to use lines of # on the line just before a function so they are easier to find.
|
||||
####################################################################################################
|
||||
''' This adapter scrapes the metadata and chapter text from stories on firefly.populli.org '''
|
||||
import logging
|
||||
import re
|
||||
import urllib2
|
||||
import sys
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
####################################################################################################
|
||||
def getClass():
|
||||
return FireflyPopulliOrgSiteAdapter
|
||||
|
||||
####################################################################################################
|
||||
class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
# 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be iso-8859-1 (and some that claim to be utf8)
|
||||
# are really windows-1252. I've put the iso-8859-1 in just to cover the bases [GComyn]
|
||||
self.decode = ["Windows-1252", "utf8", "iso-8859-1"]
|
||||
|
||||
self.is_adult = False
|
||||
|
||||
# normalized story URL.
|
||||
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/archive/' +m.group('cat') +
|
||||
'/' + self.story.getMetadata('storyId') +'.shtml')
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
|
||||
## each adapter needs to have a unique abbreviation, whih is set here.
|
||||
self.story.setMetadata('siteabbrev', 'fga')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# The below website give the list of variables that can be used to formulate the
|
||||
# correct format
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%m/%d/%y"
|
||||
|
||||
# This site has the entire story on one page, so I am initializing a variable to hold the
|
||||
# soup so that the getChaperText function doesn't have to use bandwidth to get it again.
|
||||
self.html = ''
|
||||
|
||||
################################################################################################
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'firefly.populli.org'
|
||||
|
||||
################################################################################################
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://" + cls.getSiteDomain() + "/archive/#/[storyId].shtml"
|
||||
|
||||
################################################################################################
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain())+r'/archive/(?P<cat>\d+)/(?P<id>\S+)\.shtml'
|
||||
|
||||
################################################################################################
|
||||
def get_page(self, page):
|
||||
'''
|
||||
This will download the url from the web and return the data
|
||||
I'm using it since I call several places below, and this will
|
||||
cut down on the size of the file
|
||||
'''
|
||||
try:
|
||||
page_data = self._fetchUrl(page)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(page))
|
||||
else:
|
||||
raise e
|
||||
return page_data
|
||||
|
||||
################################################################################################
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logger.debug("URL: " + url)
|
||||
|
||||
data = self.get_page(url)
|
||||
|
||||
# Since this is a site with the entire story on one page and there are no updates, I'm going
|
||||
# to set the status to complete.
|
||||
self.story.setMetadata('status', 'Complete')
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# Title
|
||||
## Some stories do not have the title in a tag that can be easily gotten.
|
||||
title = soup.find('h2')
|
||||
if not title:
|
||||
raise exceptions.StoryDoesNotExist('Cannot find title on the page {}'.format(url))
|
||||
|
||||
self.story.setMetadata('title', stripHTML(soup.find('h2')))
|
||||
|
||||
# This site has the entire story on one page, so we will be using the normalized URL as
|
||||
# the chapterUrl and the Title as the chapter Title
|
||||
self.chapterUrls.append((self.story.getMetadata('title'), url))
|
||||
|
||||
## i would take this out, as it is not really needed, but the calibre plugin uses it,
|
||||
## so it's staying
|
||||
self.story.setMetadata('numChapters', 1)
|
||||
|
||||
# Find authorid and URL
|
||||
## this site does not have dedicated pages for the authors, you have to use the searh engine.
|
||||
## so that is what I will do. Some of the stories have multiple author names separated by
|
||||
## commas or a colon. I'm going to take the first name as the author name, and use the rest
|
||||
## as a coauthor site specific tag. I did it this way so we keep all of the information,
|
||||
## because the author can be used in the filename, and if it's too long windows systems
|
||||
## won't be able to use it.
|
||||
mdata = stripHTML(soup.find('a', href=re.compile('mailto')))
|
||||
if ':' in mdata:
|
||||
self.story.setMetadata('coauthor', ' '.join(mdata.split(':')[1:]).strip())
|
||||
mdata = mdata.split(':')[0]
|
||||
if ',' in mdata:
|
||||
self.story.setMetadata('coauthor', ', '.join(mdata.split(',')[1:]).strip())
|
||||
mdata = mdata.split(',')[0]
|
||||
|
||||
# print mdata
|
||||
# self.story.getMetadata('coauthor')
|
||||
# sys.exit()
|
||||
self.story.setMetadata('authorId', mdata)
|
||||
self.story.setMetadata('author', mdata.title())
|
||||
|
||||
# Some stories list multiple authors, but the search engine only uses 1 author, and since
|
||||
# we can't tell how many 'words' are in each name, I'm going to do a work around.
|
||||
author_name = mdata.split(' ')[0].strip()
|
||||
author_url = ('http://'+self.getSiteDomain()+'/cgi-bin/search.cgi?Author={}&SortBy=0'+
|
||||
'&SortOrder=0&NumToList=0&FastSearch=0&ShortResults=0').format(author_name)
|
||||
story_found = False
|
||||
while not story_found:
|
||||
logger.debug('Getting author page: %s' % author_url)
|
||||
adata = self.get_page(author_url)
|
||||
if 'No stories found for your search choices.' in adata:
|
||||
author_name = ' '.join(author_name.split()[:-1])
|
||||
author_url = ('http://'+self.getSiteDomain(
|
||||
)+'/cgi-bin/search.cgi?Author={}&SortBy=0'+
|
||||
'&SortOrder=0&NumToList=0&FastSearch=0' +
|
||||
'&ShortResults=0').format(author_name)
|
||||
pass
|
||||
else:
|
||||
asoup = self.make_soup(adata)
|
||||
# Ok...this site does not have the stories encompassed by any sort of tag... so I have
|
||||
# to make it.
|
||||
stories = asoup.find_all('p', {'class':'search'})
|
||||
if stories:
|
||||
for story in stories:
|
||||
# There alot of nbsp's (non broken spaces) in here, so I'm going to remove them
|
||||
# I'm also getting rid of the bold tags and the nextline characters to make it
|
||||
# easier to get the information below
|
||||
story = repr(story).replace(b'\\xa0', '').replace(' ',' ').replace(
|
||||
'<b>','').replace('</b>','').replace(r'\n','')
|
||||
story = self.make_soup(story).find('p')
|
||||
story_a = story.find('a')
|
||||
title = self.story.getMetadata('title').split('-')[0].strip()
|
||||
if story_a.get_text() == title:
|
||||
story_found = True
|
||||
break
|
||||
if not story_found:
|
||||
raise exceptions.StoryDoesNotExist(
|
||||
"Could not find the story {} on the author's {} search page {}".format(
|
||||
url, author_name, author_url))
|
||||
|
||||
self.story.setMetadata('authorUrl', author_url)
|
||||
|
||||
# The first element is the author, which we already have, so I'm going to drop it.
|
||||
# Some prequel and sequel have links, so we are going to process them here, and get the
|
||||
# series at the same time, then catch those that don't have links below
|
||||
links = story.find_all('a')
|
||||
for link in links:
|
||||
label = link.previousSibling.strip()
|
||||
if label == 'Series Title:':
|
||||
## there is no way to tell which number of the series the story is, so we won't
|
||||
# put a number
|
||||
series_url = 'http://'+self.getSiteDomain()+'/'+link['href']
|
||||
self.story.setMetadata('series', link.get_text())
|
||||
self.story.setMetadata('seriesUrl', series_url)
|
||||
elif label == 'Prequel to:':
|
||||
value = link.string + ' (' + 'http://'+self.getSiteDomain()+link['href'] + ')'
|
||||
self.story.setMetadata('prequelto', value)
|
||||
elif label == 'Sequel to:':
|
||||
value = link.string + ' (' + 'http://'+self.getSiteDomain()+link['href'] + ')'
|
||||
self.story.setMetadata('sequelto', value)
|
||||
|
||||
# Some stories have alot of text in the "summary", and I've tried to keep down on creating
|
||||
# new metadata from here, so I'm going to grab some, but the rest will be lumped into the
|
||||
# summary metadata.
|
||||
summary = ''
|
||||
mdatas = story.find_all('br')
|
||||
for mdata in mdatas:
|
||||
meta = mdata.nextSibling.string
|
||||
if meta:
|
||||
# some of the "sentences" have a colon in them, but are not actually labels... so
|
||||
# I'm checking to see if the colon is within the first 20 characters, and taking
|
||||
# that as a label... otherwise, it will be added to the summary section below. I've
|
||||
# decided that the entire section will be put into the summary section, unless it
|
||||
# has specific labels
|
||||
if meta.find(':') > 0 and meta.find(':') < 20:
|
||||
label = meta.split(':', 2)[0].strip().lower()
|
||||
value = meta[len(label)+1:].strip()
|
||||
else:
|
||||
label = meta.string
|
||||
value = ''
|
||||
if (label == 'series title' or label == 'author' or label == '[' or
|
||||
label == 'prequel to'):
|
||||
# we've either already got this or we don't want it so we'll pass
|
||||
## I'm handling it here, to get it out of the way for the rest of the code since
|
||||
# anything not captured is put into the summary
|
||||
pass
|
||||
elif label == 'details':
|
||||
# for the details section, none of this is labeled, and some stories can have
|
||||
# less than others, so I have to check what each is to determine where to put
|
||||
# it.
|
||||
for val in value.split('|'):
|
||||
val = val.strip()
|
||||
if len(val) == 0:
|
||||
# we don't need the ones that don't have anything in it.
|
||||
pass
|
||||
elif val in ['Series', 'Standalone', 'Work-In-Progress']:
|
||||
self.story.setMetadata('storytype', val)
|
||||
elif val in ['G', 'NC-17', 'PG', 'PG-13', 'R']:
|
||||
self.story.setMetadata('rating', val)
|
||||
elif val.split()[0].replace(',','') in ['*slash*', 'gen', 'het']:
|
||||
self.story.setMetadata('genre', val)
|
||||
elif val[-1] == 'k':
|
||||
self.story.setMetadata('size', val)
|
||||
elif len(val) > 0:
|
||||
# There is no update date, so I'm putting the date in both
|
||||
self.story.setMetadata('datePublished',makeDate(val, self.dateformat))
|
||||
self.story.setMetadata('dateUpdated',makeDate(val, self.dateformat))
|
||||
else:
|
||||
## This should catch anything else, and shouldn't ever really be gotten
|
||||
# to, but I'm going to have it print out in the debugger, just in case
|
||||
logger.debug('Metadata not caught: %s' % str(meta))
|
||||
zzzzzzzz = 0
|
||||
elif label == 'characters':
|
||||
self.story.setMetadata('characters', value)
|
||||
elif label == 'pairings':
|
||||
self.story.setMetadata('ships', value)
|
||||
elif label == 'warnings' or label == '[eta] warning':
|
||||
self.story.setMetadata('warnings', value)
|
||||
elif label == 'sequel to':
|
||||
self.story.setMetadata('sequelto', value)
|
||||
elif label == 'disclaimer':
|
||||
self.story.setMetadata('disclaimer', value)
|
||||
elif label == 'spoilers':
|
||||
self.story.setMetadata('spoilers', value)
|
||||
elif label == 'crossover with':
|
||||
self.story.addToList('category', value)
|
||||
elif label == 'summary':
|
||||
summary += value + '<br/>'
|
||||
else:
|
||||
## since this is not really a labled string, I'm adding the original string to
|
||||
# the summary. This may cause some of the sentences from the other site specific
|
||||
# labels to be separated, but this is the only way I can figure out how to do
|
||||
# this, at this time.
|
||||
summary += meta.string + '<br/>'
|
||||
|
||||
self.setDescription(url, summary)
|
||||
|
||||
# since this is the only "chapter" that will be retrieved, I'm going to save the soup here
|
||||
# so the getChapterText function doesn't have to use more bandwidth to get it again
|
||||
self.html = soup
|
||||
|
||||
################################################################################################
|
||||
def getChapterText(self, url):
|
||||
|
||||
logger.debug('Using the html retrieved previously from: %s' % url)
|
||||
|
||||
soup = self.html
|
||||
|
||||
story = soup.find('blockquote')
|
||||
|
||||
if None == story:
|
||||
raise exceptions.FailedToDownload(
|
||||
"Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
## now that we have the story, there needs to be a little cleanup before we send it to the
|
||||
# writers. Some of them really need editing to be cleaned up
|
||||
## I am converting the text to raw unicode, then removing the <blockquote>, then removing
|
||||
# the end of the section, which has alot of extraneous things, then adding my own div
|
||||
# wrapper, recreating the soup, then getting that div from the soup again, before sending to
|
||||
# the writers.
|
||||
story = repr(story).replace(b'\\xa0', '').replace(' ',' ').replace(r'\n','').strip()
|
||||
story = story[12:]
|
||||
story = story[:story.find('<p align="center" class="comments">Please <')]
|
||||
story = '<div class="chaptertext">' + story + '</div>'
|
||||
story = self.make_soup(story).find('div', {'class':'chaptertext'})
|
||||
|
||||
return self.utf8FromSoup(url, story)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 FanFicFare team
|
||||
# Copyright 2024 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,15 +15,18 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from base_xenforoforum_adapter import BaseXenForoForumAdapter
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
|
||||
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
|
||||
|
||||
def getClass():
|
||||
return QuestionablequestingComAdapter
|
||||
|
||||
class QuestionablequestingComAdapter(BaseXenForoForumAdapter):
|
||||
class QuestionablequestingComAdapter(BaseXenForo2ForumAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseXenForoForumAdapter.__init__(self, config, url)
|
||||
BaseXenForo2ForumAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','qq')
|
||||
|
|
@ -33,3 +36,12 @@ class QuestionablequestingComAdapter(BaseXenForoForumAdapter):
|
|||
# The site domain. Does have www here, if it uses it.
|
||||
return 'forum.questionablequesting.com'
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return [cls.getSiteDomain(),
|
||||
cls.getSiteDomain().replace('forum.','')]
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
## QQ accepts forum.questionablequesting.com and questionablequesting.com
|
||||
## We will use forum. as canonical for all
|
||||
return super(QuestionablequestingComAdapter, self).getSiteURLPattern().replace(re.escape("forum."),r"(forum\.)?")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 FanFicFare team
|
||||
# Copyright 2019 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,15 +15,18 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from base_xenforoforum_adapter import BaseXenForoForumAdapter
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
|
||||
from .base_xenforo2forum_adapter import BaseXenForo2ForumAdapter
|
||||
|
||||
def getClass():
|
||||
return ForumsSpacebattlesComAdapter
|
||||
|
||||
class ForumsSpacebattlesComAdapter(BaseXenForoForumAdapter):
|
||||
class ForumsSpacebattlesComAdapter(BaseXenForo2ForumAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseXenForoForumAdapter.__init__(self, config, url)
|
||||
BaseXenForo2ForumAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','fsb')
|
||||
|
|
@ -33,3 +36,12 @@ class ForumsSpacebattlesComAdapter(BaseXenForoForumAdapter):
|
|||
# The site domain. Does have www here, if it uses it.
|
||||
return 'forums.spacebattles.com'
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return [cls.getSiteDomain(),
|
||||
cls.getSiteDomain().replace('forums.','forum.')]
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
## SB accepts forums.spacebattles.com and forum.spacebattles.com
|
||||
## We will use forums. as canonical for all
|
||||
return super(ForumsSpacebattlesComAdapter, self).getSiteURLPattern().replace(re.escape("forums."),r"forums?\.")
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue