mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-06 03:20:24 +02:00
Small fixes for Wattpad. (#1137)
* adapter_wattpadcom: Various fixes and changes * adapter_wattpadcom: Config update & category 0 not always present --------- Co-authored-by: dbhmw <github.spherical376@passmail.net>
This commit is contained in:
parent
a9944cd255
commit
816bbdfd66
7 changed files with 391 additions and 32 deletions
|
|
@ -3276,11 +3276,17 @@ readings_label:Readings
|
|||
|
||||
[wattpad.com]
|
||||
use_basic_cache:true
|
||||
#is_adult:true
|
||||
extra_titlepage_entries: language, reads
|
||||
extra_valid_entries: language, tags, reads
|
||||
extra_titlepage_entries: reads
|
||||
extra_valid_entries: reads
|
||||
reads_label:Read Count
|
||||
include_in_genre: tags
|
||||
datechapter_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
## You can set the 'dateUpdated_method' to either:
|
||||
## - 'modifyDate': This will keep the current behavior, where the
|
||||
## update date corresponds to any modification made to the content.
|
||||
## - 'lastPublishedPart': This will set the update date to
|
||||
## the date of the last published chapter.
|
||||
dateUpdated_method: modifyDate
|
||||
|
||||
# Add comma separators to the numeric 'reads' count, e.g. 10000 becomes 10,000.
|
||||
add_to_comma_entries:,reads
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
|
|||
class WattpadComAdapter(BaseSiteAdapter):
|
||||
# All the API discovery work done by github user de3sw2aq1
|
||||
# Source: https://github.com/de3sw2aq1/wattpad-ebook-scraper/blob/master/scrape.py
|
||||
API_GETCATEGORIES = 'https://www.wattpad.com/apiv2/getcategories'
|
||||
API_GETCATEGORIES = 'https://www.wattpad.com/api/v3/categories'
|
||||
API_STORYINFO = 'https://www.wattpad.com/api/v3/stories/%s' # stories?id=X is NOT the same
|
||||
API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext?id=%s'
|
||||
API_CHAPTERINFO = 'https://www.wattpad.com/v4/parts/%s?fields=group(id)&_=%s'
|
||||
|
|
@ -44,15 +44,6 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
self._setURL('https://www.wattpad.com/story/%s' % self.storyId)
|
||||
self.chapter_photoUrl = {}
|
||||
|
||||
# categoryDefs do not change all that often, if at all. Could be put in a constant, leaving it as a class var for now
|
||||
# note: classvar may be useless because of del adapter
|
||||
if WattpadComAdapter.CATEGORY_DEFs is None:
|
||||
try:
|
||||
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
|
||||
except:
|
||||
logger.warning('API_GETCATEGORIES failed.')
|
||||
WattpadComAdapter.CATEGORY_DEFs = []
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.wattpad.com'
|
||||
|
|
@ -99,7 +90,17 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
return groupid
|
||||
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
def extractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
# categoryDefs do not change all that often, if at all. Could be put in a constant, leaving it as a class var for now
|
||||
# note: classvar may be useless because of del adapter
|
||||
if WattpadComAdapter.CATEGORY_DEFs is None:
|
||||
try:
|
||||
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
|
||||
except Exception as e:
|
||||
logger.warning('API_GETCATEGORIES failed: %s. Fallback to list from 2024-12'%e)
|
||||
WattpadComAdapter.CATEGORY_DEFs = [{"id":4,"name":"Romance","name_english":"Romance","roles":["onboarding","writing","searching"]},{"id":5,"name":"Science Fiction","name_english":"Science Fiction","roles":["onboarding","writing","searching"]},{"id":3,"name":"Fantasy","name_english":"Fantasy","roles":["onboarding","writing","searching"]},{"id":7,"name":"Humor","name_english":"Humor","roles":["onboarding","writing","searching"]},{"id":12,"name":"Paranormal","name_english":"Paranormal","roles":["onboarding","writing","searching"]},{"id":8,"name":"Mystery Thriller","name_english":"Mystery Thriller","roles":["onboarding","writing","searching"]},{"id":9,"name":"Horror","name_english":"Horror","roles":["onboarding","writing","searching"]},{"id":11,"name":"Adventure","name_english":"Adventure","roles":["onboarding","writing","searching"]},{"id":23,"name":"Historical Fiction","name_english":"Historical Fiction","roles":["onboarding","writing","searching"]},{"id":1,"name":"Teen Fiction","name_english":"Teen Fiction","roles":["onboarding","writing","searching"]},{"id":6,"name":"Fanfiction","name_english":"Fanfiction","roles":["onboarding","writing","searching"]},{"id":2,"name":"Poetry","name_english":"Poetry","roles":["onboarding","writing","searching"]},{"id":17,"name":"Short Story","name_english":"Short Story","roles":["onboarding","writing","searching"]},{"id":21,"name":"General Fiction","name_english":"General 
Fiction","roles":["onboarding","writing","searching"]},{"id":24,"name":"ChickLit","name_english":"ChickLit","roles":["onboarding","writing","searching"]},{"id":14,"name":"Action","name_english":"Action","roles":["onboarding","writing","searching"]},{"id":18,"name":"Vampire","name_english":"Vampire","roles":["onboarding","writing","searching"]},{"id":22,"name":"Werewolf","name_english":"Werewolf","roles":["onboarding","writing","searching"]},{"id":13,"name":"Spiritual","name_english":"Spiritual","roles":["onboarding","writing","searching"]},{"id":16,"name":"Non-Fiction","name_english":"Non-Fiction","roles":["onboarding","writing","searching"]},{"id":10,"name":"Classics","name_english":"Classics","roles":["onboarding","searching"]},{"id":19,"name":"Random","name_english":"Random","roles":["writing","searching"]}]
|
||||
|
||||
logger.debug("URL: "+self.url)
|
||||
try:
|
||||
storyInfo = json.loads(self.get_request(WattpadComAdapter.API_STORYINFO % self.storyId))
|
||||
# logger.debug('storyInfo: %s' % json.dumps(storyInfo, sort_keys=True,
|
||||
|
|
@ -113,6 +114,13 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
if not (self.is_adult or self.getConfig("is_adult")) and storyInfo['mature'] == True:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
# Tags
|
||||
self.story.extendList('genre', storyInfo['tags'])
|
||||
|
||||
# Rating
|
||||
if storyInfo['mature']:
|
||||
self.story.setMetadata('rating', 'Mature')
|
||||
|
||||
# title
|
||||
self.story.setMetadata('title', storyInfo['title'])
|
||||
|
||||
|
|
@ -131,26 +139,43 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
self.setDescription(storyInfo['url'], storyInfo['description'])
|
||||
|
||||
# DATES
|
||||
self.story.setMetadata('dateUpdated', makeDate(storyInfo['modifyDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S"))
|
||||
if self.story.getConfig('dateUpdated_method') == "lastPublishedPart":
|
||||
self.story.setMetadata('dateUpdated', makeDate(storyInfo['lastPublishedPart']['createDate'], self.getDateFormat()))
|
||||
else:
|
||||
self.story.setMetadata('dateUpdated', makeDate(storyInfo['modifyDate'], self.getDateFormat()))
|
||||
self.story.setMetadata('datePublished', makeDate(storyInfo['createDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S"))
|
||||
|
||||
# Chapters
|
||||
for part in storyInfo['parts']:
|
||||
self.add_chapter(part['title'], part['url'])
|
||||
chapterDate = makeDate(part["createDate"], self.getDateFormat())
|
||||
chaptermodifyDate = makeDate(part["modifyDate"], self.getDateFormat())
|
||||
self.add_chapter(part["title"], part["url"], {
|
||||
"date": chapterDate.strftime(self.getConfig("datechapter_format", self.getConfig("datePublished_format", self.getDateFormat()))),
|
||||
"modifyDate": chaptermodifyDate.strftime(self.getConfig("datechapter_format", self.getConfig("datePublished_format", self.getDateFormat())))
|
||||
},
|
||||
)
|
||||
self.chapter_photoUrl[part['url']] = part['photoUrl']
|
||||
self.setCoverImage(storyInfo['url'], storyInfo['cover'].replace('-256-','-512-'))
|
||||
self.story.setMetadata('language', storyInfo['language']['name'])
|
||||
|
||||
# CATEGORIES
|
||||
try:
|
||||
storyCategories = [WattpadComAdapter.CATEGORY_DEFs.get(unicode(c)) for c in storyInfo['categories'] if
|
||||
unicode(c) in WattpadComAdapter.CATEGORY_DEFs]
|
||||
|
||||
self.story.setMetadata('category', storyCategories[0])
|
||||
self.story.setMetadata('tags', storyInfo['tags'])
|
||||
except:
|
||||
pass
|
||||
|
||||
return self.extractChapterUrlsAndMetadata()
|
||||
# The category '0' is almost always present but does not have an entry in the Wattpad API (https://www.wattpad.com/api/v3/categories).
|
||||
logger.debug('Categories: %s'%str(storyInfo['categories']))
|
||||
0 in storyInfo['categories'] and storyInfo['categories'].remove(0)
|
||||
storyCategories = []
|
||||
for category in WattpadComAdapter.CATEGORY_DEFs:
|
||||
if category['id'] in storyInfo['categories']:
|
||||
storyCategories.append(category['name'])
|
||||
storyInfo['categories'].remove(category['id'])
|
||||
if not storyInfo['categories']:
|
||||
break
|
||||
self.story.extendList('category', storyCategories)
|
||||
#try:
|
||||
#storyCategories = [WattpadComAdapter.CATEGORY_DEFs.get(unicode(c)) for c in storyInfo['categories'] if
|
||||
# unicode(c) in WattpadComAdapter.CATEGORY_DEFs]
|
||||
#self.story.setMetadata('category', storyCategories[0])
|
||||
#except Exception as e:
|
||||
#pass
|
||||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('%s' % url)
|
||||
|
|
|
|||
|
|
@ -309,6 +309,7 @@ def get_valid_set_options():
|
|||
'skip_sticky_first_posts':(base_xenforo2_list,None,boollist),
|
||||
'include_dice_rolls':(base_xenforo2_list,None,boollist+['svg']),
|
||||
'include_chapter_banner_images':(['wattpad.com'],None,boollist),
|
||||
'dateUpdated_method':(['wattpad.com'],None,['modifyDate', 'lastPublishedPart']),
|
||||
'fix_excess_space': (['novelonlinefull.com', 'novelall.com'], ['epub', 'html'], boollist),
|
||||
'dedup_order_chapter_list': (['wuxiaworld.xyz', 'novelupdates.cc'], None, boollist),
|
||||
'show_nsfw_cover_images': (['fiction.live'], None, boollist),
|
||||
|
|
@ -574,6 +575,7 @@ def get_valid_keywords():
|
|||
'skip_sticky_first_posts',
|
||||
'include_dice_rolls',
|
||||
'include_chapter_banner_images',
|
||||
'dateUpdated_method',
|
||||
'datethreadmark_format',
|
||||
'fix_pseudo_html',
|
||||
'fix_excess_space',
|
||||
|
|
|
|||
|
|
@ -3271,11 +3271,17 @@ readings_label:Readings
|
|||
|
||||
[wattpad.com]
|
||||
use_basic_cache:true
|
||||
#is_adult:true
|
||||
extra_titlepage_entries: language, reads
|
||||
extra_valid_entries: language, tags, reads
|
||||
extra_titlepage_entries: reads
|
||||
extra_valid_entries: reads
|
||||
reads_label:Read Count
|
||||
include_in_genre: tags
|
||||
datechapter_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
## You can set the 'dateUpdated_method' to either:
|
||||
## - 'modifyDate': This will keep the current behavior, where the
|
||||
## update date corresponds to any modification made to the content.
|
||||
## - 'lastPublishedPart': This will set the update date to
|
||||
## the date of the last published chapter.
|
||||
dateUpdated_method: modifyDate
|
||||
|
||||
# Add comma separators to the numeric 'reads' count, e.g. 10000 becomes 10,000.
|
||||
add_to_comma_entries:,reads
|
||||
|
|
|
|||
176
tests/adapters/test_adapter_wattpadcom.py
Normal file
176
tests/adapters/test_adapter_wattpadcom.py
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
import pytest
|
||||
from unittest.mock import patch
|
||||
from fanficfare.exceptions import HTTPErrorFFF
|
||||
|
||||
from fanficfare.adapters.adapter_wattpadcom import WattpadComAdapter as wattpadcom
|
||||
from tests.adapters.generic_adapter_test import GenericAdapterTestExtractChapterUrlsAndMetadata, GenericAdapterTestGetChapterText
|
||||
from tests.conftest import wattpadcom_api_story_return, wattpadcom_api_chapter_return, wattpadcom_api_getcategories_return
|
||||
|
||||
# Expected values shared by every test in this module. Everything refers to a
# single Wattpad story (id 173080052) whose canned API responses are imported
# from tests.conftest.
SPECIFIC_TEST_DATA = {
    # --- adapter under test and how the generic test base locates it ---
    "adapter": wattpadcom,
    "url": "https://www.wattpad.com/story/173080052-the-kids-aren%27t-alright",
    "sections": ["wattpad.com"],
    "specific_path_adapter": "adapter_wattpadcom.WattpadComAdapter",

    # --- story-level metadata ---
    "title": "The Kids Aren't Alright",
    "cover_image": "https://img.wattpad.com/cover/173080052-512-k768737.jpg",
    "author": "bee_mcd",
    "authorId": "bee_mcd",
    "datePublished": "2019-01-02",
    "dateUpdated": "2024-01-22",
    "intro": "The year is 1988, and Finn, Ronan, Becca and Jasper are spending the summer at a reformatory camp located deep in the Alaskan wilderness. The camp, named Lightlake, is the last chance the teens have to get their lives back on track, but changing for the better isn't easy - and especially not at a place like Lightlake, where secrets outnumber the campers and myths have a way of coming to life.\n\nThis story is now free on Wattpad. \n\n[[word count: 200,000-250,000 words]]",

    # --- chapter list: a sample of entries keyed by zero-based index ---
    "expected_chapters": {
        0: {
            "title": "Chapter 1: Finn",
            "url": "https://www.wattpad.com/675342676-the-kids-aren%27t-alright-chapter-1-finn",
            "date": "2019-01-02 03:02:00",
        },
        10: {
            "title": "Chapter 11: Jasper",
            "url": "https://www.wattpad.com/675347689-the-kids-aren%27t-alright-chapter-11-jasper",
        },
        76: {
            "title": 'Sneak Peak of Book #2, "Kids These Days"',
            "url": "https://www.wattpad.com/807690860-the-kids-aren%27t-alright-sneak-peak-of-book-2-kids",
        },
    },
    "list_chapters_fixture": wattpadcom_api_story_return,

    # --- chapter text: one chapter and sentences that must appear in it ---
    "chapter_url": "https://www.wattpad.com/675344459-the-kids-aren%27t-alright-chapter-3-ronan",
    "expected_sentences": [
        "We end up stopping at a newspaper stand a few blocks away.",
        "\"We can go somewhere else if it bothers you so much. I'll call a cab.\"",
        "\"I'll see you tomorrow,\" I say to him as he climbs the stairs to the front door. \"We can catch a Mets game—\"",
    ],
    "chapter_fixture": wattpadcom_api_chapter_return,

    # --- remaining metadata checked by the site-specific tests ---
    "status": "Completed",
    "category": "Teen Fiction",
    "genre": "80s, adventure, alaska, camps, comedy, drama, foundfamily, friends, humor, lake, lgbt, magic, mystery, myth, novel, psychic, retro, summer, summercamp, teen, teenfiction, texttospeech, wilderness, youngadult, yukon",
    "language": "English",
    "rating": "",  # empty: the 'Mature' rating is only set for mature stories
    "reads": "1206132",
}
|
||||
|
||||
class TestExtractChapterUrlsAndMetadata(GenericAdapterTestExtractChapterUrlsAndMetadata):
    """Metadata-extraction tests for the Wattpad adapter.

    All network access is replaced by canned API payloads, so these tests
    only exercise how the adapter maps the story JSON onto story metadata.
    """

    def setup_method(self):
        """Build the adapter for the reference story via the generic test base."""
        self.expected_data = SPECIFIC_TEST_DATA

        super().setup_method(
            SPECIFIC_TEST_DATA['adapter'],
            SPECIFIC_TEST_DATA['url'],
            SPECIFIC_TEST_DATA['sections'],
            SPECIFIC_TEST_DATA['specific_path_adapter'],
            SPECIFIC_TEST_DATA['list_chapters_fixture'])

        # 'reads' is a site-specific entry; presumably the test configuration
        # does not load the ini's extra_valid_entries, so register it by hand.
        self.configuration.validEntries.extend(['reads'])

    @pytest.fixture(autouse=True)
    def setup_env(self):
        """Patch the adapter's I/O entry points for the duration of each test."""
        with patch(f'fanficfare.adapters.{self.path_adapter}.setDescription') as mock_setDescription, \
             patch(f'fanficfare.adapters.{self.path_adapter}.setCoverImage') as mock_setCoverImage, \
             patch(f'fanficfare.adapters.{self.path_adapter}.get_request') as mockget_request:

            self.mock_setCoverImage = mock_setCoverImage
            self.mock_setDescription = mock_setDescription
            self.mockget_request = mockget_request

            # The category table is cached on the adapter class. Only while
            # it is still unset does the adapter request the categories
            # before the story, so the mock must answer in that order.
            if wattpadcom.CATEGORY_DEFs is None:
                self.mockget_request.side_effect = [wattpadcom_api_getcategories_return, self.fixture]
            else:
                self.mockget_request.return_value = self.fixture

            yield

    def test_get_cover_image(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        self.mock_setCoverImage.assert_called_with(self.url, self.expected_data['cover_image'])

    def test_get_published_date(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('datePublished') == self.expected_data['datePublished']

    def test_get_status(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('status') == self.expected_data['status']

    def test_get_genre(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('genre') == self.expected_data['genre']

    def test_get_warnings(self):
        # NOTE(review): despite its name this checks the 'reads' counter. The
        # name is kept in case it deliberately overrides a same-named test in
        # the generic base class - confirm before renaming.
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('reads') == self.expected_data['reads']

    def test_get_language(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('language') == self.expected_data['language']

    def test_get_agerating(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('rating') == self.expected_data['rating']

    def test_get_category(self):
        # Formerly a second `test_get_agerating`, which replaced the rating
        # test above in the class namespace so that test never ran.
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('category') == self.expected_data['category']

    @patch('fanficfare.adapters.adapter_wattpadcom.WattpadComAdapter.get_request')
    def test_get_category_when_req_fails(self, mockget_request):
        """A failing categories request must fall back to the built-in category list."""
        # Given
        mockget_request.side_effect = [HTTPErrorFFF(self.expected_data['url'], 403, 'Client Error'), wattpadcom_api_story_return]
        # Clear the class-level cache so the categories request is issued.
        wattpadcom.CATEGORY_DEFs = None

        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('category') == self.expected_data['category']
|
||||
|
||||
|
||||
class TestGetChapterText(GenericAdapterTestGetChapterText):
    """Chapter-text tests for the Wattpad adapter, run against canned API payloads."""

    def setup_method(self):
        """Build the adapter for the reference story with the chapter payload as fixture."""
        self.expected_data = SPECIFIC_TEST_DATA

        super().setup_method(
            SPECIFIC_TEST_DATA['adapter'],
            SPECIFIC_TEST_DATA['url'],
            SPECIFIC_TEST_DATA['sections'],
            SPECIFIC_TEST_DATA['specific_path_adapter'],
            SPECIFIC_TEST_DATA['chapter_fixture'])

    @pytest.fixture(autouse=True)
    def setup_env(self):
        """Patch the adapter's I/O entry points and queue the mocked responses."""
        with patch(f'fanficfare.adapters.{self.path_adapter}.setDescription'), \
             patch(f'fanficfare.adapters.{self.path_adapter}.setCoverImage'), \
             patch(f'fanficfare.adapters.{self.path_adapter}.get_request') as mockget_request:

            # Responses are consumed in call order: story info first, then
            # the chapter text.
            responses = [wattpadcom_api_story_return, self.fixture]
            # While the class-level category cache is unset the adapter asks
            # for the categories before the story. Without this entry the
            # story payload would be consumed as the category table and the
            # test would only pass when another test had filled the cache.
            if wattpadcom.CATEGORY_DEFs is None:
                responses.insert(0, wattpadcom_api_getcategories_return)
            mockget_request.side_effect = responses

            yield

    def test_get_metadata(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()
        response = self.adapter.getChapterText(self.expected_data['chapter_url'])

        # Then
        for p in self.expected_data['expected_sentences']:
            assert p in response
|
||||
|
|
@ -1,2 +1,3 @@
|
|||
from tests.fixtures_chireads import *
|
||||
from tests.fixtures_fanfictionsfr import *
|
||||
from tests.fixtures_fanfictionsfr import *
|
||||
from tests.fixtures_wattpadcom import *
|
||||
|
|
|
|||
143
tests/fixtures_wattpadcom.py
Normal file
143
tests/fixtures_wattpadcom.py
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue