Small fixes for Wattpad. (#1137)

* adapter_wattpadcom: Various fixes and changes

* adapter_wattpadcom: Config update & category 0 not always present

---------

Co-authored-by: dbhmw <github.spherical376@passmail.net>
This commit is contained in:
dbhmw 2024-12-31 02:10:56 +00:00 committed by GitHub
parent a9944cd255
commit 816bbdfd66
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 391 additions and 32 deletions

View file

@ -3276,11 +3276,17 @@ readings_label:Readings
[wattpad.com]
use_basic_cache:true
#is_adult:true
extra_titlepage_entries: language, reads
extra_valid_entries: language, tags, reads
extra_titlepage_entries: reads
extra_valid_entries: reads
reads_label:Read Count
include_in_genre: tags
datechapter_format:%%Y-%%m-%%d %%H:%%M:%%S
## You can set the 'dateUpdated_method' to either:
## - 'modifyDate': This will keep the current behavior, where the
## update date corresponds to any modification made to the content.
## - 'lastPublishedPart': This will set the update date to
## the date of the last published chapter.
dateUpdated_method: modifyDate
# Add comma separators for numeric reads. Eg 10000 becomes 10,000
add_to_comma_entries:,reads

View file

@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
class WattpadComAdapter(BaseSiteAdapter):
# All the API discovery work done by github user de3sw2aq1
# Source: https://github.com/de3sw2aq1/wattpad-ebook-scraper/blob/master/scrape.py
API_GETCATEGORIES = 'https://www.wattpad.com/apiv2/getcategories'
API_GETCATEGORIES = 'https://www.wattpad.com/api/v3/categories'
API_STORYINFO = 'https://www.wattpad.com/api/v3/stories/%s' # stories?id=X is NOT the same
API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext?id=%s'
API_CHAPTERINFO = 'https://www.wattpad.com/v4/parts/%s?fields=group(id)&_=%s'
@ -44,15 +44,6 @@ class WattpadComAdapter(BaseSiteAdapter):
self._setURL('https://www.wattpad.com/story/%s' % self.storyId)
self.chapter_photoUrl = {}
# categoryDefs do not change all that often, if at all. Could be put in a constant, leaving it as a class var for now
# note: classvar may be useless because of del adapter
if WattpadComAdapter.CATEGORY_DEFs is None:
try:
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
except:
logger.warning('API_GETCATEGORIES failed.')
WattpadComAdapter.CATEGORY_DEFs = []
@staticmethod
def getSiteDomain():
return 'www.wattpad.com'
@ -99,7 +90,17 @@ class WattpadComAdapter(BaseSiteAdapter):
else:
return groupid
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
def extractChapterUrlsAndMetadata(self, get_cover=True):
# categoryDefs do not change all that often, if at all. Could be put in a constant, leaving it as a class var for now
# note: classvar may be useless because of del adapter
if WattpadComAdapter.CATEGORY_DEFs is None:
try:
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
except Exception as e:
logger.warning('API_GETCATEGORIES failed: %s. Fallback to list from 2024-12'%e)
WattpadComAdapter.CATEGORY_DEFs = [{"id":4,"name":"Romance","name_english":"Romance","roles":["onboarding","writing","searching"]},{"id":5,"name":"Science Fiction","name_english":"Science Fiction","roles":["onboarding","writing","searching"]},{"id":3,"name":"Fantasy","name_english":"Fantasy","roles":["onboarding","writing","searching"]},{"id":7,"name":"Humor","name_english":"Humor","roles":["onboarding","writing","searching"]},{"id":12,"name":"Paranormal","name_english":"Paranormal","roles":["onboarding","writing","searching"]},{"id":8,"name":"Mystery Thriller","name_english":"Mystery Thriller","roles":["onboarding","writing","searching"]},{"id":9,"name":"Horror","name_english":"Horror","roles":["onboarding","writing","searching"]},{"id":11,"name":"Adventure","name_english":"Adventure","roles":["onboarding","writing","searching"]},{"id":23,"name":"Historical Fiction","name_english":"Historical Fiction","roles":["onboarding","writing","searching"]},{"id":1,"name":"Teen Fiction","name_english":"Teen Fiction","roles":["onboarding","writing","searching"]},{"id":6,"name":"Fanfiction","name_english":"Fanfiction","roles":["onboarding","writing","searching"]},{"id":2,"name":"Poetry","name_english":"Poetry","roles":["onboarding","writing","searching"]},{"id":17,"name":"Short Story","name_english":"Short Story","roles":["onboarding","writing","searching"]},{"id":21,"name":"General Fiction","name_english":"General 
Fiction","roles":["onboarding","writing","searching"]},{"id":24,"name":"ChickLit","name_english":"ChickLit","roles":["onboarding","writing","searching"]},{"id":14,"name":"Action","name_english":"Action","roles":["onboarding","writing","searching"]},{"id":18,"name":"Vampire","name_english":"Vampire","roles":["onboarding","writing","searching"]},{"id":22,"name":"Werewolf","name_english":"Werewolf","roles":["onboarding","writing","searching"]},{"id":13,"name":"Spiritual","name_english":"Spiritual","roles":["onboarding","writing","searching"]},{"id":16,"name":"Non-Fiction","name_english":"Non-Fiction","roles":["onboarding","writing","searching"]},{"id":10,"name":"Classics","name_english":"Classics","roles":["onboarding","searching"]},{"id":19,"name":"Random","name_english":"Random","roles":["writing","searching"]}]
logger.debug("URL: "+self.url)
try:
storyInfo = json.loads(self.get_request(WattpadComAdapter.API_STORYINFO % self.storyId))
# logger.debug('storyInfo: %s' % json.dumps(storyInfo, sort_keys=True,
@ -113,6 +114,13 @@ class WattpadComAdapter(BaseSiteAdapter):
if not (self.is_adult or self.getConfig("is_adult")) and storyInfo['mature'] == True:
raise exceptions.AdultCheckRequired(self.url)
# Tags
self.story.extendList('genre', storyInfo['tags'])
# Rating
if storyInfo['mature']:
self.story.setMetadata('rating', 'Mature')
# title
self.story.setMetadata('title', storyInfo['title'])
@ -131,26 +139,43 @@ class WattpadComAdapter(BaseSiteAdapter):
self.setDescription(storyInfo['url'], storyInfo['description'])
# DATES
self.story.setMetadata('dateUpdated', makeDate(storyInfo['modifyDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S"))
if self.story.getConfig('dateUpdated_method') == "lastPublishedPart":
self.story.setMetadata('dateUpdated', makeDate(storyInfo['lastPublishedPart']['createDate'], self.getDateFormat()))
else:
self.story.setMetadata('dateUpdated', makeDate(storyInfo['modifyDate'], self.getDateFormat()))
self.story.setMetadata('datePublished', makeDate(storyInfo['createDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S"))
# Chapters
for part in storyInfo['parts']:
self.add_chapter(part['title'], part['url'])
chapterDate = makeDate(part["createDate"], self.getDateFormat())
chaptermodifyDate = makeDate(part["modifyDate"], self.getDateFormat())
self.add_chapter(part["title"], part["url"], {
"date": chapterDate.strftime(self.getConfig("datechapter_format", self.getConfig("datePublished_format", self.getDateFormat()))),
"modifyDate": chaptermodifyDate.strftime(self.getConfig("datechapter_format", self.getConfig("datePublished_format", self.getDateFormat())))
},
)
self.chapter_photoUrl[part['url']] = part['photoUrl']
self.setCoverImage(storyInfo['url'], storyInfo['cover'].replace('-256-','-512-'))
self.story.setMetadata('language', storyInfo['language']['name'])
# CATEGORIES
try:
storyCategories = [WattpadComAdapter.CATEGORY_DEFs.get(unicode(c)) for c in storyInfo['categories'] if
unicode(c) in WattpadComAdapter.CATEGORY_DEFs]
self.story.setMetadata('category', storyCategories[0])
self.story.setMetadata('tags', storyInfo['tags'])
except:
pass
return self.extractChapterUrlsAndMetadata()
# The category '0' is almost always present but does not have an entry in the Wattpad API (https://www.wattpad.com/api/v3/categories).
logger.debug('Categories: %s'%str(storyInfo['categories']))
0 in storyInfo['categories'] and storyInfo['categories'].remove(0)
storyCategories = []
for category in WattpadComAdapter.CATEGORY_DEFs:
if category['id'] in storyInfo['categories']:
storyCategories.append(category['name'])
storyInfo['categories'].remove(category['id'])
if not storyInfo['categories']:
break
self.story.extendList('category', storyCategories)
#try:
#storyCategories = [WattpadComAdapter.CATEGORY_DEFs.get(unicode(c)) for c in storyInfo['categories'] if
# unicode(c) in WattpadComAdapter.CATEGORY_DEFs]
#self.story.setMetadata('category', storyCategories[0])
#except Exception as e:
#pass
def getChapterText(self, url):
logger.debug('%s' % url)

View file

@ -309,6 +309,7 @@ def get_valid_set_options():
'skip_sticky_first_posts':(base_xenforo2_list,None,boollist),
'include_dice_rolls':(base_xenforo2_list,None,boollist+['svg']),
'include_chapter_banner_images':(['wattpad.com'],None,boollist),
'dateUpdated_method':(['wattpad.com'],None,['modifyDate', 'lastPublishedPart']),
'fix_excess_space': (['novelonlinefull.com', 'novelall.com'], ['epub', 'html'], boollist),
'dedup_order_chapter_list': (['wuxiaworld.xyz', 'novelupdates.cc'], None, boollist),
'show_nsfw_cover_images': (['fiction.live'], None, boollist),
@ -574,6 +575,7 @@ def get_valid_keywords():
'skip_sticky_first_posts',
'include_dice_rolls',
'include_chapter_banner_images',
'dateUpdated_method',
'datethreadmark_format',
'fix_pseudo_html',
'fix_excess_space',

View file

@ -3271,11 +3271,17 @@ readings_label:Readings
[wattpad.com]
use_basic_cache:true
#is_adult:true
extra_titlepage_entries: language, reads
extra_valid_entries: language, tags, reads
extra_titlepage_entries: reads
extra_valid_entries: reads
reads_label:Read Count
include_in_genre: tags
datechapter_format:%%Y-%%m-%%d %%H:%%M:%%S
## You can set the 'dateUpdated_method' to either:
## - 'modifyDate': This will keep the current behavior, where the
## update date corresponds to any modification made to the content.
## - 'lastPublishedPart': This will set the update date to
## the date of the last published chapter.
dateUpdated_method: modifyDate
# Add comma separators for numeric reads. Eg 10000 becomes 10,000
add_to_comma_entries:,reads

View file

@ -0,0 +1,176 @@
import pytest
from unittest.mock import patch
from fanficfare.exceptions import HTTPErrorFFF
from fanficfare.adapters.adapter_wattpadcom import WattpadComAdapter as wattpadcom
from tests.adapters.generic_adapter_test import GenericAdapterTestExtractChapterUrlsAndMetadata, GenericAdapterTestGetChapterText
from tests.conftest import wattpadcom_api_story_return, wattpadcom_api_chapter_return, wattpadcom_api_getcategories_return
# Shared expectations for the Wattpad adapter tests in this module.
# Both test classes below read this dict: the 'adapter', 'url', 'sections',
# 'specific_path_adapter' and '*_fixture' entries are passed to setup_method(),
# and the remaining entries are the values the assertions compare against.
SPECIFIC_TEST_DATA = {
'adapter': wattpadcom,
'url': 'https://www.wattpad.com/story/173080052-the-kids-aren%27t-alright',
'sections': ["wattpad.com"],
# Dotted path (relative to fanficfare.adapters) used to build patch() targets.
'specific_path_adapter': 'adapter_wattpadcom.WattpadComAdapter',
'title': 'The Kids Aren\'t Alright',
'cover_image': 'https://img.wattpad.com/cover/173080052-512-k768737.jpg',
'author': 'bee_mcd',
'authorId': 'bee_mcd',
'datePublished': '2019-01-02',
'dateUpdated': '2024-01-22',
'intro': "The year is 1988, and Finn, Ronan, Becca and Jasper are spending the summer at a reformatory camp located deep in the Alaskan wilderness. The camp, named Lightlake, is the last chance the teens have to get their lives back on track, but changing for the better isn't easy - and especially not at a place like Lightlake, where secrets outnumber the campers and myths have a way of coming to life.\n\nThis story is now free on Wattpad. \n\n[[word count: 200,000-250,000 words]]",
# Presumably keyed by zero-based chapter index; consumed by the generic
# base test class, which is not visible here -- TODO confirm.
'expected_chapters': {
0: {'title': 'Chapter 1: Finn',
'url': 'https://www.wattpad.com/675342676-the-kids-aren%27t-alright-chapter-1-finn',
'date': '2019-01-02 03:02:00'},
10: {'title': 'Chapter 11: Jasper',
'url': 'https://www.wattpad.com/675347689-the-kids-aren%27t-alright-chapter-11-jasper'},
76: {'title': 'Sneak Peak of Book #2, "Kids These Days"',
'url': 'https://www.wattpad.com/807690860-the-kids-aren%27t-alright-sneak-peak-of-book-2-kids'},
},
# Mocked story-info API payload returned by the patched get_request.
'list_chapters_fixture': wattpadcom_api_story_return,
'chapter_url': 'https://www.wattpad.com/675344459-the-kids-aren%27t-alright-chapter-3-ronan',
# Each sentence must appear in the text returned by getChapterText().
'expected_sentences': [
"We end up stopping at a newspaper stand a few blocks away.",
"\"We can go somewhere else if it bothers you so much. I'll call a cab.\"",
"\"I'll see you tomorrow,\" I say to him as he climbs the stairs to the front door. \"We can catch a Mets game—\""
],
# Mocked chapter-text payload returned by the patched get_request.
'chapter_fixture': wattpadcom_api_chapter_return,
'status': 'Completed',
'category': 'Teen Fiction',
'genre': '80s, adventure, alaska, camps, comedy, drama, foundfamily, friends, humor, lake, lgbt, magic, mystery, myth, novel, psychic, retro, summer, summercamp, teen, teenfiction, texttospeech, wilderness, youngadult, yukon',
'language': 'English',
# Empty string is the expected 'rating' metadata for this story.
'rating': '',
'reads': '1206132',
}
class TestExtractChapterUrlsAndMetadata(GenericAdapterTestExtractChapterUrlsAndMetadata):
    """Metadata-extraction tests for the Wattpad adapter.

    Every test runs ``extractChapterUrlsAndMetadata()`` against mocked API
    responses and compares one piece of story metadata with the matching
    entry in ``SPECIFIC_TEST_DATA``.
    """

    def setup_method(self):
        """Build the adapter under test from the shared expectations."""
        self.expected_data = SPECIFIC_TEST_DATA

        super().setup_method(
            SPECIFIC_TEST_DATA['adapter'],
            SPECIFIC_TEST_DATA['url'],
            SPECIFIC_TEST_DATA['sections'],
            SPECIFIC_TEST_DATA['specific_path_adapter'],
            SPECIFIC_TEST_DATA['list_chapters_fixture'])
        # 'reads' is a site-specific entry; register it so it can be read back.
        self.configuration.validEntries.extend(['reads'])

    @pytest.fixture(autouse=True)
    def setup_env(self):
        """Patch the adapter's network and side-effect methods for each test."""
        with patch(f'fanficfare.adapters.{self.path_adapter}.setDescription') as mock_setDescription, \
             patch(f'fanficfare.adapters.{self.path_adapter}.setCoverImage') as mock_setCoverImage, \
             patch(f'fanficfare.adapters.{self.path_adapter}.get_request') as mockget_request:
            self.mock_setCoverImage = mock_setCoverImage
            self.mock_setDescription = mock_setDescription
            self.mockget_request = mockget_request

            # CATEGORY_DEFs is cached on the class. While it is still None the
            # adapter requests the category list first, so that payload has to
            # be queued ahead of the story payload.
            if wattpadcom.CATEGORY_DEFs is None:
                self.mockget_request.side_effect = [wattpadcom_api_getcategories_return, self.fixture]
            else:
                self.mockget_request.return_value = self.fixture

            # Yield inside the ``with`` so the patches stay active for the test.
            yield

    def test_get_cover_image(self):
        """The cover image is set from the expected cover URL."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        self.mock_setCoverImage.assert_called_with(self.url, self.expected_data['cover_image'])

    def test_get_published_date(self):
        """'datePublished' matches the expected date."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('datePublished') == self.expected_data['datePublished']

    def test_get_status(self):
        """'status' matches the expected status."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('status') == self.expected_data['status']

    def test_get_genre(self):
        """'genre' matches the expected tag list."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('genre') == self.expected_data['genre']

    def test_get_reads(self):
        """'reads' matches the expected read count."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('reads') == self.expected_data['reads']

    def test_get_language(self):
        """'language' matches the expected language name."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('language') == self.expected_data['language']

    def test_get_agerating(self):
        """'rating' matches the expected rating."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('rating') == self.expected_data['rating']

    def test_get_category(self):
        """'category' matches the expected category."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('category') == self.expected_data['category']

    @patch('fanficfare.adapters.adapter_wattpadcom.WattpadComAdapter.get_request')
    def test_get_category_when_req_fails(self, mockget_request):
        """'category' is still resolved when the categories request fails."""
        # Given
        # First call (categories) raises, second call returns the story payload.
        mockget_request.side_effect = [HTTPErrorFFF(self.expected_data['url'], 403, 'Client Error'), wattpadcom_api_story_return]
        # Clear the class-level cache so the adapter attempts the request again.
        wattpadcom.CATEGORY_DEFs = None

        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('category') == self.expected_data['category']
class TestGetChapterText(GenericAdapterTestGetChapterText):
    """Chapter-text test for the Wattpad adapter, run against mocked API responses."""

    def setup_method(self):
        """Build the adapter under test with the chapter-text fixture."""
        self.expected_data = SPECIFIC_TEST_DATA

        super().setup_method(
            SPECIFIC_TEST_DATA['adapter'],
            SPECIFIC_TEST_DATA['url'],
            SPECIFIC_TEST_DATA['sections'],
            SPECIFIC_TEST_DATA['specific_path_adapter'],
            SPECIFIC_TEST_DATA['chapter_fixture'])

    @pytest.fixture(autouse=True)
    def setup_env(self):
        """Patch the adapter's network and side-effect methods for each test."""
        with patch(f'fanficfare.adapters.{self.path_adapter}.setDescription'), \
             patch(f'fanficfare.adapters.{self.path_adapter}.setCoverImage'), \
             patch(f'fanficfare.adapters.{self.path_adapter}.get_request') as mockget_request:
            # Requests are answered in order: story info, then chapter text.
            responses = [wattpadcom_api_story_return, self.fixture]
            # While the class-level category cache is empty the adapter asks
            # for the category list first. Queue that payload too, so this
            # class does not depend on another test having filled the cache.
            if wattpadcom.CATEGORY_DEFs is None:
                responses.insert(0, wattpadcom_api_getcategories_return)
            mockget_request.side_effect = responses

            # Yield inside the ``with`` so the patches stay active for the test.
            yield

    def test_get_metadata(self):
        """The fetched chapter text contains every expected sentence."""
        # When
        self.adapter.extractChapterUrlsAndMetadata()
        response = self.adapter.getChapterText(self.expected_data['chapter_url'])

        # Then
        for sentence in self.expected_data['expected_sentences']:
            assert sentence in response

View file

@ -1,2 +1,3 @@
from tests.fixtures_chireads import *
from tests.fixtures_fanfictionsfr import *
from tests.fixtures_fanfictionsfr import *
from tests.fixtures_wattpadcom import *

File diff suppressed because one or more lines are too long