Fixes for site changes in adapter_webnovelcom. #731

This commit is contained in:
Jim Miller 2021-09-03 11:26:40 -05:00
parent 19571e3b2b
commit b24db52b3d
2 changed files with 73 additions and 51 deletions

View file

@ -150,8 +150,9 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
## get chapters from a json API url.
jsondata = json.loads(self.get_request(
"https://" + self.getSiteDomain() + "/apiajax/chapter/GetChapterList?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
'storyId')))
"https://" + self.getSiteDomain() + "/go/pcm/chapter/get-chapter-list?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
'storyId') + "&pageIndex=0"))
# logger.debug(json.dumps(jsondata, sort_keys=True,
# indent=2, separators=(',', ':')))
for volume in jsondata["data"]["volumeItems"]:
@ -161,9 +162,18 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
# seems to have changed
# --JM
continue
chap_title = 'Chapter ' + unicode(chap['index']) + ' - ' + chap['name']
chap_Url = url.rstrip('/') + '/' + chap['id']
self.add_chapter(chap_title, chap_Url)
chap_title = 'Chapter ' + unicode(self.num_chapters()+1) + ' - ' + chap['chapterName']
chap_Url = url.rstrip('/') + '/' + chap['chapterId']
self.add_chapter(chap_title, chap_Url,
{'volumeName':volume['volumeName'],
'volumeId':volume['volumeId'],
## dates are months or years ago for
## so many chapters I judge this
## worthless.
# 'publishTimeFormat':chap['publishTimeFormat'],
# 'date':parse_relative_date_string(chap['publishTimeFormat']).strftime(self.getConfig("datethreadmark_format",
# self.getConfig("dateCreated_format","%Y-%m-%d %H:%M:%S"))),
})
if get_cover:
@ -178,7 +188,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
if rating:
self.story.setMetadata('rating',rating.string)
last_updated_string = jsondata['data']['bookInfo']['newChapterTime']
last_updated_string = jsondata['data']['lastChapterItem']['publishTimeFormat']
last_updated = parse_relative_date_string(last_updated_string)
# Published date is always unknown, so simply don't set it

View file

@ -18,6 +18,7 @@
from __future__ import absolute_import
from datetime import datetime, timedelta
import re
# py2 vs py3 transition
from .six import text_type as unicode
@ -31,56 +32,67 @@ UNIX_EPOCHE = datetime.fromtimestamp(86400)
## Currently used by adapter_webnovelcom & adapter_wwwnovelallcom
# Matches a leading integer, optional spaces, then one space-free unit
# word; anything after that (e.g. a trailing ' ago') is ignored.
relrexp = re.compile(r'^(?P<val>\d+) *(?P<unit>[^ ]+).*$')

# Keep this explicit instead of replacing parentheses in case we
# discover a format that is not so easily translated as a
# keyword-argument to timedelta.
unit_to_keyword = {
    'second(s)': 'seconds',
    'minute(s)': 'minutes',
    'hour(s)': 'hours',
    'day(s)': 'days',
    'week(s)': 'weeks',
    'seconds': 'seconds',
    'minutes': 'minutes',
    'hours': 'hours',
    'days': 'days',
    'weeks': 'weeks',
    'second': 'seconds',
    'minute': 'minutes',
    'mins': 'minutes',
    'hour': 'hours',
    'day': 'days',
    'week': 'weeks',
    'mth': 'months',
    'h': 'hours',
    'd': 'days',
    'yr': 'years',
}


def parse_relative_date_string(reldatein):
    """Convert a relative date such as '3 days ago' or '2mth' to a datetime.

    The string must start with an integer followed by a unit word; the
    unit is looked up in ``unit_to_keyword`` and the resulting timedelta
    is subtracted from the current UTC time.  Month and year units are
    approximated as 31 and 365 days, because timedelta has no keyword
    for either.

    Returns the computed naive datetime, or ``UNIX_EPOCHE`` (after
    logging a warning) when the string does not match or the unit is not
    recognized.
    """
    # logger.debug("parse_relative_date_string(%s)"%reldatein)
    m = relrexp.match(reldatein)

    if m:
        value = int(m.group('val'))
        unit_string = m.group('unit')

        # None when the unit word is not in the table.
        unit = unit_to_keyword.get(unit_string)
        logger.debug("val:%s unit_string:%s unit:%s", value, unit_string, unit)
        ## I'm not going to worry very much about accuracy for a site
        ## that considers '2 years ago' an acceptable time stamp.
        # Guard the membership test: unit is None for unknown unit
        # words, and "'year' in None" would raise TypeError instead of
        # reaching the UNIX_EPOCHE fallback below.
        if "year" in unit_string or (unit and 'year' in unit):
            value = value * 365
            unit = 'days'
        elif "month" in unit_string or (unit and 'month' in unit):
            value = value * 31
            unit = 'days'

        logger.debug("val:%s unit_string:%s unit:%s", value, unit_string, unit)
        if unit:
            kwargs = {unit: value}

            # "naive" dates without hours and seconds are created in
            # writers.base_writer.writeStory(), so we don't have to strip
            # hours and minutes from the base date. Using datetime objects
            # would result in a slightly different time (since we calculate
            # the last updated date based on the current time) during each
            # update, since the seconds and hours change.
            today = datetime.utcnow()
            time_ago = timedelta(**kwargs)
            return today - time_ago

    # This is "just as wrong" as always returning the current
    # date, but prevents unneeded updates each time
    logger.warning('Failed to parse relative date string: %r, falling back to unix epoche', reldatein)
    return UNIX_EPOCHE