mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-04-29 10:25:43 +02:00
Fixes for site changes in adapter_webnovelcom. #731
This commit is contained in:
parent
19571e3b2b
commit
b24db52b3d
2 changed files with 73 additions and 51 deletions
|
|
@ -150,8 +150,9 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
|
||||
## get chapters from a json API url.
|
||||
jsondata = json.loads(self.get_request(
|
||||
"https://" + self.getSiteDomain() + "/apiajax/chapter/GetChapterList?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
|
||||
'storyId')))
|
||||
"https://" + self.getSiteDomain() + "/go/pcm/chapter/get-chapter-list?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
|
||||
'storyId') + "&pageIndex=0"))
|
||||
|
||||
# logger.debug(json.dumps(jsondata, sort_keys=True,
|
||||
# indent=2, separators=(',', ':')))
|
||||
for volume in jsondata["data"]["volumeItems"]:
|
||||
|
|
@ -161,9 +162,18 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
# seems to have changed
|
||||
# --JM
|
||||
continue
|
||||
chap_title = 'Chapter ' + unicode(chap['index']) + ' - ' + chap['name']
|
||||
chap_Url = url.rstrip('/') + '/' + chap['id']
|
||||
self.add_chapter(chap_title, chap_Url)
|
||||
chap_title = 'Chapter ' + unicode(self.num_chapters()+1) + ' - ' + chap['chapterName']
|
||||
chap_Url = url.rstrip('/') + '/' + chap['chapterId']
|
||||
self.add_chapter(chap_title, chap_Url,
|
||||
{'volumeName':volume['volumeName'],
|
||||
'volumeId':volume['volumeId'],
|
||||
## dates are months or years ago for
|
||||
## so many chapters I judge this
|
||||
## worthless.
|
||||
# 'publishTimeFormat':chap['publishTimeFormat'],
|
||||
# 'date':parse_relative_date_string(chap['publishTimeFormat']).strftime(self.getConfig("datethreadmark_format",
|
||||
# self.getConfig("dateCreated_format","%Y-%m-%d %H:%M:%S"))),
|
||||
})
|
||||
|
||||
|
||||
if get_cover:
|
||||
|
|
@ -178,7 +188,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
if rating:
|
||||
self.story.setMetadata('rating',rating.string)
|
||||
|
||||
last_updated_string = jsondata['data']['bookInfo']['newChapterTime']
|
||||
last_updated_string = jsondata['data']['lastChapterItem']['publishTimeFormat']
|
||||
last_updated = parse_relative_date_string(last_updated_string)
|
||||
|
||||
# Published date is always unknown, so simply don't set it
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
|
||||
# py2 vs py3 transition
|
||||
from .six import text_type as unicode
|
||||
|
|
@ -31,56 +32,67 @@ UNIX_EPOCHE = datetime.fromtimestamp(86400)
|
|||
|
||||
## Currently used by adapter_webnovelcom & adapter_wwwnovelallcom
|
||||
|
||||
def parse_relative_date_string(string_):
|
||||
# Keep this explicit instead of replacing parentheses in case we
|
||||
# discover a format that is not so easily translated as a
|
||||
# keyword-argument to timedelta. In practice I have only observed
|
||||
# hours, weeks and days
|
||||
unit_to_keyword = {
|
||||
'second(s)': 'seconds',
|
||||
'minute(s)': 'minutes',
|
||||
'hour(s)': 'hours',
|
||||
'day(s)': 'days',
|
||||
'week(s)': 'weeks',
|
||||
'seconds': 'seconds',
|
||||
'minutes': 'minutes',
|
||||
'hours': 'hours',
|
||||
'days': 'days',
|
||||
'weeks': 'weeks',
|
||||
'second': 'seconds',
|
||||
'minute': 'minutes',
|
||||
'mins': 'minutes',
|
||||
'hour': 'hours',
|
||||
'day': 'days',
|
||||
'week': 'weeks',
|
||||
}
|
||||
relrexp = re.compile(r'^(?P<val>\d+) *(?P<unit>[^ ]+).*$')
|
||||
|
||||
# Keep this explicit instead of replacing parentheses in case we
|
||||
# discover a format that is not so easily translated as a
|
||||
# keyword-argument to timedelta.
|
||||
unit_to_keyword = {
|
||||
'second(s)': 'seconds',
|
||||
'minute(s)': 'minutes',
|
||||
'hour(s)': 'hours',
|
||||
'day(s)': 'days',
|
||||
'week(s)': 'weeks',
|
||||
'seconds': 'seconds',
|
||||
'minutes': 'minutes',
|
||||
'hours': 'hours',
|
||||
'days': 'days',
|
||||
'weeks': 'weeks',
|
||||
'second': 'seconds',
|
||||
'minute': 'minutes',
|
||||
'mins': 'minutes',
|
||||
'hour': 'hours',
|
||||
'day': 'days',
|
||||
'week': 'weeks',
|
||||
'mth': 'months',
|
||||
'h': 'hours',
|
||||
'd': 'days',
|
||||
'yr': 'years',
|
||||
}
|
||||
|
||||
def parse_relative_date_string(reldatein):
|
||||
# logger.debug("parse_relative_date_string(%s)"%reldatein)
|
||||
# discards trailing ' ago' if present
|
||||
value, unit_string = string_.split()[:2]
|
||||
unit = unit_to_keyword.get(unit_string)
|
||||
if not unit:
|
||||
m = re.match(relrexp,reldatein)
|
||||
|
||||
if m:
|
||||
value = m.group('val')
|
||||
unit_string = m.group('unit')
|
||||
|
||||
unit = unit_to_keyword.get(unit_string)
|
||||
logger.debug("val:%s unit_string:%s unit:%s"%(value, unit_string, unit))
|
||||
## I'm not going to worry very much about accuracy for a site
|
||||
## that considers '2 years ago' an acceptable time stamp.
|
||||
if "year" in unit_string:
|
||||
## that considers '2 years ago' an acceptable time stamp.
|
||||
if "year" in unit_string or 'year' in unit:
|
||||
value = unicode(int(value)*365)
|
||||
unit = 'days'
|
||||
elif "month" in unit_string:
|
||||
elif "month" in unit_string or 'month' in unit:
|
||||
value = unicode(int(value)*31)
|
||||
unit = 'days'
|
||||
else:
|
||||
# This is "just as wrong" as always returning the current
|
||||
# date, but prevents unneeded updates each time
|
||||
logger.warning('Failed to parse relative date string: %r, falling back to unix epoche', string_)
|
||||
return UNIX_EPOCHE
|
||||
logger.debug("val:%s unit_string:%s unit:%s"%(value, unit_string, unit))
|
||||
if unit:
|
||||
kwargs = {unit: int(value)}
|
||||
|
||||
kwargs = {unit: int(value)}
|
||||
|
||||
# "naive" dates without hours and seconds are created in
|
||||
# writers.base_writer.writeStory(), so we don't have to strip
|
||||
# hours and minutes from the base date. Using datetime objects
|
||||
# would result in a slightly different time (since we calculate
|
||||
# the last updated date based on the current time) during each
|
||||
# update, since the seconds and hours change.
|
||||
today = datetime.utcnow()
|
||||
time_ago = timedelta(**kwargs)
|
||||
return today - time_ago
|
||||
# "naive" dates without hours and seconds are created in
|
||||
# writers.base_writer.writeStory(), so we don't have to strip
|
||||
# hours and minutes from the base date. Using datetime objects
|
||||
# would result in a slightly different time (since we calculate
|
||||
# the last updated date based on the current time) during each
|
||||
# update, since the seconds and hours change.
|
||||
today = datetime.utcnow()
|
||||
time_ago = timedelta(**kwargs)
|
||||
return today - time_ago
|
||||
# This is "just as wrong" as always returning the current
|
||||
# date, but prevents unneeded updates each time
|
||||
logger.warning('Failed to parse relative date string: %r, falling back to unix epoche', reldatein)
|
||||
return UNIX_EPOCHE
|
||||
|
|
|
|||
Loading…
Reference in a new issue