Fixes for site changes in adapter_webnovelcom. #731

2026-04-29 10:25:43 +02:00 · 2021-09-03 11:26:40 -05:00 · 2021-09-03 11:26:40 -05:00 · b24db52b3d
commit b24db52b3d
parent 19571e3b2b
2 changed files with 73 additions and 51 deletions
--- a/fanficfare/adapters/adapter_webnovelcom.py
+++ b/fanficfare/adapters/adapter_webnovelcom.py
@ -150,8 +150,9 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):

        ## get chapters from a json API url.
        jsondata = json.loads(self.get_request(
-            "https://" + self.getSiteDomain() + "/apiajax/chapter/GetChapterList?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
-                'storyId')))
+                "https://" + self.getSiteDomain() + "/go/pcm/chapter/get-chapter-list?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
+                    'storyId') + "&pageIndex=0"))
+
        # logger.debug(json.dumps(jsondata, sort_keys=True,
        #                         indent=2, separators=(',', ':')))
        for volume in jsondata["data"]["volumeItems"]:
@ -161,9 +162,18 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
                                              # seems to have changed
                                              # --JM
                    continue
-                chap_title = 'Chapter ' + unicode(chap['index']) + ' - ' + chap['name']
-                chap_Url = url.rstrip('/') + '/' + chap['id']
-                self.add_chapter(chap_title, chap_Url)
+                chap_title = 'Chapter ' + unicode(self.num_chapters()+1) + ' - ' + chap['chapterName']
+                chap_Url = url.rstrip('/') + '/' + chap['chapterId']
+                self.add_chapter(chap_title, chap_Url,
+                                 {'volumeName':volume['volumeName'],
+                                  'volumeId':volume['volumeId'],
+                                  ## dates are months or years ago for
+                                  ## so many chapters I judge this
+                                  ## worthless.
+                                  # 'publishTimeFormat':chap['publishTimeFormat'],
+                                  # 'date':parse_relative_date_string(chap['publishTimeFormat']).strftime(self.getConfig("datethreadmark_format",
+                                  #                                                                                      self.getConfig("dateCreated_format","%Y-%m-%d %H:%M:%S"))),
+                                  })


        if get_cover:
@ -178,7 +188,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
        if rating:
            self.story.setMetadata('rating',rating.string)

-        last_updated_string = jsondata['data']['bookInfo']['newChapterTime']
+        last_updated_string = jsondata['data']['lastChapterItem']['publishTimeFormat']
        last_updated = parse_relative_date_string(last_updated_string)

        # Published date is always unknown, so simply don't set it
--- a/fanficfare/dateutils.py
+++ b/fanficfare/dateutils.py
@ -18,6 +18,7 @@
 from __future__ import absolute_import

 from datetime import datetime, timedelta
+import re

 # py2 vs py3 transition
 from .six import text_type as unicode
@ -31,56 +32,67 @@ UNIX_EPOCHE = datetime.fromtimestamp(86400)

 ## Currently used by adapter_webnovelcom & adapter_wwwnovelallcom

-def parse_relative_date_string(string_):
-    # Keep this explicit instead of replacing parentheses in case we
-    # discover a format that is not so easily translated as a
-    # keyword-argument to timedelta. In practice I have only observed
-    # hours, weeks and days
-    unit_to_keyword = {
-        'second(s)': 'seconds',
-        'minute(s)': 'minutes',
-        'hour(s)': 'hours',
-        'day(s)': 'days',
-        'week(s)': 'weeks',
-        'seconds': 'seconds',
-        'minutes': 'minutes',
-        'hours': 'hours',
-        'days': 'days',
-        'weeks': 'weeks',
-        'second': 'seconds',
-        'minute': 'minutes',
-        'mins': 'minutes',
-        'hour': 'hours',
-        'day': 'days',
-        'week': 'weeks',
-    }
+relrexp = re.compile(r'^(?P<val>\d+) *(?P<unit>[^ ]+).*$')

+# Keep this explicit instead of replacing parentheses in case we
+# discover a format that is not so easily translated as a
+# keyword-argument to timedelta.
+unit_to_keyword = {
+    'second(s)': 'seconds',
+    'minute(s)': 'minutes',
+    'hour(s)': 'hours',
+    'day(s)': 'days',
+    'week(s)': 'weeks',
+    'seconds': 'seconds',
+    'minutes': 'minutes',
+    'hours': 'hours',
+    'days': 'days',
+    'weeks': 'weeks',
+    'second': 'seconds',
+    'minute': 'minutes',
+    'mins': 'minutes',
+    'hour': 'hours',
+    'day': 'days',
+    'week': 'weeks',
+    'mth': 'months',
+    'h': 'hours',
+    'd': 'days',
+    'yr': 'years',
+}
+
+def parse_relative_date_string(reldatein):
+    # logger.debug("parse_relative_date_string(%s)"%reldatein)
    # discards trailing ' ago' if present
-    value, unit_string = string_.split()[:2]
-    unit = unit_to_keyword.get(unit_string)
-    if not unit:
+    m = re.match(relrexp,reldatein)
+
+    if m:
+        value = m.group('val')
+        unit_string = m.group('unit')
+
+        unit = unit_to_keyword.get(unit_string)
+        logger.debug("val:%s unit_string:%s unit:%s"%(value, unit_string, unit))
        ## I'm not going to worry very much about accuracy for a site
-        ## that considers '2 years ago' and acceptable time stamp.
-        if "year" in unit_string:
+        ## that considers '2 years ago' an acceptable time stamp.
+        if "year" in unit_string or (unit and 'year' in unit):  # unit is None for unmapped units
            value = unicode(int(value)*365)
            unit = 'days'
-        elif "month" in unit_string:
+        elif "month" in unit_string or (unit and 'month' in unit):
            value = unicode(int(value)*31)
            unit = 'days'
-        else:
-            # This is "just as wrong" as always returning the currentq
-            # date, but prevents unneeded updates each time
-            logger.warning('Failed to parse relative date string: %r, falling back to unix epoche', string_)
-            return UNIX_EPOCHE
+        logger.debug("val:%s unit_string:%s unit:%s"%(value, unit_string, unit))
+        if unit:
+            kwargs = {unit: int(value)}

-    kwargs = {unit: int(value)}
-
-    # "naive" dates without hours and seconds are created in
-    # writers.base_writer.writeStory(), so we don't have to strip
-    # hours and minutes from the base date. Using datetime objects
-    # would result in a slightly different time (since we calculate
-    # the last updated date based on the current time) during each
-    # update, since the seconds and hours change.
-    today = datetime.utcnow()
-    time_ago = timedelta(**kwargs)
-    return today - time_ago
+            # "naive" dates without hours and seconds are created in
+            # writers.base_writer.writeStory(), so we don't have to strip
+            # hours and minutes from the base date. Using datetime objects
+            # would result in a slightly different time (since we calculate
+            # the last updated date based on the current time) during each
+            # update, since the seconds and hours change.
+            today = datetime.utcnow()
+            time_ago = timedelta(**kwargs)
+            return today - time_ago
+    # This is "just as wrong" as always returning the current
+    # date, but prevents unneeded updates each time
+    logger.warning('Failed to parse relative date string: %r, falling back to unix epoche', reldatein)
+    return UNIX_EPOCHE