adapter_literotica: Partial fix for #1283, chapters from JSON fetch

Jim Miller 2025-11-24 13:20:38 -06:00
parent 30929bc38e
commit 485d4631f9
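The change below adds a fallback that builds the chapter list from Literotica's series works endpoint when the HTML chapter list yields nothing (self.num_chapters() < 1). A minimal standalone sketch of that fetch, assuming, as the diff suggests, that /api/3/series/<id>/works returns a JSON array of works with 'title' and 'url' fields; requests and the fetch_series_chapters name are illustration-only stand-ins, since the adapter itself goes through self.get_request() and feeds the results to its new add_chapter() override:

import json
import requests

def fetch_series_chapters(series_id):
    # Hypothetical helper; the adapter does this inline inside its JSON try block.
    resp = requests.get('https://literotica.com/api/3/series/%s/works' % series_id)
    resp.raise_for_status()
    works = json.loads(resp.text)
    # Mirror how the adapter builds chapter URLs before calling add_chapter().
    return [(chap['title'], 'https://www.literotica.com/s/' + chap['url'])
            for chap in works]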


@@ -95,6 +95,49 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[-1])
# logger.debug("language:%s"%self.story.getMetadata('language'))
## apply clean_chapter_titles
def add_chapter(self,chapter_title,url,othermeta={}):
if self.getConfig("clean_chapter_titles"):
storytitle = self.story.getMetadata('title').lower()
chapter_name_type = None
# strip trailing ch or pt before doing the chapter clean.
# doesn't remove from story title metadata
storytitle = re.sub(r'^(.*?)( (ch|pt))?$',r'\1',storytitle)
if chapter_title.lower().startswith(storytitle):
chapter = chapter_title[len(storytitle):].strip()
# logger.debug('\tChapter: "%s"' % chapter)
if chapter == '':
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
# Sometimes the first chapter does not have type of chapter
if self.num_chapters() == 0:
# logger.debug('\tChapter: first chapter without chapter type')
chapter_name_type = None
else:
separater_char = chapter[0]
# logger.debug('\tseparater_char: "%s"' % separater_char)
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
# logger.debug('\tChapter: "%s"' % chapter)
if chapter.lower().startswith('ch.'):
chapter = chapter[len('ch.'):].strip()
try:
chapter_title = 'Chapter %d' % int(chapter)
except:
chapter_title = 'Chapter %s' % chapter
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif chapter.lower().startswith('pt.'):
chapter = chapter[len('pt.'):].strip()
try:
chapter_title = 'Part %d' % int(chapter)
except:
chapter_title = 'Part %s' % chapter
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif separater_char in [":", "-"]:
chapter_title = chapter
# logger.debug('\tChapter: taking chapter text as whole')
super(LiteroticaSiteAdapter, self).add_chapter(chapter_title,url,othermeta)
def extractChapterUrlsAndMetadata(self):
"""
In April 2024, site introduced significant changes, including
@@ -275,49 +318,9 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## category from chapter list
self.story.extendList('category',[ stripHTML(t) for t in soup.select('a.br_rl') ])
storytitle = self.story.getMetadata('title').lower()
chapter_name_type = None
for chapteratag in soup.select('a.br_rj'):
chapter_title = stripHTML(chapteratag)
# logger.debug('\tChapter: "%s"' % chapteratag)
if self.getConfig("clean_chapter_titles"):
# strip trailing ch or pt before doing the chapter clean.
# doesn't remove from story title metadata
storytitle = re.sub(r'^(.*?)( (ch|pt))?$',r'\1',storytitle)
if chapter_title.lower().startswith(storytitle):
chapter = chapter_title[len(storytitle):].strip()
# logger.debug('\tChapter: "%s"' % chapter)
if chapter == '':
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
# Sometimes the first chapter does not have type of chapter
if self.num_chapters() == 0:
# logger.debug('\tChapter: first chapter without chapter type')
chapter_name_type = None
else:
separater_char = chapter[0]
# logger.debug('\tseparater_char: "%s"' % separater_char)
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
# logger.debug('\tChapter: "%s"' % chapter)
if chapter.lower().startswith('ch.'):
chapter = chapter[len('ch.'):].strip()
try:
chapter_title = 'Chapter %d' % int(chapter)
except:
chapter_title = 'Chapter %s' % chapter
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif chapter.lower().startswith('pt.'):
chapter = chapter[len('pt.'):].strip()
try:
chapter_title = 'Part %d' % int(chapter)
except:
chapter_title = 'Part %s' % chapter
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif separater_char in [":", "-"]:
chapter_title = chapter
# logger.debug('\tChapter: taking chapter text as whole')
# /series/se does include full URLs currently.
chapurl = chapteratag['href']
@@ -330,6 +333,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
self.setCoverImage(self.url,coverimg['src'])
#### Attempting averrating from JS metadata.
#### also alternate chapters from json
try:
state_start="state='"
state_end="'</script>"
@@ -348,7 +352,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## series
elif 'series' in json_state:
all_rates = [ float(x['rate_all']) for x in json_state['series']['works'] ]
## Extract dates from chapter approval dates if dates_from_chapters is enabled
if self.getConfig("dates_from_chapters"):
date_approvals = []
@@ -365,8 +369,20 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('dateUpdated', date_approvals[-1])
if all_rates:
self.story.setMetadata('averrating', '%4.2f' % (sum(all_rates) / float(len(all_rates))))
## alternate chapters
if self.num_chapters() < 1:
seriesid = json_state.get('series',{}).get('coversSeriesId',None)
if seriesid:
logger.info("Fetching chapter data from JSON")
logger.debug(seriesid)
series_json = json.loads(self.get_request('https://literotica.com/api/3/series/%s/works'%seriesid))
# logger.debug(json.dumps(series_json, sort_keys=True,indent=2, separators=(',', ':')))
for chap in series_json:
self.add_chapter(chap['title'], 'https://www.literotica.com/s/'+chap['url'])
except Exception as e:
logger.debug("Processing JSON to find averrating failed. (%s)"%e)
logger.debug("Processing JSON failed. (%s)"%e)
## Features removed because not supportable by new site form:
## averrating metadata entry
@@ -525,7 +541,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
import json
last_page = int(js_story_list.group('last_page'))
current_page = int(js_story_list.group('current_page')) + 1
# Fetching the remaining urls from api. Can't trust the number given about the pages left from a website. Sometimes even the api returns outdated number of pages.
while current_page <= last_page:
i = len(urls)
logger.debug("Pages %s/%s"%(current_page, int(last_page)))