adapter_literotica: Partial fix for #1283, chapters from JSON fetch

Jim Miller 2025-11-24 13:20:38 -06:00
parent 30929bc38e
commit 485d4631f9
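The change below adds a fallback that builds the chapter list from Literotica's series works endpoint when the HTML chapter list yields nothing (self.num_chapters() < 1). A minimal standalone sketch of that fetch, assuming, as the diff suggests, that /api/3/series/<id>/works returns a JSON array of works with 'title' and 'url' fields; requests and the fetch_series_chapters name are illustration-only stand-ins, since the adapter itself goes through self.get_request() and feeds the results to its new add_chapter() override:

import json
import requests

def fetch_series_chapters(series_id):
    # Hypothetical helper; the adapter does this inline inside its JSON try block.
    resp = requests.get('https://literotica.com/api/3/series/%s/works' % series_id)
    resp.raise_for_status()
    works = json.loads(resp.text)
    # Mirror how the adapter builds chapter URLs before calling add_chapter().
    return [(chap['title'], 'https://www.literotica.com/s/' + chap['url'])
            for chap in works]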


@@ -95,6 +95,49 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[-1])
# logger.debug("language:%s"%self.story.getMetadata('language'))
## apply clean_chapter_titles
def add_chapter(self,chapter_title,url,othermeta={}):
if self.getConfig("clean_chapter_titles"):
storytitle = self.story.getMetadata('title').lower()
chapter_name_type = None
# strip trailing ch or pt before doing the chapter clean.
# doesn't remove from story title metadata
storytitle = re.sub(r'^(.*?)( (ch|pt))?$',r'\1',storytitle)
if chapter_title.lower().startswith(storytitle):
chapter = chapter_title[len(storytitle):].strip()
# logger.debug('\tChapter: "%s"' % chapter)
if chapter == '':
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
# Sometimes the first chapter does not have type of chapter
if self.num_chapters() == 0:
# logger.debug('\tChapter: first chapter without chapter type')
chapter_name_type = None
else:
separater_char = chapter[0]
# logger.debug('\tseparater_char: "%s"' % separater_char)
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
# logger.debug('\tChapter: "%s"' % chapter)
if chapter.lower().startswith('ch.'):
chapter = chapter[len('ch.'):].strip()
try:
chapter_title = 'Chapter %d' % int(chapter)
except:
chapter_title = 'Chapter %s' % chapter
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif chapter.lower().startswith('pt.'):
chapter = chapter[len('pt.'):].strip()
try:
chapter_title = 'Part %d' % int(chapter)
except:
chapter_title = 'Part %s' % chapter
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif separater_char in [":", "-"]:
chapter_title = chapter
# logger.debug('\tChapter: taking chapter text as whole')
super(LiteroticaSiteAdapter, self).add_chapter(chapter_title,url,othermeta)
def extractChapterUrlsAndMetadata(self):
"""
In April 2024, site introduced significant changes, including
@@ -275,49 +318,9 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## category from chapter list
self.story.extendList('category',[ stripHTML(t) for t in soup.select('a.br_rl') ])
storytitle = self.story.getMetadata('title').lower()
chapter_name_type = None
for chapteratag in soup.select('a.br_rj'):
chapter_title = stripHTML(chapteratag)
# logger.debug('\tChapter: "%s"' % chapteratag)
if self.getConfig("clean_chapter_titles"):
# strip trailing ch or pt before doing the chapter clean.
# doesn't remove from story title metadata
storytitle = re.sub(r'^(.*?)( (ch|pt))?$',r'\1',storytitle)
if chapter_title.lower().startswith(storytitle):
chapter = chapter_title[len(storytitle):].strip()
# logger.debug('\tChapter: "%s"' % chapter)
if chapter == '':
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
# Sometimes the first chapter does not have type of chapter
if self.num_chapters() == 0:
# logger.debug('\tChapter: first chapter without chapter type')
chapter_name_type = None
else:
separater_char = chapter[0]
# logger.debug('\tseparater_char: "%s"' % separater_char)
chapter = chapter[1:].strip() if separater_char in [":", "-"] else chapter
# logger.debug('\tChapter: "%s"' % chapter)
if chapter.lower().startswith('ch.'):
chapter = chapter[len('ch.'):].strip()
try:
chapter_title = 'Chapter %d' % int(chapter)
except:
chapter_title = 'Chapter %s' % chapter
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif chapter.lower().startswith('pt.'):
chapter = chapter[len('pt.'):].strip()
try:
chapter_title = 'Part %d' % int(chapter)
except:
chapter_title = 'Part %s' % chapter
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
# logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
elif separater_char in [":", "-"]:
chapter_title = chapter
# logger.debug('\tChapter: taking chapter text as whole')
# /series/se does include full URLs currently.
chapurl = chapteratag['href']
@@ -330,6 +333,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
self.setCoverImage(self.url,coverimg['src'])
#### Attempting averrating from JS metadata.
#### also alternate chapters from json
try:
state_start="state='"
state_end="'</script>"
@@ -348,7 +352,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## series
elif 'series' in json_state:
all_rates = [ float(x['rate_all']) for x in json_state['series']['works'] ]
## Extract dates from chapter approval dates if dates_from_chapters is enabled
if self.getConfig("dates_from_chapters"):
date_approvals = []
@@ -365,8 +369,20 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('dateUpdated', date_approvals[-1])
if all_rates:
self.story.setMetadata('averrating', '%4.2f' % (sum(all_rates) / float(len(all_rates))))
## alternate chapters
if self.num_chapters() < 1:
seriesid = json_state.get('series',{}).get('coversSeriesId',None)
if seriesid:
logger.info("Fetching chapter data from JSON")
logger.debug(seriesid)
series_json = json.loads(self.get_request('https://literotica.com/api/3/series/%s/works'%seriesid))
# logger.debug(json.dumps(series_json, sort_keys=True,indent=2, separators=(',', ':')))
for chap in series_json:
self.add_chapter(chap['title'], 'https://www.literotica.com/s/'+chap['url'])
except Exception as e:
logger.debug("Processing JSON to find averrating failed. (%s)"%e)
logger.debug("Processing JSON failed. (%s)"%e)
## Features removed because not supportable by new site form:
## averrating metadata entry
@@ -525,7 +541,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
import json
last_page = int(js_story_list.group('last_page'))
current_page = int(js_story_list.group('current_page')) + 1
# Fetching the remaining urls from api. Can't trust the number given about the pages left from a website. Sometimes even the api returns outdated number of pages.
while current_page <= last_page:
i = len(urls)
logger.debug("Pages %s/%s"%(current_page, int(last_page)))