Implement Alternate Tagging and Date calculation for Literotica

This commit is contained in:
albyofdoom 2025-11-18 16:55:52 -08:00 committed by Jim Miller
parent a599ff6ad2
commit d5c20db681
4 changed files with 61 additions and 7 deletions

View file

@ -2292,6 +2292,22 @@ chapter_categories_use_all: false
## or just use the text. If this can't be done, the full title is used.
clean_chapter_titles: false
## For stories, collect tags from individual chapter pages in addition to the
## series page tags. This allows collection of tags beyond the top 10 shown on
## the series page. However, if the author updates tags on a chapter and not on
## the series, those tags may persist even if the chapter is not fetched during
## an update.
## Default is false to maintain previous behavior.
tags_from_chapters: false
## For multi-chapter stories (series), use the chapter approval dates for datePublished
## and dateUpdated instead of the series metadata dates. This provides more accurate dates
## based on actual posting dates rather than on when the series metadata changes.
## Note that this method can produce dates that differ widely from the series metadata
## dates if chapters were written long before being approved, if chapters were approved
## out of order, or if the works were approved/updated before Literotica's current
## series system was implemented.
## Default is false to maintain previous behavior.
dates_from_chapters: false
## Some stories mistakenly include 'Ch' or 'Pt' at the end of the
## story title. Appears to be a site bug or common author error. Copy
## these to your personal.ini (and uncomment) to correct.

View file

@ -182,6 +182,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
else: # if all else fails
self.story.setMetadata('authorId', stripHTML(authora))
## Collect tags from series/story page if tags_from_chapters is enabled
if self.getConfig("tags_from_chapters"):
if soup.select('div#tabpanel-tags'):
# logger.debug("tags1")
self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div#tabpanel-tags a.av_as') ])
@ -250,7 +252,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## Multi-chapter stories. AKA multi-part 'Story Series'.
bn_antags = soup.select('div#tabpanel-info p.bn_an')
# logger.debug(bn_antags)
if bn_antags:
if bn_antags and not self.getConfig("dates_from_chapters"):
## Use dates from series metadata unless dates_from_chapters is enabled
dates = []
for datetag in bn_antags[:2]:
datetxt = stripHTML(datetag)
@ -345,6 +348,21 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
## series
elif 'series' in json_state:
all_rates = [ float(x['rate_all']) for x in json_state['series']['works'] ]
## Extract dates from chapter approval dates if dates_from_chapters is enabled
if self.getConfig("dates_from_chapters"):
date_approvals = []
for work in json_state['series']['works']:
if 'date_approve' in work:
try:
date_approvals.append(makeDate(work['date_approve'], self.dateformat))
except:
pass
if date_approvals:
# Oldest date is published, newest is updated
date_approvals.sort()
self.story.setMetadata('datePublished', date_approvals[0])
self.story.setMetadata('dateUpdated', date_approvals[-1])
if all_rates:
self.story.setMetadata('averrating', '%4.2f' % (sum(all_rates) / float(len(all_rates))))
except Exception as e:

View file

@ -266,6 +266,8 @@ def get_valid_set_options():
'description_in_chapter':(['literotica.com'],None,boollist),
'fetch_stories_from_api':(['literotica.com'],None,boollist),
'order_chapters_by_date':(['literotica.com'],None,boollist),
'tags_from_chapters':(['literotica.com'],None,boollist),
'dates_from_chapters':(['literotica.com'],None,boollist),
'inject_chapter_title':(['asianfanfics.com']+wpc_list,None,boollist),
'inject_chapter_image':(['asianfanfics.com'],None,boollist),
@ -520,6 +522,8 @@ def get_valid_keywords():
'description_in_chapter',
'order_chapters_by_date',
'fetch_stories_from_api',
'tags_from_chapters',
'dates_from_chapters',
'inject_chapter_title',
'inject_chapter_image',
'append_datepublished_to_storyurl',

View file

@ -2285,6 +2285,22 @@ chapter_categories_use_all: false
## or just use the text. If this can't be done, the full title is used.
clean_chapter_titles: false
## For stories, collect tags from individual chapter pages in addition to the
## series page tags. This allows collection of tags beyond the top 10 shown on
## the series page. However, if the author updates tags on a chapter and not on
## the series, those tags may persist even if the chapter is not fetched during
## an update.
## Default is false to maintain previous behavior.
tags_from_chapters: false
## For multi-chapter stories (series), use the chapter approval dates for datePublished
## and dateUpdated instead of the series metadata dates. This provides more accurate dates
## based on actual posting dates rather than on when the series metadata changes.
## Note that this method can produce dates that differ widely from the series metadata
## dates if chapters were written long before being approved, if chapters were approved
## out of order, or if the works were approved/updated before Literotica's current
## series system was implemented.
## Default is false to maintain previous behavior.
dates_from_chapters: false
## Some stories mistakenly include 'Ch' or 'Pt' at the end of the
## story title. Appears to be a site bug or common author error. Copy
## these to your personal.ini (and uncomment) to correct.