diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 662c9cb3..a9c9420a 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -2241,10 +2241,9 @@ extracategories:Lois & Clark: The New Adventures of Superman [literotica.com] use_basic_cache:true user_agent: -extra_valid_entries:eroticatags,averrating +extra_valid_entries:eroticatags eroticatags_label:Erotica Tags -averrating_label:Average Rating -extra_titlepage_entries:eroticatags,averrating +extra_titlepage_entries:eroticatags ## Extract more erotica_tags from the meta tag of each chapter use_meta_keywords: true @@ -2267,14 +2266,6 @@ clean_chapter_titles: false ## Add the chapter description at the start of each chapter. description_in_chapter: false -## Force chapters in a story to be sorted by date instead of the order -## given by the author. Used to be the default for literotica. -## Note that FFF normalizes literotica.com story URLs to the first -## chapter URL. If the first chapter is not the same by date and by -## list, you may need to set order_chapters_by_date under *both* -## [storyURL] sections. -order_chapters_by_date:false - ## Clear FanFiction from defaults, site is original fiction. extratags:Erotica diff --git a/fanficfare/adapters/adapter_literotica.py b/fanficfare/adapters/adapter_literotica.py index d7527c10..840d3684 100644 --- a/fanficfare/adapters/adapter_literotica.py +++ b/fanficfare/adapters/adapter_literotica.py @@ -47,7 +47,6 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): # where first chapter doesn't have '-ch-'. # Now just rely on extractChapterUrlsAndMetadata to reset # storyId to first chapter link. - storyId = self.parsedUrl.path.split('/',)[2] ## DON'T normalize to www.literotica.com--keep for language, ## which will be set in _setURL(url). Also, multi-chapter @@ -66,7 +65,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): # The date format will vary from site to site. 
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior - self.dateformat = "%m/%d/%y" + self.dateformat = "%m/%d/%Y" @staticmethod def getSiteDomain(): @@ -78,11 +77,12 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://www.literotica.com/s/story-title https://www.literotica.com/s/story-title https://www.literotica.com/s/story-title https://www.literotica.com/i/image-or-comic-title https://www.literotica.com/p/poem-title http://portuguese.literotica.com/s/story-title http://german.literotica.com/s/story-title" + return "http://www.literotica.com/s/story-title https://www.literotica.com/series/se/9999999 https://www.literotica.com/s/story-title https://www.literotica.com/i/image-or-comic-title https://www.literotica.com/p/poem-title http://portuguese.literotica.com/s/story-title http://german.literotica.com/s/story-title" def getSiteURLPattern(self): + # also https://www.literotica.com/series/se/80075773 # /s/ for story, /i/ for image/comic, /p/ for poem - return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/(beta/)?[sip]/([a-zA-Z0-9_-]+)" + return r"https?://"+LANG_RE+r"(\.i)?\.literotica\.com/((beta/)?[sip]/([a-zA-Z0-9_-]+)|series/se/(?P<storyseriesid>[0-9]+))" def _setURL(self,url): # logger.debug("set URL:%s"%url) @@ -91,349 +91,166 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): lang = m.group('lang') if lang not in ('www','other'): self.story.setMetadata('language',lang.capitalize()) + # reset storyId + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[-1]) # logger.debug("language:%s"%self.story.getMetadata('language')) - def getCategories(self, soup): + def parseMetaEroticaTags(self, soup): if self.getConfig("use_meta_keywords"): - categories = soup.find("meta", {"name":"keywords"})['content'].split(',') - categories = [c for c in categories if not self.story.getMetadata('title') in c] - if self.story.getMetadata('author') in categories: - 
categories.remove(self.story.getMetadata('author')) - # logger.debug("Meta = %s" % categories) - for category in categories: - # logger.debug("\tCategory=%s" % category) -# self.story.addToList('category', category.title()) - self.story.addToList('eroticatags', category.title()) + tags = soup.find("meta", {"name":"keywords"})['content'].split(',') + tags = [t for t in tags if not self.story.getMetadata('title') in t] + if self.story.getMetadata('author') in tags: + tags.remove(self.story.getMetadata('author')) + for tag in tags: + self.story.addToList('eroticatags', tag.title()) def extractChapterUrlsAndMetadata(self): """ - NOTE: Some stories can have versions, - e.g. /my-story-ch-05-version-10 - NOTE: If two stories share the same title, a running index is added, - e.g.: /my-story-ch-02-1 - Strategy: - * Go to author's page, search for the current story link, - * If it's in a tr.root-story => One-part story - * , get metadata and be done - * If it's in a tr.sl => Chapter in series - * Search up from there until we find a tr.ser-ttl (this is the - story) - * Gather metadata - * Search down from there for all tr.sl until the next - tr.ser-ttl, foreach - * Chapter link is there + In April 2024, site introduced significant changes, including + adding a 'Story Series' page and link to it in each chapter. + But not all stories, one-shots don't have 'Story Series'. + + literotica has 'Story Series' & 'Story'. FFF calls them 'Story' & 'Chapters' + See https://github.com/JimmXinu/FanFicFare/issues/1058#issuecomment-2078490037 + + So /series/se/ will be the story URL for multi chapters but + keep individual 'chapter' URL for one-shots. 
""" + logger.debug("Chapter/Story URL: <%s> " % self.url) if not (self.is_adult or self.getConfig("is_adult")): raise exceptions.AdultCheckRequired(self.url) - # logger.debug("Chapter/Story URL: <%s> " % self.url) - - (data1,rurl) = self.get_request_redirected(self.url) + (data,rurl) = self.get_request_redirected(self.url) + # logger.debug(data) ## for language domains self._setURL(rurl) logger.debug("set opened url:%s"%self.url) - soup1 = self.make_soup(data1) - #strip comments from soup - [comment.extract() for comment in soup1.findAll(string=lambda text:isinstance(text, Comment))] + soup = self.make_soup(data) - if "This submission is awaiting moderator's approval" in data1: + if "This submission is awaiting moderator's approval" in data: raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url) + ## not series URL, assumed to be a chapter. Look for Story + ## Info block of post-beta page. I don't think it should happen? + if '/series/se' not in self.url: + if not soup.select_one('div.page__aside'): + raise exceptions.FailedToDownload("Missing Story Info block, Beta turned off?") + + storyseriestag = soup.select_one('a.bn_av') + # logger.debug("Story Series Tag:%s"%storyseriestag) + + if storyseriestag: + self._setURL(storyseriestag['href']) + data = self.get_request(storyseriestag['href']) + # logger.debug(data) + soup = self.make_soup(data) + # logger.debug(soup) + else: + logger.debug("One-shot") + + isSingleStory = '/series/se' not in self.url + + ## common between one-shots and multi-chapters + + # title + self.story.setMetadata('title', stripHTML(soup.select_one('h1'))) + # logger.debug(self.story.getMetadata('title')) + # author - authora = soup1.find("a", class_="y_eU") + ## XXX This is still the author URL like: + ## https://www.literotica.com/stories/memberpage.php?uid=999999&page=submissions + ## because that's what's on the page. It redirects to the /authors/ page. 
+ ## Only way I know right now to get the /authors/ is to make + ## the req and look at the redirect. + ## Should change to /authors/ if/when it starts appearing. + ## Assuming it's in the same place. + authora = soup.find("a", class_="y_eU") authorurl = authora['href'] - # logger.debug(authora) - # logger.debug(authorurl) - self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0]) if authorurl.startswith('//'): authorurl = self.parsedUrl.scheme+':'+authorurl + # logger.debug(authora) + # logger.debug(authorurl) + self.story.setMetadata('author', stripHTML(authora)) self.story.setMetadata('authorUrl', authorurl) - self.story.setMetadata('author', authora.text) + if '?' in authorurl: + self.story.setMetadata('authorId', urlparse.parse_qs(authorurl.split('?')[1])['uid'][0]) + elif '/authors/' in authorurl: + self.story.setMetadata('authorId', authorurl.split('/')[-1]) + else: # if all else fails + self.story.setMetadata('authorId', stripHTML(authora)) - # get the author page - if '/authors/' in authorurl and '/works/' not in authorurl: - authorurl = authorurl + '/works/stories' - ## Apr2024 site is redirecting memberpage.php to /authors/ for some users - (dataAuth, rurl) = self.get_request_redirected(authorurl) - if rurl: - logger.debug("Author url(%s) redirected to (%s)"%(authorurl,rurl)) - if '/authors/' in rurl and '/works/' not in rurl: - authorurl = rurl + '/works/stories' - dataAuth = self.get_request(authorurl) - # logger.debug(dataAuth) - soupAuth = self.make_soup(dataAuth) - #strip comments from soup - [comment.extract() for comment in soupAuth.findAll(string=lambda text:isinstance(text, Comment))] -# logger.debug(soupAuth) + ## takes *eroticatags* entries from on author's page <%s>" % (self.url, authorurl)) + self.story.extendList('eroticatags', [ stripHTML(t).title() for t in soup.select('div#tabpanel-tags a.av_as') ]) if isSingleStory: - self.story.setMetadata('title', storyLink.text.strip('/')) - # logger.debug('Title: 
"%s"' % storyLink.text.strip('/')) - self.setDescription(authorurl, urlTr.findAll("td")[1].text) - self.story.addToList('category', urlTr.findAll("td")[2].text) -# self.story.addToList('eroticatags', urlTr.findAll("td")[2].text) - date = urlTr.findAll('td')[-1].text - self.story.setMetadata('datePublished', makeDate(date, self.dateformat)) - self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat)) - self.add_chapter(storyLink.text, self.url) - averrating = stripHTML(storyLink.parent) - ## title (0.00) - averrating = averrating[averrating.rfind('(')+1:averrating.rfind(')')] - try: - self.story.setMetadata('averrating', float(averrating)) - except: - pass -# self.story.setMetadata('averrating',averrating) - # parse out the list of chapters + ## one-shots don't *display* date info, but they have it + ## hidden in