mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-01-05 15:44:14 +01:00
Merge pull request #171 from davidfor/master
Storiesonline and Literotica updates
This commit is contained in:
commit
cd0178030c
5 changed files with 42 additions and 11 deletions
|
|
@ -1475,6 +1475,10 @@ extra_titlepage_entries:eroticatags,averrating
|
|||
## Extract more erotica_tags from the meta tag of each chapter
|
||||
use_meta_keywords: true
|
||||
|
||||
## Chapters can be in different categories. Default to not using all
|
||||
## to be consistent with previous version.
|
||||
chapter_categories_use_all: false
|
||||
|
||||
## For multiple chapter stories, attempt to clean up the chapter title. This will
|
||||
## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1"
|
||||
## or just use the text. If this can't be done, the full title is used.
|
||||
|
|
|
|||
|
|
@ -227,6 +227,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
description = "%d. %s" % (len(descriptions)+1,stripHTML(chapterTr.findAll("td")[1]))
|
||||
description = stripHTML(chapterTr.findAll("td")[1])
|
||||
chapterLink = chapterTr.find("td", "fc").find("a")
|
||||
if self.getConfig('chapter_categories_use_all'):
|
||||
self.story.addToList('category', chapterTr.findAll("td")[2].text)
|
||||
self.story.addToList('eroticatags', chapterTr.findAll("td")[2].text)
|
||||
pub_date = makeDate(chapterTr.findAll('td')[-1].text, self.dateformat)
|
||||
dates.append(pub_date)
|
||||
|
|
@ -305,7 +307,8 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
self.story.setMetadata('numChapters', len(self.chapterUrls))
|
||||
|
||||
self.story.setMetadata('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
|
||||
# Add the category from the breadcrumb. This might duplicate a category already added.
|
||||
self.story.addToList('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
|
||||
self.getCategories(soup1)
|
||||
# self.story.setMetadata('description', soup1.find('meta', {'name': 'description'})['content'])
|
||||
|
||||
|
|
|
|||
|
|
@ -169,19 +169,20 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
a = soup.find('h1')
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
||||
notice = soup.find('div', {'class' : 'notice'})
|
||||
if notice:
|
||||
self.story.setMetadata('notice',unicode(notice))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
for a in soup.findAll('a', href=re.compile(r"/a/\w+")):
|
||||
nav_section = soup.find('nav')
|
||||
for a in nav_section.findAll('a', {'rel' : 'author'}):
|
||||
self.story.addToList('authorId',a['href'].split('/')[2])
|
||||
self.story.addToList('authorUrl','http://'+self.host+a['href'])
|
||||
self.story.addToList('author',stripHTML(a).replace("'s Page",""))
|
||||
|
||||
# The rest of the metadata is within the article tag.
|
||||
soup = soup.find('article')
|
||||
|
||||
# Find the chapters:
|
||||
chapters = soup.findAll('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+":\d+(/.*)?$"))
|
||||
if len(chapters) != 0:
|
||||
logger.debug("Number of chapters: {0}".format(len(chapters)))
|
||||
for chapter in chapters:
|
||||
# just in case there are tags, like <i>, in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']))
|
||||
|
|
@ -192,18 +193,17 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# surprisingly, the detailed page does not give enough details, so go to author's page
|
||||
page=0
|
||||
i=0
|
||||
while i == 0:
|
||||
story_found = False
|
||||
while not story_found:
|
||||
page = page + 1
|
||||
data = self._fetchUrl(self.story.getList('authorUrl')[0]+"/"+unicode(page))
|
||||
asoup = self.make_soup(data)
|
||||
|
||||
a = asoup.findAll('td', {'class' : 'lc2'})
|
||||
for lc2 in a:
|
||||
if lc2.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId'))):
|
||||
i=1
|
||||
story_found = True
|
||||
break
|
||||
if a[len(a)-1] == lc2:
|
||||
page=page+1
|
||||
|
||||
for cat in lc2.findAll('div', {'class' : 'typediv'}):
|
||||
self.story.addToList('genre',cat.text)
|
||||
|
|
@ -352,6 +352,24 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
|
||||
# Remove all the metadata elements to leave any preamble text. This is usually
|
||||
# a notice or a foreword.
|
||||
if len(self.chapterUrls) > 1:
|
||||
header = soup.find('header')
|
||||
header.extract()
|
||||
else:
|
||||
soup = soup.find('header')
|
||||
# Remove some tags based on their class or id
|
||||
elements_to_remove = ['#det-link', '#s-details', '#index-list', '#s-title', '#s-auth', '.copy']
|
||||
if not self.getConfig('include_images'):
|
||||
elements_to_remove.append('img')
|
||||
for element_name in elements_to_remove:
|
||||
elements = soup.select(element_name)
|
||||
for element in elements:
|
||||
element.extract()
|
||||
if len(soup.contents ) > 0 and (len(soup.text.strip()) > 0 or len(soup.find_all('img')) > 0):
|
||||
self.story.setMetadata('notice', self.utf8FromSoup(url, soup))
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
|
|
|
|||
|
|
@ -209,6 +209,7 @@ def get_valid_set_options():
|
|||
'romancecat_to_characters_ships':(['tthfanfic.org'],None,boollist),
|
||||
|
||||
'use_meta_keywords':(['literotica.com'],None,boollist),
|
||||
'chapter_categories_use_all':(['literotica.com'],None,boollist),
|
||||
'clean_chapter_titles':(['literotica.com'],None,boollist),
|
||||
'description_in_chapter':(['literotica.com'],None,boollist),
|
||||
|
||||
|
|
@ -389,6 +390,7 @@ def get_valid_keywords():
|
|||
'pairingcat_to_characters_ships',
|
||||
'romancecat_to_characters_ships',
|
||||
'use_meta_keywords',
|
||||
'chapter_categories_use_all',
|
||||
'clean_chapter_titles',
|
||||
'description_in_chapter',
|
||||
'inject_chapter_title',
|
||||
|
|
|
|||
|
|
@ -1497,6 +1497,10 @@ extra_titlepage_entries:eroticatags,averrating
|
|||
## Extract more erotica_tags from the meta tag of each chapter
|
||||
use_meta_keywords: true
|
||||
|
||||
## Chapters can be in different categories. Default to not using all
|
||||
## to be consistent with previous version.
|
||||
chapter_categories_use_all: false
|
||||
|
||||
## For multiple chapter stories, attempt to clean up the chapter title. This will
|
||||
## remove the story title and change "Ch. 01" to "Chapter 1", "Pt. 01" to "Part 1"
|
||||
## or just use the text. If this can't be done, the full title is used.
|
||||
|
|
|
|||
Loading…
Reference in a new issue