mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-01-02 22:23:52 +01:00
Update for recent site change and fix first chapter
Recent site changes mean the keywords meta attribute no longer has spaces after the commas. Also, sometimes the first chapter does not have a number. Previously this defaulted to "Chapter 1"; now the first chapter's title is worked out based on later chapters.
This commit is contained in:
parent
69b6fcc17b
commit
52ae3d1ec0
1 changed file with 23 additions and 2 deletions
|
|
@ -108,7 +108,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getCategories(self, soup):
|
||||
if self.getConfig("use_meta_keywords"):
|
||||
categories = soup.find("meta", {"name":"keywords"})['content'].split(', ')
|
||||
categories = soup.find("meta", {"name":"keywords"})['content'].split(',')
|
||||
categories = [c for c in categories if not self.story.getMetadata('title') in c]
|
||||
if self.story.getMetadata('author') in categories:
|
||||
categories.remove(self.story.getMetadata('author'))
|
||||
|
|
@ -228,6 +228,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
descriptions = []
|
||||
ratings = []
|
||||
chapters = []
|
||||
chapter_name_type = None
|
||||
while chapterTr is not None and 'sl' in chapterTr['class']:
|
||||
description = "%d. %s" % (len(descriptions)+1,stripHTML(chapterTr.findAll("td")[1]))
|
||||
description = stripHTML(chapterTr.findAll("td")[1])
|
||||
|
|
@ -241,13 +242,16 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
chapter_title = chapterLink.text
|
||||
if self.getConfig("clean_chapter_titles"):
|
||||
# logger.debug('\tChapter Name: "%s"' % chapterLink.string)
|
||||
# logger.debug('\tChapter Name: "%s"' % chapterLink.text)
|
||||
if chapterLink.text.lower().startswith(seriesTitle.lower()):
|
||||
chapter = chapterLink.text[len(seriesTitle):].strip()
|
||||
# logger.debug('\tChapter: "%s"' % chapter)
|
||||
if chapter == '':
|
||||
chapter_title = 'Chapter %d' % (self.num_chapters() + 1)
|
||||
# Sometimes the first chapter does not have type of chapter
|
||||
if self.num_chapters() == 0:
|
||||
logger.debug('\tChapter: first chapter without chapter type')
|
||||
chapter_name_type = None
|
||||
else:
|
||||
separater_char = chapter[0]
|
||||
# logger.debug('\tseparater_char: "%s"' % separater_char)
|
||||
|
|
@ -259,14 +263,19 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
chapter_title = 'Chapter %d' % int(chapter)
|
||||
except:
|
||||
chapter_title = 'Chapter %s' % chapter
|
||||
chapter_name_type = 'Chapter' if chapter_name_type is None else chapter_name_type
|
||||
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
||||
elif chapter.lower().startswith('pt.'):
|
||||
chapter = chapter[len('pt.'):]
|
||||
try:
|
||||
chapter_title = 'Part %d' % int(chapter)
|
||||
except:
|
||||
chapter_title = 'Part %s' % chapter
|
||||
chapter_name_type = 'Part' if chapter_name_type is None else chapter_name_type
|
||||
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
||||
elif separater_char in [":", "-"]:
|
||||
chapter_title = chapter
|
||||
logger.debug('\tChapter: taking chapter text as whole')
|
||||
|
||||
# pages include full URLs.
|
||||
chapurl = chapterLink['href']
|
||||
|
|
@ -285,6 +294,18 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
except:
|
||||
pass
|
||||
|
||||
if self.getConfig("clean_chapter_titles") \
|
||||
and chapter_name_type is not None \
|
||||
and not chapters[0][0].startswith(chapter_name_type):
|
||||
logger.debug('\tChapter: chapter_name_type="%s"' % chapter_name_type)
|
||||
logger.debug('\tChapter: first chapter="%s"' % chapters[0][0])
|
||||
logger.debug('\tChapter: first chapter number="%s"' % chapters[0][0][len('Chapter'):])
|
||||
chapters[0] = ("%s %s" % (chapter_name_type, chapters[0][0][len('Chapter'):].strip()),
|
||||
chapters[0][1],
|
||||
chapters[0][2],
|
||||
chapters[0][3]
|
||||
)
|
||||
|
||||
chapters = sorted(chapters, key=lambda chapter: chapter[3])
|
||||
for i, chapter in enumerate(chapters):
|
||||
self.add_chapter(chapter[0], chapter[1])
|
||||
|
|
|
|||
Loading…
Reference in a new issue