Refactor chapter internals for additional site-specific metadata per chapter.

This commit is contained in:
Jim Miller 2018-07-14 16:11:01 -05:00
parent 2943b0964f
commit ffee4aa495
3 changed files with 28 additions and 25 deletions

View file

@ -307,7 +307,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
#('Chapter n',self.url+"&chapter=o"),
]
for c in chapters:
self.add_chapter(c[0],c[1])
self.add_chapter(c[0],c[1],{'test':'asdf'})
def getChapterText(self, url):

View file

@ -143,15 +143,16 @@ class BaseSiteAdapter(Configurable):
self.chapterLast=int(last)-1
self.story.set_chapters_range(first,last)
def add_chapter(self,title,url):
def add_chapter(self,title,url,othermeta={}):
## Check for chapter URL in ignore_chapter_url_list.
## Normalize chapter urls, both from list and passed in, but
## don't save them that way to match previous behavior.
if self.ignore_chapter_url_list == None:
self.ignore_chapter_url_list = [ self.normalize_chapterurl(u) for u in self.getConfig('ignore_chapter_url_list').splitlines() ]
if self.normalize_chapterurl(url) not in self.ignore_chapter_url_list:
self.chapterUrls.append({'title':stripHTML(title),
'url':url})
meta = dict(othermeta) # copy
meta.update({'title':stripHTML(title),'url':url}) # after other to make sure they are set
self.chapterUrls.append(meta)
self.story.setMetadata('numChapters', self.num_chapters())
return True
# return true/false for those adapters that count words by
@ -185,11 +186,10 @@ class BaseSiteAdapter(Configurable):
url = chap['url']
#logger.debug("index:%s"%index)
newchap = False
passchap = dict(chap)
if (self.chapterFirst!=None and index < self.chapterFirst) or \
(self.chapterLast!=None and index > self.chapterLast):
self.story.addChapter(url,
removeEntities(title),
None)
passchap['html'] = None
else:
data = None
if self.oldchaptersmap:
@ -237,10 +237,11 @@ class BaseSiteAdapter(Configurable):
else:
raise
self.story.addChapter(url,
removeEntities(title),
removeEntities(data),
newchap)
passchap['url'] = url
passchap['title'] = title
passchap['html'] = data
self.story.addChapter(passchap, newchap)
self.storyDone = True
# include image, but no cover from story, add default_cover_image cover.

View file

@ -17,6 +17,7 @@
import os, re
import copy
from collections import defaultdict
import urlparse
import string
import json
@ -30,7 +31,7 @@ import urlparse as up
import bs4
import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
from htmlcleanup import conditionalRemoveEntities, removeEntities, removeAllEntities
from configurable import Configurable, re_compile
from htmlheuristics import was_run_marker
@ -1007,20 +1008,21 @@ class Story(Configurable):
return list(subjectset | set(self.getConfigList("extratags")))
def addChapter(self, url, title, html, newchap=False):
# logger.debug("addChapter(%s,%s)"%(url,newchap))
def addChapter(self, chap, newchap=False):
# logger.debug("addChapter(%s,%s)"%(chap,newchap))
chapter = defaultdict(unicode,chap) # default unknown to empty string
chapter['title'] = removeEntities(chapter['title'])
chapter['html'] = removeEntities(chapter['html'])
if self.getConfig('strip_chapter_numbers') and \
self.getConfig('chapter_title_strip_pattern'):
title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title)
self.chapters.append({'url':url,
'title':title,
'html':html,
'origtitle':title,
'toctitle':title,
'new':newchap,
'number':len(self.chapters)+1,
'index':len(self.chapters)+1,
'0index':"%04d"%(len(self.chapters)+1)})
chapter['title'] = re.sub(self.getConfig('chapter_title_strip_pattern'),"",chapter['title'])
chapter.update({'origtitle':chapter['title'],
'toctitle':chapter['title'],
'new':newchap,
'number':len(self.chapters)+1,
'index':len(self.chapters)+1,
'0index':"%04d"%(len(self.chapters)+1)})
self.chapters.append(chapter)
def getChapters(self,fortoc=False):
"Chapters will be dicts"
@ -1064,7 +1066,7 @@ class Story(Configurable):
usetempl = templ
# logger.debug("chap(%s)"%chap)
# Chapter = namedtuple('Chapter', 'url title html origtitle toctitle new')
chapter = copy.copy(chap)
chapter = defaultdict(unicode,chap)
chapter['chapter'] = chapter['title'] = usetempl.substitute(chap)
chapter['origtitle'] = templ.substitute(chap)
chapter['toctitle'] = toctempl.substitute(chap)