Add support for multiple authors per story. Implemented for the AO3, TtH and wraithbait adapters so far.

The existing-book check now looks for a matching identifier URL first, then falls back to a title/author match.
This commit is contained in:
Jim Miller 2012-07-13 11:37:39 -05:00
parent f91ed265ba
commit a2e8bcac49
12 changed files with 160 additions and 81 deletions

View file

@ -371,7 +371,7 @@ class AuthorTableWidgetItem(ReadOnlyTableWidgetItem):
#Qt uses a simple < check for sorting items, override this to use the sortKey
def __lt__(self, other):
    """Tell whether this item sorts before ``other``.

    The comparison is made on each item's ``sort_key`` rather than on the
    displayed text.  Both keys are lower-cased first, so the resulting
    order is case-insensitive ('alice' sorts before 'Bob').

    other -- the item being compared against; it must expose a string
             ``sort_key`` attribute.

    Returns True when this item's lower-cased key is smaller.
    """
    # A single return: a case-sensitive comparison placed ahead of this one
    # would win and leave the case-insensitive ordering unreachable.
    return self.sort_key.lower() < other.sort_key.lower()
class UpdateExistingDialog(SizePersistedDialog):
def __init__(self, gui, header, prefs, icon, books,
@ -623,7 +623,7 @@ class StoryListTableWidget(QTableWidget):
title_cell.setData(Qt.UserRole, QVariant(row))
self.setItem(row, 1, title_cell)
self.setItem(row, 2, AuthorTableWidgetItem(book['author'], book['author_sort']))
self.setItem(row, 2, AuthorTableWidgetItem(", ".join(book['author']), ", ".join(book['author_sort'])))
url_cell = ReadOnlyTableWidgetItem(book['url'])
#url_cell.setData(Qt.UserRole, QVariant(book['url']))

View file

@ -480,7 +480,7 @@ make_firstimage_cover:true
book['all_metadata'] = story.getAllMetadata(removeallentities=True)
book['title'] = story.getMetadata("title", removeallentities=True)
book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True)
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags() # getTags could be moved up into adapter now. Adapter didn't used to know the fileform
book['comments'] = stripHTML(story.getMetadata("description")) #, removeallentities=True) comments handles entities better.
@ -520,20 +520,24 @@ make_firstimage_cover:true
# only care about collisions when not ADDNEW
elif collision != ADDNEW:
# 'new' book from URL. collision handling applies.
print("from URL")
# find dups
mi = MetaInformation(story.getMetadata("title", removeallentities=True),
[story.getMetadata("author", removeallentities=True)]) # author is a list.
identicalbooks = db.find_identical_books(mi)
## removed for being overkill.
# for ib in identicalbooks:
# # only *really* identical if URL matches, too.
# # XXX make an option?
# if self._get_story_url(db,ib) == url:
# identicalbooks.append(ib)
#print("identicalbooks:%s"%identicalbooks)
print("from URL(%s)"%url)
# try to find by identifier url first.
searchstr = 'identifiers:"=url:%s"'%url.replace(":","|")
identicalbooks = db.search_getting_ids(searchstr, None)
if len(identicalbooks) < 1:
# find dups
authlist = story.getList("author", removeallentities=True)
if len(authlist) > 100:
raise NotGoingToDownload("Story has too many authors--search for existing book will fail. Update by selecting book directly or use Add New.","search_delete_saved.png")
mi = MetaInformation(story.getMetadata("title", removeallentities=True),
authlist)
identicalbooks = db.find_identical_books(mi)
if len(identicalbooks) > 0:
print("existing found by title/author(s)")
else:
print("existing found by identifier URL")
if collision == SKIP and identicalbooks:
raise NotGoingToDownload("Skipping duplicate story.","list_remove.png")
@ -544,7 +548,6 @@ make_firstimage_cover:true
if collision == CALIBREONLY and not identicalbooks:
collision = ADDNEW
options['collision'] = ADDNEW
# raise NotGoingToDownload("Not updating Calibre Metadata, no existing book to update.","search_delete_saved.png")
if len(identicalbooks)>0:
book_id = identicalbooks.pop()
@ -703,9 +706,9 @@ make_firstimage_cover:true
'collision':ADDNEW,
'updatemeta':True,
'updateepubcover':True},):
print("add/update bad %s %s %s"%(book['title'],book['url'],book['comment']))
db.set_custom(book['calibre_id'], book['comment'], label=label, commit=True)
if book['calibre_id']:
print("add/update bad %s %s %s"%(book['title'],book['url'],book['comment']))
db.set_custom(book['calibre_id'], book['comment'], label=label, commit=True)
def _update_books_completed(self, book_list, options={}, showlist=True):
@ -855,9 +858,11 @@ make_firstimage_cover:true
# set author link if found. All current adapters have authorUrl, except anonymous on AO3.
if 'authorUrl' in book['all_metadata']:
autid=db.get_author_id(book['author'])
db.set_link_field_for_author(autid, unicode(book['all_metadata']['authorUrl']),
commit=False, notify=False)
authurls = book['all_metadata']['authorUrl'].split(", ")
for i, auth in enumerate(book['author']):
autid=db.get_author_id(auth)
db.set_link_field_for_author(autid, unicode(authurls[i]),
commit=False, notify=False)
db.set_metadata(book_id,mi)
@ -1003,19 +1008,19 @@ make_firstimage_cover:true
message="<p>You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?</p>"%l
confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui)
def _find_existing_book_id(self,db,book,matchurl=True):
mi = MetaInformation(book["title"],[book["author"]]) # author is a list.
identicalbooks = db.find_identical_books(mi)
if matchurl: # only *really* identical if URL matches, too.
for ib in identicalbooks:
if self._get_story_url(db,ib) == book['url']:
return ib
if identicalbooks:
return identicalbooks.pop()
return None
# def _find_existing_book_id(self,db,book,matchurl=True):
# mi = MetaInformation(book["title"],book["author"]) # author is a list.
# identicalbooks = db.find_identical_books(mi)
# if matchurl: # only *really* identical if URL matches, too.
# for ib in identicalbooks:
# if self._get_story_url(db,ib) == book['url']:
# return ib
# if identicalbooks:
# return identicalbooks.pop()
# return None
def _make_mi_from_book(self,book):
mi = MetaInformation(book['title'],[book['author']]) # author is a list.
mi = MetaInformation(book['title'],book['author']) # author is a list.
mi.set_identifiers({'url':book['url']})
mi.publisher = book['publisher']
mi.tags = book['tags']
@ -1052,8 +1057,7 @@ make_firstimage_cover:true
book['good'] = True
book['calibre_id'] = None
book['title'] = 'Unknown'
book['author'] = 'Unknown'
book['author_sort'] = 'Unknown'
book['author_sort'] = book['author'] = ['Unknown'] # list
book['begin'] = None
book['end'] = None
@ -1069,8 +1073,7 @@ make_firstimage_cover:true
book['good'] = good
book['calibre_id'] = idval
book['title'] = 'Unknown'
book['author'] = 'Unknown'
book['author_sort'] = 'Unknown'
book['author_sort'] = book['author'] = ['Unknown'] # list
book['begin'] = None
book['end'] = None
@ -1086,7 +1089,7 @@ make_firstimage_cover:true
book['good'] = True
book['calibre_id'] = mi.id
book['title'] = mi.title
book['author'] = authors_to_string(mi.authors)
book['author'] = mi.authors
book['author_sort'] = mi.author_sort
book['comment'] = ''
book['url'] = ""

View file

@ -155,14 +155,15 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('title',a.string)
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"^/users/\w+/pseuds/\w+"))
if a == None: # ao3 allows for author 'Anonymous' with no author link.
alist = soup.findAll('a', href=re.compile(r"^/users/\w+/pseuds/\w+"))
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
self.story.setMetadata('author','Anonymous')
self.story.setMetadata('authorUrl',self.url)
else:
self.story.setMetadata('authorId',a['href'].split('/')[2])
self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
self.story.setMetadata('author',a.text)
for a in alist:
self.story.addToList('authorId',a['href'].split('/')[2])
self.story.addToList('authorUrl','http://'+self.host+a['href'])
self.story.addToList('author',a.text)
# Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$"))

View file

@ -103,8 +103,19 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
self.story.setMetadata('rating','Tweenie')
self.story.setMetadata('authorId','98765')
self.story.setMetadata('authorUrl','http://author/url')
if self.story.getMetadata('storyId') == '673':
self.story.addToList('author','Author From List')
self.story.addToList('author','Author From List 2')
self.story.addToList('authorId','98765')
self.story.addToList('authorId','98765-2')
self.story.addToList('authorUrl','http://author/url')
self.story.addToList('authorUrl','http://author/url-2')
else:
self.story.setMetadata('authorId','98765')
self.story.setMetadata('authorUrl','http://author/url')
self.story.addToList('warnings','Swearing')
self.story.addToList('warnings','Violence')
@ -175,6 +186,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
<p>http://test1.com?sid=670 - Succeeds, but sleeps 2sec on each chapter</p>
<p>http://test1.com?sid=671 - Succeeds, but sleeps 2sec metadata only</p>
<p>http://test1.com?sid=672 - Succeeds, quick meta, sleeps 2sec chapters only</p>
<p>http://test1.com?sid=673 - Succeeds, multiple authors</p>
<p>Odd sid's will be In-Progress, evens complete. sid&lt;10 will be assigned one of four languages and included in a series.</p>
</div>
'''

View file

@ -151,12 +151,33 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('authorId',a['href'].split('/')[1].split('-')[1])
self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
self.story.setMetadata('author',stripHTML(a))
authorurl = 'http://'+self.host+a['href']
ainfo = soup.find('a', href='/StoryInfo-%s-1'%self.story.getMetadata('storyId'))
if ainfo != None: # indicates multiple authors/contributors.
try:
# going to pull part of the meta data from author list page.
infourl = 'http://'+self.host+ainfo['href']
logging.debug("**StoryInfo** URL: "+infourl)
infodata = self._fetchUrl(infourl)
infosoup = bs.BeautifulSoup(infodata)
for a in infosoup.findAll('a',href=re.compile(r"^/Author-\d+")):
self.story.addToList('authorId',a['href'].split('/')[1].split('-')[1])
self.story.addToList('authorUrl','http://'+self.host+a['href'].replace("/Author-","/AuthorStories-"))
self.story.addToList('author',stripHTML(a))
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(url)
else:
raise e
try:
# going to pull part of the meta data from author list page.
logging.debug("**AUTHOR** URL: "+self.story.getMetadata('authorUrl'))
authordata = self._fetchUrl(self.story.getMetadata('authorUrl'))
descurl=self.story.getMetadata('authorUrl')
# going to pull part of the meta data from *primary* author list page.
logging.debug("**AUTHOR** URL: "+authorurl)
authordata = self._fetchUrl(authorurl)
descurl=authorurl
authorsoup = bs.BeautifulSoup(authordata)
# author can have several pages, scan until we find it.
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
@ -209,7 +230,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
if not BtVSNonX:
BtVS = False # Don't add BtVS if Non-Crossover, unless it's a BtVS/AtS Non-Crossover
print("BtVS: %s BtVSNonX: %s"%(BtVS,BtVSNonX))
#print("BtVS: %s BtVSNonX: %s"%(BtVS,BtVSNonX))
if BtVS:
self.story.addToList('category','Buffy: The Vampire Slayer')

View file

@ -119,10 +119,11 @@ class WraithBaitComAdapter(BaseSiteAdapter):
self.story.setMetadata('title',a.string)
# Find authorid and URL from... author url.
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
alist = pt.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
for a in alist:
self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','http://'+self.host+'/'+a['href'])
self.story.addToList('author',a.string)
rating=pt.text.split('[')[1].split(']')[0]
self.story.setMetadata('rating', rating)

View file

@ -223,9 +223,9 @@ class Story:
def getMetadata(self, key, removeallentities=False, doreplacements=True):
value = None
if self.getLists().has_key(key):
if self.isList(key):
value = ', '.join(self.getList(key))
if self.metadata.has_key(key):
elif self.metadata.has_key(key):
value = self.metadata[key]
if value:
if key == "numWords":
@ -247,11 +247,33 @@ class Story:
All single value *and* list value metadata as strings.
'''
allmetadata = {}
# special handling for authors/authorUrls
authlinkhtml="<a class='authorlink' href='%s'>%s</a>"
if 'author' in self.listables.keys(): # more than one author, assume multiple authorUrl too.
htmllist=[]
for i, v in enumerate(self.listables['author']):
aurl = self.listables['authorUrl'][i]
auth = v
# make sure doreplacements & removeallentities are honored.
if doreplacements:
aurl=self.doReplacments(aurl)
auth=self.doReplacments(auth)
if removeallentities:
aurl=removeAllEntities(aurl)
auth=removeAllEntities(auth)
htmllist.append(authlinkhtml%(aurl,auth))
self.setMetadata('authorHTML',', '.join(htmllist))
else:
self.setMetadata('authorHTML',authlinkhtml%(self.getMetadata('authorUrl', removeallentities, doreplacements),
self.getMetadata('author', removeallentities, doreplacements)))
for k in self.metadata.keys():
allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)
for l in self.listables.keys():
allmetadata[l] = self.getMetadata(l, removeallentities, doreplacements)
return allmetadata
# just for less clutter in adapters.
@ -263,23 +285,36 @@ class Story:
if value==None:
return
value = conditionalRemoveEntities(value)
if not self.listables.has_key(listname):
if not self.isList(listname):
self.listables[listname]=[]
# prevent duplicates.
if not value in self.listables[listname]:
self.listables[listname].append(value)
def getList(self, listname, removeallentities=False, doreplacements=True):
    """Return the values stored under ``listname`` as a list.

    listname          -- metadata key to look up.
    removeallentities -- when true, pass each list value through
                         removeAllEntities.
    doreplacements    -- when true, pass each list value through
                         self.doReplacments.

    If ``listname`` is a list-valued entry (``self.isList``), its values
    are transformed according to the two flags.  Otherwise the single
    value from ``self.getMetadata`` is wrapped in a one-element list.

    None and empty-string values are always dropped, and the result is
    always a new list, so callers never receive the internal storage
    list itself.
    """
    if self.isList(listname):
        values = self.listables[listname]
        if doreplacements:
            values = [self.doReplacments(v) for v in values]
        if removeallentities:
            values = [removeAllEntities(v) for v in values]
    else:
        # Hand both flags to getMetadata and do NOT transform its result
        # again here: re-applying replacements would double-process the
        # value, and a missing key (None) would be fed to doReplacments.
        # NOTE(review): assumes getMetadata applies both flags itself; its
        # full body is not visible from here -- confirm.
        values = [self.getMetadata(listname,
                                   removeallentities=removeallentities,
                                   doreplacements=doreplacements)]
    return [v for v in values if v is not None and v != '']
def getLists(self):
    """Return a dict mapping each list-valued metadata name to its values.

    Every key of ``self.listables`` appears in the result, paired with
    whatever ``self.getList`` returns for that key using its defaults.
    """
    return dict((name, self.getList(name)) for name in self.listables)
def isList(self, listname):
    """Return True when ``listname`` is stored as a list-valued entry.

    Only ``self.listables`` is consulted; a key that exists solely as a
    single-value metadata entry yields False.
    """
    # 'in' replaces the deprecated dict.has_key(), which Python 3 removed.
    return listname in self.listables
def addChapter(self, title, html):
self.chapters.append( (title,html) )

View file

@ -134,7 +134,7 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use label and value.
"""
if self.getConfig("include_titlepage"):
self._write(out,START.substitute(self.story.metadata))
self._write(out,START.substitute(self.story.getAllMetadata()))
if WIDE_ENTRY==None:
WIDE_ENTRY=ENTRY
@ -166,7 +166,7 @@ class BaseStoryWriter(Configurable):
else:
self._write(out, entry)
self._write(out,END.substitute(self.story.metadata))
self._write(out,END.substitute(self.story.getAllMetadata()))
def writeTOCPage(self, out, START, ENTRY, END):
"""
@ -176,13 +176,13 @@ class BaseStoryWriter(Configurable):
"""
# Only do TOC if there's more than one chapter and it's configured.
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
self._write(out,START.substitute(self.story.metadata))
self._write(out,START.substitute(self.story.getAllMetadata()))
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
self._write(out,END.substitute(self.story.metadata))
self._write(out,END.substitute(self.story.getAllMetadata()))
# if no outstream is given, write to file.
def writeStory(self,outstream=None, metaonly=False, outfilename=None, forceOverwrite=False):

View file

@ -52,7 +52,7 @@ class EpubWriter(BaseStoryWriter):
<link href="stylesheet.css" type="text/css" charset="UTF-8" rel="stylesheet"/>
</head>
<body>
<h3><a href="${storyUrl}">${title}</a> by <a href="${authorUrl}">${author}</a></h3>
<h3><a href="${storyUrl}">${title}</a> by ${authorHTML}</h3>
<div>
''')
@ -79,7 +79,7 @@ ${value}<br />
<link href="stylesheet.css" type="text/css" charset="UTF-8" rel="stylesheet"/>
</head>
<body>
<h3><a href="${storyUrl}">${title}</a> by <a href="${authorUrl}">${author}</a></h3>
<h3><a href="${storyUrl}">${title}</a> by ${authorHTML}</h3>
<table class="full">
''')
@ -206,9 +206,15 @@ ${value}<br />
metadata.appendChild(newTag(contentdom,"dc:title",text=self.getMetadata('title')))
if self.getMetadata('author'):
metadata.appendChild(newTag(contentdom,"dc:creator",
attrs={"opf:role":"aut"},
text=self.getMetadata('author')))
if self.story.isList('author'):
for auth in self.story.getList('author'):
metadata.appendChild(newTag(contentdom,"dc:creator",
attrs={"opf:role":"aut"},
text=auth))
else:
metadata.appendChild(newTag(contentdom,"dc:creator",
attrs={"opf:role":"aut"},
text=self.getMetadata('author')))
metadata.appendChild(newTag(contentdom,"dc:contributor",text="fanficdownloader [http://fanficdownloader.googlecode.com]",attrs={"opf:role":"bkp"}))
metadata.appendChild(newTag(contentdom,"dc:rights",text=""))
@ -433,7 +439,7 @@ div { margin: 0pt; padding: 0pt; }
del tocncxdom
# write stylesheet.css file.
outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS.substitute(self.story.metadata))
outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS.substitute(self.story.getAllMetadata()))
# write title page.
if self.getConfig("titlepage_use_table"):

View file

@ -43,7 +43,7 @@ ${output_css}
</style>
</head>
<body>
<h1><a href="${storyUrl}">${title}</a> by <a href="${authorUrl}">${author}</a></h1>
<h1><a href="${storyUrl}">${title}</a> by ${authorHTML}</h1>
''')
self.HTML_TITLE_PAGE_START = string.Template('''
@ -82,7 +82,7 @@ ${output_css}
def writeStoryImpl(self, out):
self._write(out,self.HTML_FILE_START.substitute(self.story.metadata))
self._write(out,self.HTML_FILE_START.substitute(self.story.getAllMetadata()))
self.writeTitlePage(out,
self.HTML_TITLE_PAGE_START,
@ -100,4 +100,4 @@ ${output_css}
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
self._write(out,html)
self._write(out,self.HTML_FILE_END.substitute(self.story.metadata))
self._write(out,self.HTML_FILE_END.substitute(self.story.getAllMetadata()))

View file

@ -43,7 +43,7 @@ class MobiWriter(BaseStoryWriter):
<title>${title} by ${author}</title>
</head>
<body>
<h3><a href="${storyUrl}">${title}</a> by <a href="${authorUrl}">${author}</a></h3>
<h3><a href="${storyUrl}">${title}</a> by ${authorHTML}</h3>
<div>
''')
@ -69,7 +69,7 @@ ${value}<br />
<title>${title} by ${author}</title>
</head>
<body>
<h3><a href="${storyUrl}">${title}</a> by <a href="${authorUrl}">${author}</a></h3>
<h3><a href="${storyUrl}">${title}</a> by ${authorHTML}</h3>
<table class="full">
''')

View file

@ -114,7 +114,7 @@ End file.
wrapout = KludgeStringIO()
wrapout.write(self.TEXT_FILE_START.substitute(self.story.metadata))
wrapout.write(self.TEXT_FILE_START.substitute(self.story.getAllMetadata()))
self.writeTitlePage(wrapout,
self.TEXT_TITLE_PAGE_START,
@ -139,7 +139,7 @@ End file.
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))
self._write(out,self.lineends(html2text(html,wrap_width=self.wrap_width)))
self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.metadata))))
self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.getAllMetadata()))))
def wraplines(self, text):