Add Series for sites that support it. twcs/twrt/tw default to not collect it--

they (ab)use 'series' as reading lists/user collections. Fix SIYE URL pattern &
metadata.
This commit is contained in:
Jim Miller 2012-01-27 22:51:05 -06:00
parent 7a58ea13ed
commit 779f615a7a
18 changed files with 281 additions and 69 deletions

View file

@ -435,8 +435,10 @@ permitted_values = {
'float' : ['numWords','numChapters'],
'bool' : ['status-C','status-I'],
'datetime' : ['datePublished', 'dateUpdated', 'dateCreated'],
'series' : ['series'],
'enumeration' : ['category',
'genre',
'series',
'characters',
'status',
'datePublished',
@ -471,6 +473,7 @@ titleLabels = {
'status':'Status',
'status-C':'Status:Completed',
'status-I':'Status:In-Progress',
'series':'Series',
'characters':'Characters',
'datePublished':'Published',
'dateUpdated':'Updated',
@ -491,7 +494,7 @@ titleLabels = {
'formatname':'File Format',
'formatext':'File Extension',
'siteabbrev':'Site Abbrev',
'version':'FFD Version'
'version':'FFDL Version'
}
class ColumnsTab(QWidget):

View file

@ -414,6 +414,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['publisher'] = story.getMetadata("site")
book['tags'] = writer.getTags()
book['comments'] = story.getMetadata("description") #, removeallentities=True) comments handles entities better.
book['series'] = story.getMetadata("series")
# adapter.opener is the element with a threadlock. But del
# adapter.opener doesn't work--subproc fails when it tries
@ -425,7 +426,10 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['password'] = adapter.password
book['icon'] = 'plus.png'
book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz)
if story.getMetadataRaw('datePublished'):
# should only happen when an adapter is broken, but better to
# fail gracefully.
book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz)
book['timestamp'] = None # filled below if not skipped.
if collision in (CALIBREONLY):
@ -598,9 +602,8 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
self._add_or_update_book(book,options,prefs,mi)
if options['collision'] == CALIBREONLY or \
(options['updatemeta'] and book['good']) :
self._update_metadata(db, book['calibre_id'], book, mi)
(options['updatemeta'] and book['good']):
self._update_metadata(db, book['calibre_id'], book, mi)
def _update_books_completed(self, book_list, options={}):
@ -663,49 +666,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
init_label="Updating calibre for stories...",
win_title="Update calibre for stories",
status_prefix="Updated")
# for book in good_list:
# print("add/update %s %s"%(book['title'],book['url']))
# mi = self._make_mi_from_book(book)
# if options['collision'] != CALIBREONLY:
# self._add_or_update_book(book,options,prefs,mi)
# if options['collision'] == CALIBREONLY or \
# (options['updatemeta'] and book['good']) :
# self._update_metadata(db, book['calibre_id'], book, mi)
##### split here.
# add_list = filter(lambda x : x['good'] and x['added'], book_list)
# update_list = filter(lambda x : x['good'] and not x['added'], book_list)
# update_ids = [ x['calibre_id'] for x in update_list ]
# if len(add_list):
# ## even shows up added to searchs. Nice.
# self.gui.library_view.model().books_added(len(add_list))
# if update_ids:
# self.gui.library_view.model().refresh_ids(update_ids)
# current = self.gui.library_view.currentIndex()
# self.gui.library_view.model().current_changed(current, previous)
# self.gui.tags_view.recount()
# self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.'%(len(update_list) + len(add_list))), 3000)
# if len(update_list) + len(add_list) != total_good:
# d = DisplayStoryListDialog(self.gui,
# 'Updates completed, final status',
# prefs,
# self.qaction.icon(),
# book_list,
# label_text='Stories have be added or updated in Calibre, some had additional problems.'
# )
# d.exec_()
# print("all done, remove temp dir.")
# remove_dir(options['tdir'])
def _add_or_update_book(self,book,options,prefs,mi=None):
db = self.gui.current_db
@ -776,7 +736,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
print("%s not a valid column type for %s, skipping."%(col,meta))
continue
label = coldef['label']
if coldef['datatype'] in ('enumeration','text','comments','datetime'):
if coldef['datatype'] in ('enumeration','text','comments','datetime','series'):
db.set_custom(book_id, book['all_metadata'][meta], label=label, commit=False)
elif coldef['datatype'] in ('int','float'):
num = unicode(book['all_metadata'][meta]).replace(",","")
@ -863,6 +823,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
mi.pubdate = book['pubdate']
mi.timestamp = book['timestamp']
mi.comments = book['comments']
mi.series = book['series']
return mi
@ -908,12 +869,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['title'] = mi.title
book['author'] = authors_to_string(mi.authors)
book['author_sort'] = mi.author_sort
# book['series'] = mi.series
# if mi.series:
# book['series_index'] = mi.series_index
# else:
# book['series_index'] = 0
book['comment'] = ''
book['url'] = ""
book['added'] = False

View file

@ -1,4 +1,4 @@
# Copyright 2011 Fanficdownloader team
# Copyright 2012 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -37,6 +37,7 @@ formatext_label:File Extension
category_label:Category
genre_label:Genre
characters_label:Characters
series_label:Series
## Completed/In-Progress
status_label:Status
## Dates story first published, last updated, and downloaded(last with time).
@ -61,12 +62,19 @@ authorId_label:Author ID
extratags_label:Extra Tags
## The version of fanficdownloader
##
version_label:FFD Version
version_label:FFDL Version
## items to include in the title page
## Empty entries will *not* appear, even if in the list.
## All current formats already include title and author.
titlepage_entries: category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description
titlepage_entries: series,category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description
## Try to collect the series name and this story's number in the series.
## Some sites (ab)use 'series' for reading lists and personal
## collections. This lets us turn series collection on and off per
## site without keeping a lengthy titlepage_entries list for each
## site, and prevents it from updating in the plugin.
collect_series: true
## include title page as first page.
include_titlepage: true
@ -129,12 +137,13 @@ background_color: ffffff
## values are available, plus output_filename.
#post_process_cmd: addbook -f "${output_filename}" -t "${title}"
## Each output format has a section that overrides [defaults]
[html]
[txt]
## Add URLs since there aren't links.
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description
titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description
## use \r\n for line endings, the windows convention. text output only.
windows_eol: true
@ -197,6 +206,9 @@ extratags:
#username:YourName
#password:yourpassword
## twilighted.net (ab)uses series as personal reading lists.
collect_series: false
[www.twiwrite.net]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -205,6 +217,9 @@ extratags:
#username:YourName
#password:yourpassword
## twiwrite.net (ab)uses series as personal reading lists.
collect_series: false
[www.whofic.com]
[www.mediaminer.org]
@ -222,6 +237,9 @@ extratags:
## personal.ini, not defaults.ini.
#is_adult:true
## thewriterscoffeeshop.com (ab)uses series as personal reading lists.
collect_series: false
[www.ficwad.com]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -262,6 +280,12 @@ output_filename: ${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
## confirm they are adult for adult content. In commandline version,
## this should go in your personal.ini, not defaults.ini.
#is_adult:true
## tth is a little unusual--it doesn't require user/pass, but the site
## keeps track of which chapters you've read and won't send another
## update until it thinks you're up to date. This way, on download,
## it thinks you're up to date.
#username:YourName
#password:yourpassword
[overrides]
## It may sometimes be useful to override all of the specific format,

View file

@ -181,6 +181,26 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):

View file

@ -272,6 +272,26 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/fanfic/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
# grab the text for an individual chapter.
def getChapterText(self, url):

View file

@ -176,7 +176,27 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), "%b %d %Y"))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/fanfiction/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)

View file

@ -81,7 +81,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
return "http://"+self.getSiteDomain()+"/siye/viewstory.php?sid=1234"
def getSiteURLPattern(self):
return re.escape("http://")+r"(www\.)?"+re.escape("siye.co.uk/siye/viewstory.php?sid=")+r"\d+$"
return re.escape("http://")+r"(www\.)?siye\.co\.uk/(siye/)?"+re.escape("viewstory.php?sid=")+r"\d+$"
# ## Login seems to be reasonably standard across eFiction sites.
# def needToLoginCheck(self, data):
@ -228,15 +228,13 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
if part.startswith("Summary:"):
part = part[part.find(':')+1:]
self.story.setMetadata('description',part)
# want to get the next tr of the table.
#print("%s"%titlea.parent.parent.findNextSibling('tr'))
# eFiction sites don't help us out a lot with their metadata
# formatting, so it's a little ugly.
moremeta = stripHTML(titlea.parent.parent.findNextSibling('tr'))
moremeta = stripHTML(titlea.parent.parent.parent.find('div',{'class':'desc'}))
for part in moremeta.replace(' - ','\n').split('\n'):
#print("part:%s"%part)
try:
@ -259,7 +257,25 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
if name == 'Words':
self.story.setMetadata('numWords', value)
try:
# Find Series name from series URL.
a = titlea.findPrevious('a', href=re.compile(r"series.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
# grab the text for an individual chapter.
def getChapterText(self, url):

View file

@ -205,6 +205,26 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):

View file

@ -84,7 +84,15 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
else:
self.story.setMetadata('dateUpdated',makeDate("1975-04-15","%Y-%m-%d"))
self.story.setMetadata('numWords','123456')
self.story.setMetadata('status','In-Completed')
idnum = int(self.story.getMetadata('storyId'))
if idnum % 2 == 1:
self.story.setMetadata('status','In-Progress')
else:
self.story.setMetadata('status','Completed')
self.setSeries('The Great Test',idnum)
self.story.setMetadata('rating','Tweenie')
self.story.setMetadata('authorId','98765')

View file

@ -207,7 +207,26 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/library/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)

View file

@ -214,6 +214,12 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
self.chapterUrls.append((stripHTML(o),url))
self.story.setMetadata('numChapters',len(self.chapterUrls))
pseries = soup.find('p', {'style':'margin-top:0px'})
m = re.match('This story is No\. (?P<num>\d+) in the series &quot;(?P<series>.+)&quot;\.',
pseries.text)
if m:
self.setSeries(m.group('series'),m.group('num'))
return

View file

@ -204,6 +204,25 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y"))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):

View file

@ -217,6 +217,25 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y"))
try:
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):

View file

@ -189,6 +189,26 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
if name == 'Word Count':
self.story.setMetadata('numWords', value)
try:
# Find Series name from series URL.
a = metadata.find('a', href=re.compile(r"series.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
self.setSeries(series_name, i)
break
i+=1
except:
# I find it hard to care if the series parsing fails
pass
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)

View file

@ -229,6 +229,11 @@ class BaseSiteAdapter(Configurable):
"Needs to be overriden in each adapter class."
pass
# Just for series, in case we choose to change how it's stored or represented later.
def setSeries(self,name,num):
    """Record series metadata on the story as 'name [num]'.

    Central place for series storage, in case the representation
    changes later.  Only applied when the per-site 'collect_series'
    config option is enabled; some sites (ab)use 'series' for reading
    lists/personal collections, so those sites turn it off.
    """
    if self.getConfig('collect_series'):
        self.story.setMetadata('series','%s [%s]'%(name, num))
fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05",
"June":"06","July":"07", "August":"08", "September":"09", "October":"10",
"November":"11", "December":"12" }

View file

@ -50,6 +50,7 @@ class BaseStoryWriter(Configurable):
'category',
'genre',
'characters',
'series',
'status',
'datePublished',
'dateUpdated',
@ -77,6 +78,7 @@ class BaseStoryWriter(Configurable):
'category':'Category',
'genre':'Genre',
'status':'Status',
'series':'Series',
'characters':'Characters',
'datePublished':'Published',
'dateUpdated':'Updated',
@ -97,7 +99,7 @@ class BaseStoryWriter(Configurable):
'formatname':'File Format',
'formatext':'File Extension',
'siteabbrev':'Site Abbrev',
'version':'FFD Version'
'version':'FFDL Version'
}
self.story.setMetadata('formatname',self.getFormatName())
self.story.setMetadata('formatext',self.getFormatExt())

View file

@ -42,6 +42,7 @@ formatext_label:File Extension
category_label:Category
genre_label:Genre
characters_label:Characters
series_label:Series
## Completed/In-Progress
status_label:Status
## Dates story first published, last updated, and downloaded(last with time).
@ -71,7 +72,14 @@ version_label:FFDL Version
## items to include in the title page
## Empty entries will *not* appear, even if in the list.
## All current formats already include title and author.
titlepage_entries: category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description
titlepage_entries: series,category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description
## Try to collect the series name and this story's number in the series.
## Some sites (ab)use 'series' for reading lists and personal
## collections. This lets us turn series collection on and off per
## site without keeping a lengthy titlepage_entries list for each
## site, and prevents it from updating in the plugin.
collect_series: true
## include title page as first page.
include_titlepage: true
@ -108,7 +116,7 @@ background_color: ffffff
[txt]
## Add URLs since there aren't links.
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description
titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description
## use \r\n for line endings, the windows convention. text output only.
windows_eol: true
@ -168,6 +176,9 @@ extratags:
#username:YourName
#password:yourpassword
## twilighted.net (ab)uses series as personal reading lists.
collect_series: false
[www.twiwrite.net]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -176,6 +187,9 @@ extratags:
#username:YourName
#password:yourpassword
## twiwrite.net (ab)uses series as personal reading lists.
collect_series: false
[www.whofic.com]
[www.mediaminer.org]
@ -193,6 +207,9 @@ extratags:
## personal.ini, not defaults.ini.
#is_adult:true
## thewriterscoffeeshop.com (ab)uses series as personal reading lists.
collect_series: false
[www.ficwad.com]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In

View file

@ -7,11 +7,23 @@
## personal.ini, not defaults.ini.
#is_adult:true
## Try to collect series name and number of this story in series.
## Some sites (ab)use 'series' for reading lists and personal
## collections. This lets us turn it on and off by site without
## keeping a lengthy titlepage_entries per site and prevents it
## updating in the plugin.
## Turn off in [defaults] or [overrides] to prevent all sites from
## updating series column.
## default is true
#collect_series: false
## Most common, I expect will be using this to save username/passwords
## for different sites.
[www.twilighted.net]
#username:YourPenname
#password:YourPassword
## default is false
#collect_series: true
[www.ficwad.com]
#username:YourUsername
@ -20,6 +32,8 @@
[www.twiwrite.net]
#username:YourName
#password:yourpassword
## default is false
#collect_series: true
[www.adastrafanfic.com]
## Some sites do not require a login, but do require the user to
@ -30,6 +44,8 @@
#username:YourName
#password:yourpassword
#is_adult:true
## default is false
#collect_series: true
[www.fictionalley.org]
#is_adult:true
@ -53,3 +69,6 @@
## This section will override anything in the system defaults or other
## sections here.
[overrides]
## default varies by site. Set true here to force all sites to
## collect series.
#collect_series: true