Add replace_metadata by metadata key feature, more meta for AO3, fix www handling.

This commit is contained in:
Jim Miller 2012-09-25 11:03:08 -05:00
parent 12c134911c
commit 4e0ea4bad1
7 changed files with 160 additions and 75 deletions

View file

@ -161,12 +161,14 @@ extratags: FanFiction
## for regexp details.
## Make sure to keep at least one space at the start of each line and
## to escape % to %%, if used.
## Lines have two or three parts. Two-part lines affect everything.
## Three-part lines affect only the listed metadata key(s).
#replace_metadata:
# Sci-Fi=>SF
# genre,category=>Sci-Fi=>SF
# Puella Magi Madoka Magica.* => Madoka
# Comedy=>Humor
# Crossover: (.*)=>\1
# (.*)Great(.*)=>\1Moderate\2
# title=>(.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
## Some readers don't show horizontal rule (<hr />) tags correctly.
@ -374,21 +376,32 @@ nook_img_fix:true
extratags: FanFiction,Testing
extracategories:Fafner
extragenres:Romance,Fluff
extracharacters:Reginald Smythe-Smythe,Mokona
extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
extraships:Smythe-Smythe/Mokona
extrawarnings:Extreme Bogosity
extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
include_in_compositeJ:dateCreated
include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK
include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
#include_in_ships:compositeK,genre
include_in_ships:compositeK,genre,ships,dateUpdated
extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
replace_metadata:
compositeL=>Val=>VALUE
series,extratags=>Test=>Plan
Puella Magi Madoka Magica.* => Madoka
Comedy=>Humor
Crossover: (.*)=>\1
(.*)Great(.*)=>\1Moderate\2
.*-Centered=>
characters=>Harry P\.=>Harry Potter
## If necessary, you can define [<site>:<format>] sections to
## customize the formats differently for the same site. Overrides
## defaults, format and site.
@ -398,6 +411,16 @@ extratags: FanFiction,Testing,Text
[test1.com:html]
extratags: FanFiction,Testing,HTML
[test1.com:epub]
extra_valid_entries:editedchars
include_in_editedchars:characters
replace_metadata:
editedchars=>^(.+)$=>somesite:\1
extra_titlepage_entries: editedchars
[archive.skyehawke.com]
[archiveofourown.org]
@ -413,6 +436,18 @@ extratags: FanFiction,Testing,HTML
## personal.ini, not defaults.ini.
#is_adult:true
## AO3 adapter defines a few extra metadata entries.
extra_valid_entries:fandoms,freefromtags,ao3categories,comments,kudos,hits,bookmarks
fandoms_label:Fandoms
freefromtags_label:Freeform Tags
ao3categories_label:AO3 Categories
comments_label:Comments
kudos_label:Kudos
hits_label:Hits
bookmarks_label:Bookmarks
#extra_titlepage_entries: fandoms,freefromtags,ao3categories,comments,kudos,hits,bookmarks
[ashwinder.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter

View file

@ -118,7 +118,29 @@ for x in imports():
#print x
__class_list.append(sys.modules[x].getClass())
def getDomainURL(url):
def getAdapter(config,url):
logging.debug("trying url:"+url)
(cls,fixedurl) = getClassFor(url)
logging.debug("fixedurl:"+fixedurl)
if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL
return adapter
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
def getConfigSections():
return [cls.getConfigSection() for cls in __class_list]
def getConfigSectionFor(url):
(cls,fixedurl) = getClassFor(url)
if cls:
return cls.getConfigSection()
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
def getClassFor(url):
## fix up leading protocol.
fixedurl = re.sub(r"(?i)^[htps]+[:/]+","http://",url.strip())
if not fixedurl.startswith("http"):
@ -135,34 +157,6 @@ def getDomainURL(url):
if( domain != parsedUrl.netloc ):
fixedurl = fixedurl.replace(parsedUrl.netloc,domain)
return (domain,fixedurl)
def getAdapter(config,url):
logging.debug("trying url:"+url)
(domain,fixedurl) = getDomainURL(url)
cls = getClassFromList(domain)
logging.debug("fixedurl:"+fixedurl)
if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL
return adapter
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
def getConfigSections():
return [cls.getConfigSection() for cls in __class_list]
def getConfigSectionFor(url):
(domain,fixedurl) = getDomainURL(url)
cls = getClassFromList(domain)
if cls:
return cls.getConfigSection()
# No adapter found.
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
def getClassFor(domain):
cls = getClassFromList(domain)
if not cls and domain.startswith("www."):
domain = domain.replace("www.","")
@ -173,6 +167,8 @@ def getClassFor(domain):
logging.debug("trying site:www."+domain)
cls = getClassFromList("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")
return (cls,fixedurl)
def getClassFromList(domain):
for cls in __class_list:

View file

@ -74,16 +74,12 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# The site domain. Does have www here, if it uses it.
return 'archiveofourown.org'
@classmethod
def getAcceptDomains(cls):
return ['www.archiveofourown.org','archiveofourown.org']
def getSiteExampleURLs(self):
return "http://"+self.getSiteDomain()+"/works/123456 http://"+self.getSiteDomain()+"/collections/Some_Archive/works/123456"
def getSiteURLPattern(self):
# http://archiveofourown.org/collections/Smallville_Slash_Archive/works/159770
return re.escape("http://")+"(www.)?"+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/(?P<id>\d+)"
return re.escape("http://")+re.escape(self.getSiteDomain())+r"(/collections/[^/]+)?/works/(?P<id>\d+)"
## Login
def needToLoginCheck(self, data):
@ -201,6 +197,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
a = metasoup.find('dd',{'class':"fandom tags"})
fandoms = a.findAll('a',{'class':"tag"})
for fandom in fandoms:
self.story.addToList('fandoms',fandom.string)
self.story.addToList('category',fandom.string)
a = metasoup.find('dd',{'class':"warning tags"})
@ -216,12 +213,15 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
if a != None:
genres = a.findAll('a',{'class':"tag"})
for genre in genres:
self.story.addToList('freefromtags',genre.string)
self.story.addToList('genre',genre.string)
a = metasoup.find('dd',{'class':"category tags"})
if a != None:
genres = a.findAll('a',{'class':"tag"})
for genre in genres:
if genre != "Gen":
self.story.addToList('ao3categories',genre.string)
self.story.addToList('genre',genre.string)
a = metasoup.find('dd',{'class':"character tags"})
@ -246,6 +246,18 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
if 'Words:' in label:
self.story.setMetadata('numWords', value)
if 'Comments:' in label:
self.story.setMetadata('comments', value)
if 'Kudos:' in label:
self.story.setMetadata('kudos', value)
if 'Hits:' in label:
self.story.setMetadata('hits', value)
if 'Bookmarks:' in label:
self.story.setMetadata('bookmarks', value)
if 'Chapters:' in label:
if value.split('/')[0] == value.split('/')[1]:
self.story.setMetadata('status', 'Completed')

View file

@ -127,6 +127,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
self.story.addToList('category',u'Magical Girl Lyrical Nanoha')
self.story.addToList('genre','Fantasy')
self.story.addToList('genre','Comedy')
self.story.addToList('genre','SF')
self.story.addToList('genre','Noir')

View file

@ -19,6 +19,7 @@ import os, re
import urlparse
import string
from math import floor
from functools import partial
import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
@ -192,6 +193,7 @@ class Story(Configurable):
def __init__(self, configuration):
Configurable.__init__(self, configuration)
try:
## calibre plugin will set externally to match PI version.
self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
except:
self.metadata = {'version':'4.4'}
@ -205,6 +207,16 @@ class Story(Configurable):
self.calibrebookmark=None # cheesy way to carry calibre bookmark file forward across update.
self.logfile=None # cheesy way to carry log file forward across update.
## Look for config parameter, split and add each to metadata field.
for (config,metadata) in [("extratags","extratags"),
("extracategories","category"),
("extragenres","genre"),
("extracharacters","characters"),
("extraships","ships"),
("extrawarnings","warnings")]:
for val in self.getConfigList(config):
self.addToList(metadata,val)
self.setReplace(self.getConfig('replace_metadata'))
def setMetadata(self, key, value, condremoveentities=True):
@ -224,16 +236,34 @@ class Story(Configurable):
self.addToList('lastupdate',value.strftime("Last Update: %Y/%m/%d"))
## Lines have two or three parts. Two-part lines affect everything.
## Three-part lines affect only the listed metadata key(s).
## pattern=>replacement
## metakey,metakey=>pattern=>replacement
def setReplace(self,replace):
for line in replace.splitlines():
if "=>" in line:
parts = map( lambda x: x.strip(), line.split("=>") )
if len(parts) > 2:
parts[0] = map( lambda x: x.strip(), parts[0].split(",") )
self.replacements.append(parts)
else:
self.replacements.append([None]+parts)
def doReplacments(self,value,key):
for (keys,p,v) in self.replacements:
if (keys == None or key in keys) \
and isinstance(value,basestring) \
and re.search(p,value):
#pv=value
value = re.sub(p,v,value)
#print("change:%s => %s === %s => %s "%(p,v,pv,value))
return value
def getMetadataRaw(self,key):
if self.isValidMetaEntry(key) and self.metadata.has_key(key):
return self.metadata[key]
def doReplacments(self,value):
for (p,v) in self.replacements:
if (isinstance(value,basestring)) and re.match(p,value):
value = re.sub(p,v,value)
return value
def getMetadata(self, key,
removeallentities=False,
doreplacements=True):
@ -242,7 +272,7 @@ class Story(Configurable):
return value
if self.isList(key):
value = u', '.join(self.getList(key, removeallentities, doreplacements=True))
return u', '.join(self.getList(key, removeallentities, doreplacements=True))
elif self.metadata.has_key(key):
value = self.metadata[key]
if value:
@ -253,12 +283,12 @@ class Story(Configurable):
if key in ("dateCreated","datePublished","dateUpdated"):
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
if doreplacements:
value=self.doReplacments(value)
if removeallentities and value != None:
return removeAllEntities(value)
else:
return value
if doreplacements:
value=self.doReplacments(value,key)
if removeallentities and value != None:
return removeAllEntities(value)
else:
return value
def getAllMetadata(self,
removeallentities=False,
@ -278,8 +308,8 @@ class Story(Configurable):
auth = v
# make sure doreplacements & removeallentities are honored.
if doreplacements:
aurl=self.doReplacments(aurl)
auth=self.doReplacments(auth)
aurl=self.doReplacments(aurl,'authorUrl')
auth=self.doReplacments(auth,'author')
if removeallentities:
aurl=removeAllEntities(aurl)
auth=removeAllEntities(auth)
@ -341,10 +371,11 @@ class Story(Configurable):
else:
retlist = self.getMetadataRaw(listname)
if doreplacements and retlist:
if retlist:
if doreplacements:
retlist = filter( lambda x : x!=None and x!='' ,
map(self.doReplacments,retlist) )
if removeallentities and retlist:
map(partial(self.doReplacments,key=listname),retlist) )
if removeallentities:
retlist = filter( lambda x : x!=None and x!='' ,
map(removeAllEntities,retlist) )
@ -512,11 +543,6 @@ class Story(Configurable):
def __str__(self):
return "Metadata: " +str(self.metadata)
def setReplace(self,replace):
for line in replace.splitlines():
if "=>" in line:
self.replacements.append(map( lambda x: x.strip(), line.split("=>") ))
def commaGroups(s):
groups = []
while s and s[-1].isdigit():

View file

@ -75,16 +75,6 @@ class BaseStoryWriter(Configurable):
self.story.setMetadata('formatname',self.getFormatName())
self.story.setMetadata('formatext',self.getFormatExt())
## Look for config parameter, split and add each to metadata field.
for (config,metadata) in [("extratags","extratags"),
("extracategories","category"),
("extragenres","genre"),
("extracharacters","characters"),
("extraships","ships"),
("extrawarnings","warnings")]:
for val in self.getConfigList(config):
self.story.addToList(metadata,val)
def getMetadata(self,key, removeallentities=False):
return stripHTML(self.story.getMetadata(key, removeallentities))

View file

@ -131,12 +131,14 @@ extratags: FanFiction
## for regexp details.
## Make sure to keep at least one space at the start of each line and
## to escape % to %%, if used.
## Lines have two or three parts. Two-part lines affect everything.
## Three-part lines affect only the listed metadata key(s).
#replace_metadata:
# Sci-Fi=>SF
# genre,category=>Sci-Fi=>SF
# Puella Magi Madoka Magica.* => Madoka
# Comedy=>Humor
# Crossover: (.*)=>\1
# (.*)Great(.*)=>\1Moderate\2
# title=>(.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
## Some readers don't show horizontal rule (<hr />) tags correctly.
@ -355,7 +357,7 @@ nook_img_fix:true
extratags: FanFiction,Testing
extracategories:Fafner
extragenres:Romance,Fluff
extracharacters:Reginald Smythe-Smythe,Mokona
extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
extraships:Smythe-Smythe/Mokona
extrawarnings:Extreme Bogosity
@ -370,6 +372,17 @@ include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
replace_metadata:
compositeL=>Val=>VALUE
series,extratags=>Test=>Plan
Puella Magi Madoka Magica.* => Madoka
Comedy=>Humor
Crossover: (.*)=>\1
(.*)Great(.*)=>\1Moderate\2
.*-Centered=>
characters=>Harry P\.=>Harry Potter
## If necessary, you can define [<site>:<format>] sections to
## customize the formats differently for the same site. Overrides
## defaults, format and site.
@ -394,6 +407,18 @@ extratags: FanFiction,Testing,HTML
## personal.ini, not defaults.ini.
#is_adult:true
## AO3 adapter defines a few extra metadata entries.
extra_valid_entries:fandoms,freefromtags,ao3categories,comments,kudos,hits,bookmarks
fandoms_label:Fandoms
freefromtags_label:Freeform Tags
ao3categories_label:AO3 Categories
comments_label:Comments
kudos_label:Kudos
hits_label:Hits
bookmarks_label:Bookmarks
#extra_titlepage_entries: fandoms,freefromtags,ao3categories,comments,kudos,hits,bookmarks
[ashwinder.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter