mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 12:36:11 +02:00
Update ga (the GayAuthors adapter) for image support changes. Change img file names. Possibility of
duplicates between updates, but no bad overwrites or overlength names.
This commit is contained in:
parent
6a83131a99
commit
0bea4afd01
5 changed files with 37 additions and 14 deletions
|
|
@ -214,7 +214,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
print('Getting chapter text from: %s' % url)
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
chapter=bs.BeautifulSoup('<div class="story"></div>')
|
||||
data = self._fetchUrl(url)
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
|
|||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
|
||||
return GayAuthorsAdapter
|
||||
|
|
@ -162,7 +162,8 @@ class GayAuthorsAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('rating',rating.text)
|
||||
|
||||
summary = msoup.find('span', {'itemprop' : 'description'})
|
||||
self.story.setMetadata('description',summary.text)
|
||||
self.setDescription(self.url,summary.text)
|
||||
#self.story.setMetadata('description',summary.text)
|
||||
|
||||
|
||||
stats = msoup.find('dl',{'class':'info'})
|
||||
|
|
@ -200,4 +201,4 @@ class GayAuthorsAdapter(BaseSiteAdapter):
|
|||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return utf8FromSoup(div)
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
|
|||
|
|
@ -78,7 +78,6 @@ class TestSiteAdapter(BaseSiteAdapter):
|
|||
Some more longer description. "I suck at summaries!" "Better than it sounds!" "My first fic"
|
||||
''')
|
||||
self.story.setMetadata('datePublished',makeDate("1975-03-15","%Y-%m-%d"))
|
||||
self.story.setMetadata('dateCreated',datetime.datetime.now())
|
||||
if self.story.getMetadata('storyId') == '669':
|
||||
self.story.setMetadata('dateUpdated',datetime.datetime.now())
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -259,8 +259,9 @@ class BaseSiteAdapter(Configurable):
|
|||
acceptable_attributes = ['href','name']
|
||||
#print("include_images:"+self.getConfig('include_images'))
|
||||
if self.getConfig('include_images'):
|
||||
acceptable_attributes.extend(('src','alt'))
|
||||
acceptable_attributes.extend(('src','alt','origsrc'))
|
||||
for img in soup.findAll('img'):
|
||||
img['origsrc']=img['src']
|
||||
img['src']=self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw)
|
||||
|
||||
for attr in soup._getAttrMap().keys():
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@
|
|||
|
||||
import os, re
|
||||
import urlparse
|
||||
from base64 import b64encode
|
||||
|
||||
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
|
||||
|
||||
|
|
@ -198,21 +197,44 @@ class Story:
|
|||
# up with the same name both now, in different chapters, and
|
||||
# later with new update chapters. Numbering them didn't do
|
||||
# that.
|
||||
newsrc = "images/%s.jpg"%(b64encode(imgurl))
|
||||
# newsrc = "images/%s.jpg"%(b64encode(imgurl))
|
||||
# step = 20
|
||||
# if newsrc > step:
|
||||
# i = step
|
||||
# while i < len(newsrc):
|
||||
# newsrc = newsrc[:i]+"/"+newsrc[i:]
|
||||
# i += step
|
||||
|
||||
# But, b64 names can get too big for zip (on windows, at
|
||||
# least) to handle too quickly.
|
||||
|
||||
# This version, prefixing the images with the creation
|
||||
# timestamp, still allows for dup images to be detected and
|
||||
# not dup'ed in a single download. And it prevents 0.jpg from
|
||||
# earlier update being overwritten by the first image in newer
|
||||
# chapter. It does not, however, prevent dup copies of the
|
||||
# same image being d/l'ed and saved in different updates. A
|
||||
# bit of corner case inefficiency I can live with rather than
|
||||
# scanning all the pre-existing files on update. origsrc is
|
||||
# being saved on img tags just in case, however.
|
||||
prefix=self.getMetadataRaw('dateCreated').strftime("%Y%m%d%H%M%S")
|
||||
|
||||
if imgurl not in self.imgurls:
|
||||
self.imgurls.append(imgurl)
|
||||
parsedUrl = urlparse.urlparse(imgurl)
|
||||
# newsrc = "images/%s.jpg"%(
|
||||
# self.imgurls.index(imgurl))
|
||||
newsrc = "images/%s-%s.jpg"%(
|
||||
prefix,
|
||||
self.imgurls.index(imgurl))
|
||||
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
|
||||
data = convert_image(fetch(imgurl),
|
||||
sizes,
|
||||
configurable.getConfig('grayscale_images'))
|
||||
#print("\nimgurl\nimage size:%d\n"%len(data))
|
||||
print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
|
||||
self.imgurldata.append((newsrc,data))
|
||||
# else:
|
||||
# newsrc = "images/%s.jpg"%(
|
||||
# self.imgurls.index(imgurl))
|
||||
else:
|
||||
newsrc = "images/%s-%s.jpg"%(
|
||||
prefix,
|
||||
self.imgurls.index(imgurl))
|
||||
|
||||
#print("===============\n%s\nimg url:%s\n============"%(newsrc,self.imgurls[-1]))
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue