From 0bea4afd01e5d0630090382a41313c8462b644d5 Mon Sep 17 00:00:00 2001
From: Jim Miller <retiefjimm@gmail.com>
Date: Fri, 24 Feb 2012 20:50:43 -0600
Subject: [PATCH] Update ga for image support changes.  Change img file names. 
 Possibility of dups between updates, but not bad overwrites or overlength
 names.

---
 .../adapters/adapter_archiveofourownorg.py    |  2 +-
 .../adapters/adapter_gayauthorsorg.py         |  7 ++--
 fanficdownloader/adapters/adapter_test1.py    |  1 -
 fanficdownloader/adapters/base_adapter.py     |  3 +-
 fanficdownloader/story.py                     | 38 +++++++++++++++----
 5 files changed, 37 insertions(+), 14 deletions(-)
diff --git a/fanficdownloader/adapters/adapter_archiveofourownorg.py b/fanficdownloader/adapters/adapter_archiveofourownorg.py
index 93e45539..ede2a023 100644
--- a/fanficdownloader/adapters/adapter_archiveofourownorg.py
+++ b/fanficdownloader/adapters/adapter_archiveofourownorg.py
@@ -214,7 +214,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
 
     # grab the text for an individual chapter.
     def getChapterText(self, url):
-        print('Getting chapter text from: %s' % url)
+        logging.debug('Getting chapter text from: %s' % url)
 		
         chapter=bs.BeautifulSoup('<div class="story"></div>')
         data = self._fetchUrl(url)
diff --git a/fanficdownloader/adapters/adapter_gayauthorsorg.py b/fanficdownloader/adapters/adapter_gayauthorsorg.py
index ab4984e4..66023de9 100644
--- a/fanficdownloader/adapters/adapter_gayauthorsorg.py
+++ b/fanficdownloader/adapters/adapter_gayauthorsorg.py
@@ -25,7 +25,7 @@ from .. import BeautifulSoup as bs
 from ..htmlcleanup import stripHTML
 from .. import exceptions as exceptions
 
-from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+from base_adapter import BaseSiteAdapter, makeDate
 
 def getClass():
     return GayAuthorsAdapter
@@ -162,7 +162,8 @@ class GayAuthorsAdapter(BaseSiteAdapter):
         self.story.setMetadata('rating',rating.text)
 		
         summary = msoup.find('span', {'itemprop' : 'description'})
-        self.story.setMetadata('description',summary.text)
+        self.setDescription(self.url,summary.text)
+        #self.story.setMetadata('description',summary.text)
 	
 
         stats = msoup.find('dl',{'class':'info'})
@@ -200,4 +201,4 @@ class GayAuthorsAdapter(BaseSiteAdapter):
         if None == div:
             raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)
     
-        return utf8FromSoup(div)
+        return self.utf8FromSoup(url,div)
diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index c55b9ecc..eeeb585b 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -78,7 +78,6 @@ class TestSiteAdapter(BaseSiteAdapter):
 Some more longer description.  "I suck at summaries!"  "Better than it sounds!"  "My first fic"
 ''')
         self.story.setMetadata('datePublished',makeDate("1975-03-15","%Y-%m-%d"))
-        self.story.setMetadata('dateCreated',datetime.datetime.now())
         if self.story.getMetadata('storyId') == '669':
             self.story.setMetadata('dateUpdated',datetime.datetime.now())
         else:
diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index aeec423b..6f0e9d99 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -259,8 +259,9 @@ class BaseSiteAdapter(Configurable):
         acceptable_attributes = ['href','name']
         #print("include_images:"+self.getConfig('include_images'))
         if self.getConfig('include_images'):
-            acceptable_attributes.extend(('src','alt'))
+            acceptable_attributes.extend(('src','alt','origsrc'))
             for img in soup.findAll('img'):
+                img['origsrc']=img['src']
                 img['src']=self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw)
 
         for attr in soup._getAttrMap().keys():
diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py
index a4095754..c618e7cc 100644
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@@ -17,7 +17,6 @@
 
 import os, re
 import urlparse
-from base64 import b64encode
 
 from htmlcleanup import conditionalRemoveEntities, removeAllEntities
 
@@ -198,21 +197,44 @@ class Story:
         # up with the same name both now, in different chapters, and
         # later with new update chapters.  Numbering them didn't do
         # that.
-        newsrc = "images/%s.jpg"%(b64encode(imgurl))
+        # newsrc = "images/%s.jpg"%(b64encode(imgurl))
+        # step = 20
+        # if newsrc > step:
+        #     i = step
+        #     while i < len(newsrc):
+        #         newsrc = newsrc[:i]+"/"+newsrc[i:]
+        #         i += step
+                
+        # But b64 names can quickly get too long for zip (on Windows,
+        # at least) to handle.
+
+        # This version, prefixing the images with the creation
+        # timestamp, still allows for dup images to be detected and
+        # not dup'ed in a single download.  And it prevents 0.jpg from
+        # earlier update being overwritten by the first image in newer
+        # chapter.  It does not, however, prevent dup copies of the
+        # same image being d/l'ed and saved in different updates.  A
+        # bit of corner case inefficiency I can live with rather than
+        # scanning all the pre-existing files on update.  origsrc is
+        # being saved on img tags just in case, however.
+        prefix=self.getMetadataRaw('dateCreated').strftime("%Y%m%d%H%M%S")
+        
         if imgurl not in self.imgurls:
             self.imgurls.append(imgurl)
             parsedUrl = urlparse.urlparse(imgurl)
-            # newsrc = "images/%s.jpg"%(
-            #     self.imgurls.index(imgurl))
+            newsrc = "images/%s-%s.jpg"%(
+                prefix,
+                self.imgurls.index(imgurl))
             sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
             data = convert_image(fetch(imgurl),
                                  sizes,
                                  configurable.getConfig('grayscale_images'))
-            #print("\nimgurl\nimage size:%d\n"%len(data))
+            print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
             self.imgurldata.append((newsrc,data))
-        # else:
-        #     newsrc = "images/%s.jpg"%(
-        #         self.imgurls.index(imgurl))
+        else:
+            newsrc = "images/%s-%s.jpg"%(
+                prefix,
+                self.imgurls.index(imgurl))
             
         #print("===============\n%s\nimg url:%s\n============"%(newsrc,self.imgurls[-1]))