From bd8e54edcfa033cf7878edd8a91ad07c95c7ce10 Mon Sep 17 00:00:00 2001
From: Jim Miller <retiefjimm@gmail.com>
Date: Sat, 17 May 2014 21:12:43 -0500
Subject: [PATCH] Fixes for literotica.com: URLs using //site, allow https,
 ch01 as storyId, multi ch only.

---
 .../adapters/adapter_literotica.py            | 34 ++++++++++++-------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/fanficdownloader/adapters/adapter_literotica.py b/fanficdownloader/adapters/adapter_literotica.py
index 7d99e221..ca555288 100644
--- a/fanficdownloader/adapters/adapter_literotica.py
+++ b/fanficdownloader/adapters/adapter_literotica.py
@@ -42,16 +42,19 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
         
         self.story.setMetadata('siteabbrev','litero')
 
-        # get storyId from url--url validation guarantees query is only sid=1234
-        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+        # normalize to first chapter.  Not sure if they ever have more than 2 digits.
+        storyid = self.parsedUrl.path.split('/',)[2]
+        if re.match(r'-ch\d\d$',storyid):
+            storyid = storyid[:-2]+'01'
+        self.story.setMetadata('storyId',storyid)
         
         self.origurl = url
-        if "http://www.i." in self.origurl:
+        if "//www.i." in self.origurl:
             ## accept m(mobile)url, but use www.
-            self.origurl = self.origurl.replace("http://www.i.","http://www.")
+            self.origurl = self.origurl.replace("//www.i.","//www.")
 
         # normalized story URL.
-        self._setURL("http://"+self.getSiteDomain()\
+        self._setURL(url[:url.index('//')+2]+self.getSiteDomain()\
                          +"/s/"+self.story.getMetadata('storyId'))
 
         # The date format will vary from site to site.
@@ -69,10 +72,10 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
     @classmethod
     def getSiteExampleURLs(self):
         #return "http://www.literotica.com/s/story-title http://www.literotica.com/stories/showstory.php?id=1234 http://www.i.literotica.com/stories/showstory.php?id=1234"
-        return "http://www.literotica.com/s/story-title"
+        return "http://www.literotica.com/s/story-title https://www.literotica.com/s/story-title"
 
     def getSiteURLPattern(self):
-        return r"http://www(\.i)?\.literotica\.com/s/([a-zA-Z0-9_-]+)"
+        return r"https?://www(\.i)?\.literotica\.com/s/([a-zA-Z0-9_-]+)"
 
     def extractChapterUrlsAndMetadata(self):
 
@@ -97,20 +100,24 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
         # author
         a = soup1.find("span", "b-story-user-y")
         self.story.setMetadata('authorId', urlparse.parse_qs(a.a['href'].split('?')[1])['uid'])
-        self.story.setMetadata('authorUrl', a.a['href'])
+        authorurl = a.a['href']
+        if authorurl.startswith('//'):
+            authorurl = self.parsedUrl.scheme+':'+authorurl
+        self.story.setMetadata('authorUrl', authorurl)
         self.story.setMetadata('author', a.text)
 
         # get the author page
         try:
-            dataAuth = self._fetchUrl(a.a['href'])
+            dataAuth = self._fetchUrl(authorurl)
             soupAuth = bs.BeautifulSoup(dataAuth)
         except urllib2.HTTPError, e:
             if e.code == 404:
-                raise exceptions.StoryDoesNotExist(a.a['href'])
+                raise exceptions.StoryDoesNotExist(authorurl)
             else:
                 raise e
 
-        storyLink = soupAuth.find('a', href=url1)
+        ## site has started using //domain.name/asdf urls remove https?: from front
+        storyLink = soupAuth.find('a', href=url1[url1.index(':')+1:])
 
         if storyLink is not None:
             # pull the published date from the author page
@@ -166,7 +173,10 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
             self.story.setMetadata('datePublished',makeDate(stripHTML(row.find('td',{'class':'dt'})), self.dateformat))
             while row['class'] == 'sl':
                 # pages include full URLs.
-                self.chapterUrls.append((row.a.string,row.a['href']))
+                chapurl = row.a['href']
+                if chapurl.startswith('//'):
+                    chapurl = self.parsedUrl.scheme+':'+chapurl
+                self.chapterUrls.append((row.a.string,chapurl))
                 if not row.nextSibling:
                     break
                 row = row.nextSibling