From 267c19e8dd40df2e88cc47cbf34d6846e2eb28db Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 4 Jan 2017 14:53:01 -0600 Subject: [PATCH] Further attempt at extended char URLs for adapter_wwwlushstoriescom. Works on Win10(US English) CLI and Calibre. --- fanficfare/adapters/adapter_wwwlushstoriescom.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fanficfare/adapters/adapter_wwwlushstoriescom.py b/fanficfare/adapters/adapter_wwwlushstoriescom.py index 4043b2ff..11811efb 100644 --- a/fanficfare/adapters/adapter_wwwlushstoriescom.py +++ b/fanficfare/adapters/adapter_wwwlushstoriescom.py @@ -25,7 +25,6 @@ import logging logger = logging.getLogger(__name__) import re import urllib2 -import sys from ..htmlcleanup import stripHTML from .. import exceptions as exceptions @@ -58,8 +57,18 @@ class WWWLushStoriesComAdapter(BaseSiteAdapter): # XXX self.is_adult=False # get storyId from url - self.story.setMetadata('storyId',urllib2.quote(self.parsedUrl.path.split('/')[3].replace( - '.aspx',''))) + storyId = self.parsedUrl.path.split('/')[3].replace('.aspx','') + if '%' not in storyId: + ## assume already escaped if contains %. Assume needs escaping if it doesn't. + try: + storyId = urllib2.quote(storyId) + except KeyError: + ## string from calibre on windows *isn't* utf8, but a + ## latin1 of some kind. This encode would be better + ## done somewhere where the code type can be known. + storyId = urllib2.quote(storyId.encode("Windows-1252")) + + self.story.setMetadata('storyId',storyId) ## This site has the category as part of the url, so to normalize the url below, we get it ## here