mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-04 04:41:36 +02:00
Preserve order of URLs fetched from page--especially important for anthologies.
This commit is contained in:
parent
6ff1ed4ba9
commit
390c661a88
1 changed file with 10 additions and 4 deletions
|
|
@ -74,7 +74,7 @@ def get_urls_from_page(url,configuration=None,normalize=False):
|
|||
return get_urls_from_html(data,url,configuration,normalize,restrictsearch)
|
||||
|
||||
def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrictsearch=None):
|
||||
urls = collections.defaultdict(list)
|
||||
urls = collections.OrderedDict()
|
||||
|
||||
if not configuration:
|
||||
configuration = Configuration("test1.com","EPUB")
|
||||
|
|
@ -103,7 +103,10 @@ def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrict
|
|||
#print("2 urlhref:%s"%href)
|
||||
adapter = adapters.getAdapter(configuration,href)
|
||||
#print("found adapter")
|
||||
urls[adapter.story.getMetadata('storyUrl')].append(href)
|
||||
if adapter.story.getMetadata('storyUrl') not in urls:
|
||||
urls[adapter.story.getMetadata('storyUrl')] = [href]
|
||||
else:
|
||||
urls[adapter.story.getMetadata('storyUrl')].append(href)
|
||||
except Exception, e:
|
||||
#print e
|
||||
pass
|
||||
|
|
@ -114,7 +117,7 @@ def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrict
|
|||
|
||||
|
||||
def get_urls_from_text(data,configuration=None,normalize=False):
|
||||
urls = collections.defaultdict(list)
|
||||
urls = collections.OrderedDict(list)
|
||||
data=unicode(data)
|
||||
|
||||
if not configuration:
|
||||
|
|
@ -130,7 +133,10 @@ def get_urls_from_text(data,configuration=None,normalize=False):
|
|||
try:
|
||||
href = href.replace('&index=1','')
|
||||
adapter = adapters.getAdapter(configuration,href)
|
||||
urls[adapter.story.getMetadata('storyUrl')].append(href)
|
||||
if adapter.story.getMetadata('storyUrl') not in urls:
|
||||
urls[adapter.story.getMetadata('storyUrl')] = [href]
|
||||
else:
|
||||
urls[adapter.story.getMetadata('storyUrl')].append(href)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue