Fix adapters that stored the list returned by urlparse.parse_qs as authorId instead of its first element, which broke the new set-based unique_list (lists are unhashable). Also BS4 fixes to adapter_spikeluvercom.py

This commit is contained in:
Jim Miller 2015-12-21 13:14:47 -06:00
parent 39bfc6632c
commit 9a066be859
3 changed files with 9 additions and 5 deletions

View file

@ -3,7 +3,7 @@ import re
import urllib2
import urlparse
from bs4 import BeautifulSoup
from bs4.element import Tag
from ..htmlcleanup import stripHTML
from base_adapter import BaseSiteAdapter, makeDate
@ -98,7 +98,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
query_data = urlparse.parse_qs(components.query)
self.story.setMetadata('author', stripHTML(author_anchor))
self.story.setMetadata('authorId', query_data['uid'])
self.story.setMetadata('authorId', query_data['uid'][0])
self.story.setMetadata('authorUrl', url)
sort_div = soup.find('div', id='sort')
@ -122,7 +122,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
keep_summary_html = self.getConfig('keep_summary_html')
for sibling in _yield_next_siblings(span_tag):
if isinstance(sibling, BeautifulSoup.Tag):
if isinstance(sibling, Tag):
# Encountered next label, break. Not as bad as other
# e-fiction sites, let's hope this is enough for proper
# parsing.

View file

@ -138,7 +138,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
query_data = urlparse.parse_qs(components.query)
self.story.setMetadata('author', author_anchor.string)
self.story.setMetadata('authorId', query_data['uid'])
self.story.setMetadata('authorId', query_data['uid'][0])
self.story.setMetadata('authorUrl', url)
sort_div = soup.find('div', id='sort')

View file

@ -1109,4 +1109,8 @@ def commaGroups(s):
def unique_list(seq):
    """Return the items of *seq* as a list with duplicates removed.

    The first occurrence of each item is kept and the original order is
    preserved.

    Items must be hashable because a set is used to track what has already
    been seen.  If de-duplication fails (for example when *seq* contains a
    list), the offending sequence is printed for diagnosis and the original
    exception is re-raised unchanged.
    """
    seen = set()
    seen_add = seen.add
    try:
        # seen_add() returns None, so the `or` both tests membership and
        # records first-time items in a single expression.
        return [x for x in seq if not (x in seen or seen_add(x))]
    except Exception:
        # Wrap seq in a 1-tuple: if seq is itself a tuple, `"%s" % seq`
        # would treat it as the argument list and raise a formatting
        # error that hides the real problem.
        print("unique_list exception seq:%s" % (seq,))
        raise