Update quotev.com adapter.

Jim Miller 2016-01-14 13:45:33 -06:00
parent 17157498a9
commit 3f005cdfbd
3 changed files with 60 additions and 54 deletions


@@ -2031,11 +2031,15 @@ extracategories:Psych
extracategories:Queer as Folk
[quotev.com]
extra_valid_entries:pages,readers,reads,favorites
extra_valid_entries:pages,readers,reads,favorites,searchtags,comments
pages_label:Pages
readers_label:Readers
reads_label:Reads
favorites_label:Favorites
searchtags_label:Search Tags
comments_label:Comments
include_in_category:category,searchtags
[www.restrictedsection.org]
## Some sites require login (or login for some rated stories) The


@@ -5,6 +5,7 @@ import datetime
from .. import exceptions
from base_adapter import BaseSiteAdapter
from ..htmlcleanup import stripHTML
SITE_DOMAIN = 'quotev.com'
STORY_URL_TEMPLATE = 'http://www.quotev.com/story/%s'
@@ -53,81 +54,78 @@ class QuotevComAdapter(BaseSiteAdapter):
        soup = self.make_soup(data)
        element = soup.find('div', {'class': 'result_head'})
        element = soup.find('div', {'class': 'result'})
        if not element:
            raise exceptions.StoryDoesNotExist(self.url)
        self.story.setMetadata('title', element.find('span', recursive=False).get_text())
        self.story.setMetadata('title', element.find('h1').get_text())
        element = soup.find('div', {'class': 'desc_creator'})
        if element:
            a = element('a')[1]
            self.story.setMetadata('author', a.get_text())
            self.story.setMetadata('authorId', get_url_path_segments(a['href'])[0])
            self.story.setMetadata('authorUrl', urlparse.urljoin(self.url, a['href']))
        # Multiple authors
        else:
            element = soup.find('div', id='qheadx')
            for a in element('div', recursive=False)[1]('a'):
                author = a.get_text()
                if not a.get_text():
                    continue
                self.story.addToList('author', author)
                self.story.addToList('authorId', get_url_path_segments(a['href'])[0])
        # quotev html is all about formatting without any content tagging
        authdiv = soup.find('div', {'style':"text-align:left;"})
        if authdiv:
            #print("div:%s"%authdiv.find_all('a'))
            for a in authdiv.find_all('a'):
                self.story.addToList('author', a.get_text())
                self.story.addToList('authorId', a['href'].split('/')[-1])
                self.story.addToList('authorUrl', urlparse.urljoin(self.url, a['href']))
        else:
            self.story.setMetadata('author','Anonymous')
            self.story.setMetadata('authorUrl','http://www.quotev.com')
            self.story.setMetadata('authorId','0')
        self.setDescription(self.url, soup.find('div', id='qdesct'))
        self.setCoverImage(self.url, urlparse.urljoin(self.url, soup.find('img', {'class': 'logo'})['src']))
        for a in soup.find('div', {'class': 'tag'})('a'):
            if a['href'] == '#':
                continue
        for a in soup.find_all('a', {'href': re.compile(SITE_DOMAIN+'/stories/c/')}):
            self.story.addToList('category', a.get_text())
        elements = soup('span', {'class': 'q_time'})
        for a in soup.find_all('a', {'href': re.compile(SITE_DOMAIN+'/search/')}):
            self.story.addToList('searchtags', a.get_text())
        elements = soup.find_all('span', {'class': 'q_time'})
        self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(float(elements[0]['ts'])))
        if len(elements) > 1:
            self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(float(elements[1]['ts'])))
        metadiv = elements[0].parent
        if 'completed' in stripHTML(metadiv):
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')
        data = filter(None, (x.strip() for x in stripHTML(metadiv).split(u'\xb7')))
        for datum in data:
            parts = datum.split()
            if len(parts) < 2 or parts[1] not in self.getConfig('extra_valid_entries'):
                continue
            # Not a valid metadatum
            # if not len(parts) == 2:
            # continue
            key, value = parts[1], parts[0]
            self.story.setMetadata(key, value.replace(',', '').replace('.', ''))
        favspans = soup.find('a',{'id':'fav_btn'}).find_all('span')
        if len(favspans) > 1:
            self.story.setMetadata('favorites', stripHTML(favspans[1]).replace(',', ''))
        commentspans = soup.find('a',{'id':'comment_btn'}).find_all('span')
        #print("commentspans:%s"%commentspans)
        if len(commentspans) > 0:
            self.story.setMetadata('comments', stripHTML(commentspans[0]).replace(',', ''))
        for a in soup.find('div', id='rselect')('a'):
            self.chapterUrls.append((a.get_text(), urlparse.urljoin(self.url, a['href'])))
        self.story.setMetadata('numChapters', len(self.chapterUrls))
        element = soup.find('div', {'class': 't'})('div', recursive=False)[1].div
        data = filter(None, (x.strip() for x in element.get_text().split(u'\xb7')))
        if 'completed' in data:
            self.story.setMetadata('status', 'Completed')
            data.remove('completed')
        else:
            self.story.setMetadata('status', 'In-Progress')
        for datum in data:
            parts = datum.split()
            # Not a valid metadatum
            if not len(parts) == 2:
                continue
            key, value = parts
            self.story.setMetadata(key, value.replace(',', '').replace('.', ''))
        self.story.setMetadata('favorites', soup.find('div', id='favqn').get_text())
        element = soup.find('a', id='comment_btn').span
        self.story.setMetadata('comments', element.get_text() if element else 0)

    def getChapterText(self, url):
        data = self._fetchUrl(url)
        soup = self.make_soup(data)
        element = soup.find('div', id='restxt')
        element = soup.find('div', id='rescontent')
        for a in element('a'):
            a.unwrap()
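
The reworked stats handling above takes the text next to the first q_time span, splits it on the '\xb7' (middle dot) separator, and keeps only "<value> <label>" pairs whose label is allowed by extra_valid_entries. Below is a minimal, standalone sketch of that split-and-whitelist step; the sample string and the hard-coded whitelist are illustrative assumptions standing in for the adapter's stripHTML() and getConfig() calls.

# -*- coding: utf-8 -*-
# Illustrative sketch only, not part of the commit: parse a quotev-style
# stats line such as "5 pages · 1,234 reads · completed" into metadata.
# 'sample' and 'extra_valid_entries' below are made-up stand-ins.

sample = u'5 pages \xb7 1,234 reads \xb7 67 favorites \xb7 completed'
extra_valid_entries = ('pages', 'readers', 'reads', 'favorites', 'searchtags', 'comments')

metadata = {}
for datum in filter(None, (x.strip() for x in sample.split(u'\xb7'))):
    parts = datum.split()
    # Skip pieces such as 'completed' that are not "<value> <label>" pairs,
    # and labels that are not whitelisted.
    if len(parts) < 2 or parts[1] not in extra_valid_entries:
        continue
    key, value = parts[1], parts[0]
    metadata[key] = value.replace(',', '').replace('.', '')

print(metadata)  # e.g. {'pages': '5', 'reads': '1234', 'favorites': '67'}

In the adapter itself the whitelist comes from the extra_valid_entries setting shown in the ini hunks, so which labels get captured can be adjusted from the ini files rather than in code.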


@@ -2013,11 +2013,15 @@ extracategories:Psych
extracategories:Queer as Folk
[quotev.com]
extra_valid_entries:pages,readers,reads,favorites
extra_valid_entries:pages,readers,reads,favorites,searchtags,comments
pages_label:Pages
readers_label:Readers
reads_label:Reads
favorites_label:Favorites
searchtags_label:Search Tags
comments_label:Comments
include_in_category:category,searchtags
[www.restrictedsection.org]
## Some sites require login (or login for some rated stories) The