mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-21 16:23:58 +01:00
Update quotev.com adapter.
This commit is contained in:
parent
17157498a9
commit
3f005cdfbd
3 changed files with 60 additions and 54 deletions
|
|
@ -2031,11 +2031,15 @@ extracategories:Psych
|
|||
extracategories:Queer as Folk
|
||||
|
||||
[quotev.com]
|
||||
extra_valid_entries:pages,readers,reads,favorites
|
||||
extra_valid_entries:pages,readers,reads,favorites,searchtags,comments
|
||||
pages_label:Pages
|
||||
readers_label:Readers
|
||||
reads_label:Reads
|
||||
favorites_label:Favorites
|
||||
searchtags:Search Tags
|
||||
comments:Comments
|
||||
|
||||
include_in_category:category,searchtags
|
||||
|
||||
[www.restrictedsection.org]
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import datetime
|
|||
|
||||
from .. import exceptions
|
||||
from base_adapter import BaseSiteAdapter
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
SITE_DOMAIN = 'quotev.com'
|
||||
STORY_URL_TEMPLATE = 'http://www.quotev.com/story/%s'
|
||||
|
|
@ -53,81 +54,78 @@ class QuotevComAdapter(BaseSiteAdapter):
|
|||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
element = soup.find('div', {'class': 'result_head'})
|
||||
element = soup.find('div', {'class': 'result'})
|
||||
if not element:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
||||
self.story.setMetadata('title', element.find('span', recursive=False).get_text())
|
||||
self.story.setMetadata('title', element.find('h1').get_text())
|
||||
|
||||
element = soup.find('div', {'class': 'desc_creator'})
|
||||
if element:
|
||||
a = element('a')[1]
|
||||
self.story.setMetadata('author', a.get_text())
|
||||
self.story.setMetadata('authorId', get_url_path_segments(a['href'])[0])
|
||||
self.story.setMetadata('authorUrl', urlparse.urljoin(self.url, a['href']))
|
||||
|
||||
# Multiple authors
|
||||
else:
|
||||
element = soup.find('div', id='qheadx')
|
||||
for a in element('div', recursive=False)[1]('a'):
|
||||
author = a.get_text()
|
||||
if not a.get_text():
|
||||
continue
|
||||
|
||||
self.story.addToList('author', author)
|
||||
self.story.addToList('authorId', get_url_path_segments(a['href'])[0])
|
||||
# quotev html is all about formatting without any content tagging
|
||||
authdiv = soup.find('div', {'style':"text-align:left;"})
|
||||
if authdiv:
|
||||
#print("div:%s"%authdiv.find_all('a'))
|
||||
for a in authdiv.find_all('a'):
|
||||
self.story.addToList('author', a.get_text())
|
||||
self.story.addToList('authorId', a['href'].split('/')[-1])
|
||||
self.story.addToList('authorUrl', urlparse.urljoin(self.url, a['href']))
|
||||
else:
|
||||
self.story.setMetadata('author','Anonymous')
|
||||
self.story.setMetadata('authorUrl','http://www.quotev.com')
|
||||
self.story.setMetadata('authorId','0')
|
||||
|
||||
|
||||
self.setDescription(self.url, soup.find('div', id='qdesct'))
|
||||
self.setCoverImage(self.url, urlparse.urljoin(self.url, soup.find('img', {'class': 'logo'})['src']))
|
||||
|
||||
for a in soup.find('div', {'class': 'tag'})('a'):
|
||||
if a['href'] == '#':
|
||||
continue
|
||||
|
||||
for a in soup.find_all('a', {'href': re.compile(SITE_DOMAIN+'/stories/c/')}):
|
||||
self.story.addToList('category', a.get_text())
|
||||
|
||||
elements = soup('span', {'class': 'q_time'})
|
||||
for a in soup.find_all('a', {'href': re.compile(SITE_DOMAIN+'/search/')}):
|
||||
self.story.addToList('searchtags', a.get_text())
|
||||
|
||||
elements = soup.find_all('span', {'class': 'q_time'})
|
||||
self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(float(elements[0]['ts'])))
|
||||
if len(elements) > 1:
|
||||
self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(float(elements[1]['ts'])))
|
||||
|
||||
metadiv = elements[0].parent
|
||||
|
||||
if 'completed' in stripHTML(metadiv):
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
data = filter(None, (x.strip() for x in stripHTML(metadiv).split(u'\xb7')))
|
||||
|
||||
for datum in data:
|
||||
parts = datum.split()
|
||||
if len(parts) < 2 or parts[1] not in self.getConfig('extra_valid_entries'):
|
||||
continue
|
||||
# Not a valid metadatum
|
||||
# if not len(parts) == 2:
|
||||
# continue
|
||||
|
||||
key, value = parts[1], parts[0]
|
||||
self.story.setMetadata(key, value.replace(',', '').replace('.', ''))
|
||||
|
||||
favspans = soup.find('a',{'id':'fav_btn'}).find_all('span')
|
||||
if len(favspans) > 1:
|
||||
self.story.setMetadata('favorites', stripHTML(favspans[1]).replace(',', ''))
|
||||
|
||||
commentspans = soup.find('a',{'id':'comment_btn'}).find_all('span')
|
||||
#print("commentspans:%s"%commentspans)
|
||||
if len(commentspans) > 0:
|
||||
self.story.setMetadata('comments', stripHTML(commentspans[0]).replace(',', ''))
|
||||
|
||||
for a in soup.find('div', id='rselect')('a'):
|
||||
self.chapterUrls.append((a.get_text(), urlparse.urljoin(self.url, a['href'])))
|
||||
|
||||
self.story.setMetadata('numChapters', len(self.chapterUrls))
|
||||
|
||||
element = soup.find('div', {'class': 't'})('div', recursive=False)[1].div
|
||||
data = filter(None, (x.strip() for x in element.get_text().split(u'\xb7')))
|
||||
if 'completed' in data:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
data.remove('completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
for datum in data:
|
||||
parts = datum.split()
|
||||
# Not a valid metadatum
|
||||
if not len(parts) == 2:
|
||||
continue
|
||||
|
||||
key, value = parts
|
||||
self.story.setMetadata(key, value.replace(',', '').replace('.', ''))
|
||||
|
||||
self.story.setMetadata('favorites', soup.find('div', id='favqn').get_text())
|
||||
element = soup.find('a', id='comment_btn').span
|
||||
self.story.setMetadata('comments', element.get_text() if element else 0)
|
||||
|
||||
def getChapterText(self, url):
|
||||
data = self._fetchUrl(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
element = soup.find('div', id='restxt')
|
||||
element = soup.find('div', id='rescontent')
|
||||
for a in element('a'):
|
||||
a.unwrap()
|
||||
|
||||
|
|
|
|||
|
|
@ -2013,11 +2013,15 @@ extracategories:Psych
|
|||
extracategories:Queer as Folk
|
||||
|
||||
[quotev.com]
|
||||
extra_valid_entries:pages,readers,reads,favorites
|
||||
extra_valid_entries:pages,readers,reads,favorites,searchtags,comments
|
||||
pages_label:Pages
|
||||
readers_label:Readers
|
||||
reads_label:Reads
|
||||
favorites_label:Favorites
|
||||
searchtags:Search Tags
|
||||
comments:Comments
|
||||
|
||||
include_in_category:category,searchtags
|
||||
|
||||
[www.restrictedsection.org]
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
|
|
|
|||
Loading…
Reference in a new issue