diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index ab3d1a33..289d00c5 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -2031,11 +2031,15 @@ extracategories:Psych extracategories:Queer as Folk [quotev.com] -extra_valid_entries:pages,readers,reads,favorites +extra_valid_entries:pages,readers,reads,favorites,searchtags,comments pages_label:Pages readers_label:Readers reads_label:Reads favorites_label:Favorites +searchtags:Search Tags +comments:Comments + +include_in_category:category,searchtags [www.restrictedsection.org] ## Some sites require login (or login for some rated stories) The diff --git a/fanficfare/adapters/adapter_quotevcom.py b/fanficfare/adapters/adapter_quotevcom.py index 3e7aa884..ccbd0828 100644 --- a/fanficfare/adapters/adapter_quotevcom.py +++ b/fanficfare/adapters/adapter_quotevcom.py @@ -5,6 +5,7 @@ import datetime from .. import exceptions from base_adapter import BaseSiteAdapter +from ..htmlcleanup import stripHTML SITE_DOMAIN = 'quotev.com' STORY_URL_TEMPLATE = 'http://www.quotev.com/story/%s' @@ -53,81 +54,78 @@ class QuotevComAdapter(BaseSiteAdapter): soup = self.make_soup(data) - element = soup.find('div', {'class': 'result_head'}) + element = soup.find('div', {'class': 'result'}) if not element: raise exceptions.StoryDoesNotExist(self.url) - self.story.setMetadata('title', element.find('span', recursive=False).get_text()) + self.story.setMetadata('title', element.find('h1').get_text()) - element = soup.find('div', {'class': 'desc_creator'}) - if element: - a = element('a')[1] - self.story.setMetadata('author', a.get_text()) - self.story.setMetadata('authorId', get_url_path_segments(a['href'])[0]) - self.story.setMetadata('authorUrl', urlparse.urljoin(self.url, a['href'])) - - # Multiple authors - else: - element = soup.find('div', id='qheadx') - for a in element('div', recursive=False)[1]('a'): - author = a.get_text() - if not a.get_text(): - continue - - self.story.addToList('author', author) - self.story.addToList('authorId', get_url_path_segments(a['href'])[0]) + # quotev html is all about formatting without any content tagging + authdiv = soup.find('div', {'style':"text-align:left;"}) + if authdiv: + #print("div:%s"%authdiv.find_all('a')) + for a in authdiv.find_all('a'): + self.story.addToList('author', a.get_text()) + self.story.addToList('authorId', a['href'].split('/')[-1]) self.story.addToList('authorUrl', urlparse.urljoin(self.url, a['href'])) - else: - self.story.setMetadata('author','Anonymous') - self.story.setMetadata('authorUrl','http://www.quotev.com') - self.story.setMetadata('authorId','0') - + else: + self.story.setMetadata('author','Anonymous') + self.story.setMetadata('authorUrl','http://www.quotev.com') + self.story.setMetadata('authorId','0') self.setDescription(self.url, soup.find('div', id='qdesct')) self.setCoverImage(self.url, urlparse.urljoin(self.url, soup.find('img', {'class': 'logo'})['src'])) - for a in soup.find('div', {'class': 'tag'})('a'): - if a['href'] == '#': - continue - + for a in soup.find_all('a', {'href': re.compile(SITE_DOMAIN+'/stories/c/')}): self.story.addToList('category', a.get_text()) - elements = soup('span', {'class': 'q_time'}) + for a in soup.find_all('a', {'href': re.compile(SITE_DOMAIN+'/search/')}): + self.story.addToList('searchtags', a.get_text()) + + elements = soup.find_all('span', {'class': 'q_time'}) self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(float(elements[0]['ts']))) if len(elements) > 1: self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(float(elements[1]['ts']))) + metadiv = elements[0].parent + + if 'completed' in stripHTML(metadiv): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + data = filter(None, (x.strip() for x in stripHTML(metadiv).split(u'\xb7'))) + + for datum in data: + parts = datum.split() + if len(parts) < 2 or parts[1] not in self.getConfig('extra_valid_entries'): + continue + # Not a valid metadatum + # if not len(parts) == 2: + # continue + + key, value = parts[1], parts[0] + self.story.setMetadata(key, value.replace(',', '').replace('.', '')) + + favspans = soup.find('a',{'id':'fav_btn'}).find_all('span') + if len(favspans) > 1: + self.story.setMetadata('favorites', stripHTML(favspans[1]).replace(',', '')) + + commentspans = soup.find('a',{'id':'comment_btn'}).find_all('span') + #print("commentspans:%s"%commentspans) + if len(commentspans) > 0: + self.story.setMetadata('comments', stripHTML(commentspans[0]).replace(',', '')) + for a in soup.find('div', id='rselect')('a'): self.chapterUrls.append((a.get_text(), urlparse.urljoin(self.url, a['href']))) self.story.setMetadata('numChapters', len(self.chapterUrls)) - - element = soup.find('div', {'class': 't'})('div', recursive=False)[1].div - data = filter(None, (x.strip() for x in element.get_text().split(u'\xb7'))) - if 'completed' in data: - self.story.setMetadata('status', 'Completed') - data.remove('completed') - else: - self.story.setMetadata('status', 'In-Progress') - - for datum in data: - parts = datum.split() - # Not a valid metadatum - if not len(parts) == 2: - continue - - key, value = parts - self.story.setMetadata(key, value.replace(',', '').replace('.', '')) - - self.story.setMetadata('favorites', soup.find('div', id='favqn').get_text()) - element = soup.find('a', id='comment_btn').span - self.story.setMetadata('comments', element.get_text() if element else 0) - + def getChapterText(self, url): data = self._fetchUrl(url) soup = self.make_soup(data) - element = soup.find('div', id='restxt') + element = soup.find('div', id='rescontent') for a in element('a'): a.unwrap() diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index b6140800..8a8276b3 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -2013,11 +2013,15 @@ extracategories:Psych extracategories:Queer as Folk [quotev.com] -extra_valid_entries:pages,readers,reads,favorites +extra_valid_entries:pages,readers,reads,favorites,searchtags,comments pages_label:Pages readers_label:Readers reads_label:Reads favorites_label:Favorites +searchtags:Search Tags +comments:Comments + +include_in_category:category,searchtags [www.restrictedsection.org] ## Some sites require login (or login for some rated stories) The