Migrate to new bs4 API

Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
This commit is contained in:
Emmanuel Ferdman 2025-05-06 13:12:18 -07:00 committed by Jim Miller
parent 3edd3c3e7b
commit aca07bbf59
57 changed files with 291 additions and 291 deletions

View file

@ -171,7 +171,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# params['submit'] = 'Login' # params['submit'] = 'Login'
# # copy all hidden input tags to pick up appropriate tokens. # # copy all hidden input tags to pick up appropriate tokens.
# for tag in soup.findAll('input',{'type':'hidden'}): # for tag in soup.find_all('input',{'type':'hidden'}):
# params[tag['name']] = tag['value'] # params[tag['name']] = tag['value']
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1'])) # logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
@ -218,7 +218,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
chapters = soup.find('ul',{'class':'dropdown-content'}) chapters = soup.find('ul',{'class':'dropdown-content'})
for i, chapter in enumerate(chapters.findAll('a')): for i, chapter in enumerate(chapters.find_all('a')):
self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1)) self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
@ -262,7 +262,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
pages = 0 pages = 0
pagination=asoup.find('ul',{'class' : 'pagination'}) pagination=asoup.find('ul',{'class' : 'pagination'})
if pagination: if pagination:
pages = pagination.findAll('li')[-1].find('a') pages = pagination.find_all('li')[-1].find('a')
if not pages == None: if not pages == None:
pages = pages['href'].split('=')[-1] pages = pages['href'].split('=')[-1]
else: else:
@ -271,7 +271,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
storya = None storya = None
##If there is only 1 page of stories, check it to get the Metadata, ##If there is only 1 page of stories, check it to get the Metadata,
if pages == 0: if pages == 0:
a = asoup.findAll('li') a = asoup.find_all('li')
for lc2 in a: for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")): if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
storya = lc2 storya = lc2
@ -294,7 +294,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# we look for the li element that has the story here # we look for the li element that has the story here
asoup = self.make_soup(adata) asoup = self.make_soup(adata)
a = asoup.findAll('li') a = asoup.find_all('li')
for lc2 in a: for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")): if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
i=1 i=1

View file

@ -92,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
self.story.setMetadata('title', title.string) self.story.setMetadata('title', title.string)
# Author # Author
author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a') author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
authorurl = author['href'] authorurl = author['href']
self.story.setMetadata('author', author.string) self.story.setMetadata('author', author.string)
self.story.setMetadata('authorUrl', authorurl) self.story.setMetadata('authorUrl', authorurl)
@ -112,7 +112,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
### add it before the rest of the pages, if any ### add it before the rest of the pages, if any
self.add_chapter('1', self.url) self.add_chapter('1', self.url)
chapterTable = soup1.find('div',{'class':'pages'}).findAll('a') chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')
if chapterTable is not None: if chapterTable is not None:
# Multi-chapter story # Multi-chapter story
@ -124,7 +124,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
self.add_chapter(chapterTitle, chapterUrl) self.add_chapter(chapterTitle, chapterUrl)
rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip() rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
self.story.setMetadata('rating',rated) self.story.setMetadata('rating',rated)
self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y')) self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))

View file

@ -136,14 +136,14 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
try: try:
# in case link points somewhere other than the first chapter # in case link points somewhere other than the first chapter
a = soup.findAll('option')[1]['value'] a = soup.find_all('option')[1]['value']
self.story.setMetadata('storyId',a.split('=',)[1]) self.story.setMetadata('storyId',a.split('=',)[1])
url = 'http://'+self.host+'/'+a url = 'http://'+self.host+'/'+a
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
except: except:
pass pass
for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}): for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None: if a != None:
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
@ -151,7 +151,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
if len(chapters) == 0: if len(chapters) == 0:
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
@ -170,7 +170,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
except: except:
return "" return ""
cats = info.findAll('a',href=re.compile('categories.php')) cats = info.find_all('a',href=re.compile('categories.php'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
@ -188,7 +188,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
## <td><span class="sb"><b>Published:</b> 04/08/2007</td> ## <td><span class="sb"><b>Published:</b> 04/08/2007</td>
## one story had <b>Updated...</b> in the description. Restrict to sub-table ## one story had <b>Updated...</b> in the description. Restrict to sub-table
labels = info.find('table').findAll('b') labels = info.find('table').find_all('b')
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = stripHTML(labelspan) label = stripHTML(labelspan)

View file

@ -147,7 +147,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
mainmeta = soup.find('footer', {'class': 'main-meta'}) mainmeta = soup.find('footer', {'class': 'main-meta'})
alist = mainmeta.find('span', string='Author(s)') alist = mainmeta.find('span', string='Author(s)')
alist = alist.parent.findAll('a', href=re.compile(r"/profile/u/[^/]+")) alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
for a in alist: for a in alist:
self.story.addToList('authorId',a['href'].split('/')[-1]) self.story.addToList('authorId',a['href'].split('/')[-1])
self.story.addToList('authorUrl','https://'+self.host+a['href']) self.story.addToList('authorUrl','https://'+self.host+a['href'])
@ -159,10 +159,10 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
chapters=soup.find('select',{'name':'chapter-nav'}) chapters=soup.find('select',{'name':'chapter-nav'})
hrefattr=None hrefattr=None
if chapters: if chapters:
chapters=chapters.findAll('option') chapters=chapters.find_all('option')
hrefattr='value' hrefattr='value'
else: # didn't find <select name='chapter-nav', look for alternative else: # didn't find <select name='chapter-nav', look for alternative
chapters=soup.find('div',{'class':'widget--chapters'}).findAll('a') chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
hrefattr='href' hrefattr='href'
for index, chapter in enumerate(chapters): for index, chapter in enumerate(chapters):
if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text: if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
@ -202,7 +202,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# story tags # story tags
a = mainmeta.find('span',string='Tags') a = mainmeta.find('span',string='Tags')
if a: if a:
tags = a.parent.findAll('a') tags = a.parent.find_all('a')
for tag in tags: for tag in tags:
self.story.addToList('tags', tag.text) self.story.addToList('tags', tag.text)
@ -230,7 +230,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# upvote, subs, and views # upvote, subs, and views
a = soup.find('div',{'class':'title-meta'}) a = soup.find('div',{'class':'title-meta'})
spans = a.findAll('span', recursive=False) spans = a.find_all('span', recursive=False)
self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1)) self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1)) self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
if len(spans) > 2: # views can be private if len(spans) > 2: # views can be private

View file

@ -126,7 +126,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
# The update date is with the chapter links... so we will update it here as well # The update date is with the chapter links... so we will update it here as well
for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip() value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat)) self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href']) self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])
@ -134,11 +134,11 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
# Get the MetaData # Get the MetaData
# Erotica Tags # Erotica Tags
tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode')) tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
for tag in tags: for tag in tags:
self.story.addToList('eroticatags',tag.text) self.story.addToList('eroticatags',tag.text)
for td in soup.findAll('td'): for td in soup.find_all('td'):
if len(td.text)>0: if len(td.text)>0:
if 'Added on:' in td.text and '<table' not in unicode(td): if 'Added on:' in td.text and '<table' not in unicode(td):
value = td.text.replace('Added on:','').strip() value = td.text.replace('Added on:','').strip()
@ -169,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url)) raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
#strip comments from soup #strip comments from soup
[comment.extract() for comment in chaptertag.findAll(string=lambda text:isinstance(text, Comment))] [comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]
# BDSM Library basically wraps its own html around the document, # BDSM Library basically wraps its own html around the document,
# so we will be removing the script, title and meta content from the # so we will be removing the script, title and meta content from the
# storyblock # storyblock
for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'): for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
tag.extract() tag.extract()
for tag in chaptertag.findAll('o:smarttagtype'): for tag in chaptertag.find_all('o:smarttagtype'):
tag.name = 'span' tag.name = 'span'
## I'm going to take the attributes off all of the tags ## I'm going to take the attributes off all of the tags
## because they usually refer to the style that we removed above. ## because they usually refer to the style that we removed above.
for tag in chaptertag.findAll(True): for tag in chaptertag.find_all(True):
tag.attrs = None tag.attrs = None
return self.utf8FromSoup(url,chaptertag) return self.utf8FromSoup(url,chaptertag)

View file

@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -134,7 +134,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling value = labels[0].previousSibling
svalue = "" svalue = ""
@ -154,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0]) self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -194,7 +194,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -88,8 +88,8 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
intro = stripHTML(info.select_one('.inform-inform-txt').span) intro = stripHTML(info.select_one('.inform-inform-txt').span)
self.setDescription(self.url, intro) self.setDescription(self.url, intro)
for content in soup.findAll('div', {'id': 'content'}): for content in soup.find_all('div', {'id': 'content'}):
for a in content.findAll('a'): for a in content.find_all('a'):
self.add_chapter(a.get_text(), a['href']) self.add_chapter(a.get_text(), a['href'])

View file

@ -98,7 +98,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
## Title ## Title
## Some stories have a banner that has its own a tag before the actual text title... ## Some stories have a banner that has its own a tag before the actual text title...
## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last. ## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1] a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
@ -110,7 +110,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
#self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']) #self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL)) self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
@ -127,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
val = labelspan.nextSibling val = labelspan.nextSibling
value = unicode('') value = unicode('')
@ -149,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', stripHTML(value)) self.story.setMetadata('numWords', stripHTML(value))
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Pairing' in label: if 'Pairing' in label:
ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
for ship in ships: for ship in ships:
self.story.addToList('ships',ship.string) self.story.addToList('ships',ship.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -196,7 +196,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# this site has several links to each story. # this site has several links to each story.

View file

@ -95,7 +95,7 @@ class DokugaComAdapter(BaseSiteAdapter):
params['Submit'] = 'Submit' params['Submit'] = 'Submit'
# copy all hidden input tags to pick up appropriate tokens. # copy all hidden input tags to pick up appropriate tokens.
for tag in soup.findAll('input',{'type':'hidden'}): for tag in soup.find_all('input',{'type':'hidden'}):
params[tag['name']] = tag['value'] params[tag['name']] = tag['value']
loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction' loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
@ -153,7 +153,7 @@ class DokugaComAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
# Find the chapters: # Find the chapters:
chapters = soup.find('select').findAll('option') chapters = soup.find('select').find_all('option')
if len(chapters)==1: if len(chapters)==1:
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1') self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
else: else:
@ -168,7 +168,7 @@ class DokugaComAdapter(BaseSiteAdapter):
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div') asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
#grab the rest of the metadata from the author's page #grab the rest of the metadata from the author's page
for div in asoup.findAll('div'): for div in asoup.find_all('div'):
nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$")) nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
if nav != None: if nav != None:
break break
@ -208,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):
else: else:
asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'}) asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
for div in asoup.findAll('div'): for div in asoup.find_all('div'):
nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$")) nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
if nav != None: if nav != None:
break break

View file

@ -161,7 +161,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -181,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.setDescription(url,content.find('blockquote')) self.setDescription(url,content.find('blockquote'))
for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')): for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')): for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
labels = content.findAll('b') labels = content.find_all('b')
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
@ -208,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', value) self.story.setMetadata('rating', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -247,7 +247,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -138,7 +138,7 @@ class EFPFanFicNet(BaseSiteAdapter):
# no selector found, so it's a one-chapter story. # no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')}) allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
for o in allOptions: for o in allOptions:
url = u'https://%s/%s' % ( self.getSiteDomain(), url = u'https://%s/%s' % ( self.getSiteDomain(),
o['value']) o['value'])
@ -170,14 +170,14 @@ class EFPFanFicNet(BaseSiteAdapter):
if authsoup != None: if authsoup != None:
# last author link with offset should be the 'next' link. # last author link with offset should be the 'next' link.
authurl = u'https://%s/%s' % ( self.getSiteDomain(), authurl = u'https://%s/%s' % ( self.getSiteDomain(),
authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] ) authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
# Need author page for most of the metadata. # Need author page for most of the metadata.
logger.debug("fetching author page: (%s)"%authurl) logger.debug("fetching author page: (%s)"%authurl)
authsoup = self.make_soup(self.get_request(authurl)) authsoup = self.make_soup(self.get_request(authurl))
#print("authsoup:%s"%authsoup) #print("authsoup:%s"%authsoup)
storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$')) storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
for storya in storyas: for storya in storyas:
#print("======storya:%s"%storya) #print("======storya:%s"%storya)
storyblock = storya.findParent('div',{'class':'storybloc'}) storyblock = storya.findParent('div',{'class':'storybloc'})
@ -194,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
# Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br /> # Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
# Categoria: <a href="categories.php?catid=1&amp;parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&amp;parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&amp;a=">3</a> recensioni</div> # Categoria: <a href="categories.php?catid=1&amp;parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&amp;parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&amp;a=">3</a> recensioni</div>
cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
@ -262,7 +262,7 @@ class EFPFanFicNet(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1': if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
@ -288,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
# remove any header and 'o:p' tags. # remove any header and 'o:p' tags.
for tag in div.findAll("head") + div.findAll("o:p"): for tag in div.find_all("head") + div.find_all("o:p"):
tag.extract() tag.extract()
# change any html and body tags to div. # change any html and body tags to div.
for tag in div.findAll("html") + div.findAll("body"): for tag in div.find_all("html") + div.find_all("body"):
tag.name='div' tag.name='div'
# remove extra bogus doctype. # remove extra bogus doctype.

View file

@ -126,7 +126,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -144,7 +144,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling value = labels[0].previousSibling
svalue = "" svalue = ""
@ -164,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0]) self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -204,7 +204,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -163,7 +163,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
# The published and update dates are with the chapter links... # The published and update dates are with the chapter links...
# so we have to get them from there. # so we have to get them from there.
chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata( chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
'storyId')+'/([a-zA-Z0-9_]+)/')) 'storyId')+'/([a-zA-Z0-9_]+)/'))
# Here we are getting the published date. It is the date the first chapter was "updated" # Here we are getting the published date. It is the date the first chapter was "updated"
@ -241,8 +241,8 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
"Error downloading Chapter: '{0}'! Missing required element!".format(url)) "Error downloading Chapter: '{0}'! Missing required element!".format(url))
#Now, there are a lot of extranious tags within the story division.. so we will remove them. #Now, there are a lot of extranious tags within the story division.. so we will remove them.
for tag in story.findAll('ul',{'class':'pager'}) + story.findAll( for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}): 'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
tag.extract() tag.extract()
return self.utf8FromSoup(url,story) return self.utf8FromSoup(url,story)

View file

@ -167,7 +167,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## the first chapter. It generates another server request and ## the first chapter. It generates another server request and
## doesn't seem to be needed lately, so now default it to off. ## doesn't seem to be needed lately, so now default it to off.
try: try:
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option')) chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
# get chapter part of url. # get chapter part of url.
except: except:
chapcount = 1 chapcount = 1
@ -212,7 +212,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## For 1, use the second link. ## For 1, use the second link.
## For 2, fetch the crossover page and pull the two categories from there. ## For 2, fetch the crossover page and pull the two categories from there.
pre_links = soup.find('div',{'id':'pre_story_links'}) pre_links = soup.find('div',{'id':'pre_story_links'})
categories = pre_links.findAll('a',{'class':'xcontrast_txt'}) categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
#print("xcontrast_txt a:%s"%categories) #print("xcontrast_txt a:%s"%categories)
if len(categories) > 1: if len(categories) > 1:
# Strangely, the ones with *two* links are the # Strangely, the ones with *two* links are the
@ -251,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'}) grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
# for b in grayspan.findAll('button'): # for b in grayspan.find_all('button'):
# b.extract() # b.extract()
metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort') metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
#logger.debug("metatext:(%s)"%metatext) #logger.debug("metatext:(%s)"%metatext)
@ -290,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span> # Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
# Published: <span data-xutime='1384358726'>8m ago</span> # Published: <span data-xutime='1384358726'>8m ago</span>
dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')}) dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
if len(dates) > 1 : if len(dates) > 1 :
# updated get set to the same as published upstream if not found. # updated get set to the same as published upstream if not found.
self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime']))) self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
@ -395,7 +395,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story. # no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
allOptions = select.findAll('option') allOptions = select.find_all('option')
for o in allOptions: for o in allOptions:
## title URL will be put back on chapter URL during ## title URL will be put back on chapter URL during
## normalize_chapterurl() anyway, but also here for ## normalize_chapterurl() anyway, but also here for

View file

@ -134,7 +134,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
self.story.setMetadata('author',stripHTML(a)) self.story.setMetadata('author',stripHTML(a))
# Find the chapters: # Find the chapters:
for chapter in soup.find('select').findAll('option'): for chapter in soup.find('select').find_all('option'):
self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value']) self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
## title="Wörter" failed with max_zalgo:1 ## title="Wörter" failed with max_zalgo:1
@ -181,13 +181,13 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
# #find metadata on the author's page # #find metadata on the author's page
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId'))) # asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
# tr=asoup.findAll('tr') # tr=asoup.find_all('tr')
# for i in range(1,len(tr)): # for i in range(1,len(tr)):
# a = tr[i].find('a') # a = tr[i].find('a')
# if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']: # if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
# break # break
# td = tr[i].findAll('td') # td = tr[i].find_all('td')
# self.story.addToList('category',stripHTML(td[2])) # self.story.addToList('category',stripHTML(td[2]))
# self.story.setMetadata('rating', stripHTML(td[5])) # self.story.setMetadata('rating', stripHTML(td[5]))
# self.story.setMetadata('numWords', stripHTML(td[6])) # self.story.setMetadata('numWords', stripHTML(td[6]))
@ -204,7 +204,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'storytext'}) div = soup.find('div', {'id' : 'storytext'})
for a in div.findAll('script'): for a in div.find_all('script'):
a.extract() a.extract()
if None == div: if None == div:

View file

@ -146,7 +146,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
pubdate = None pubdate = None
chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'}) chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
if chapters != None: if chapters != None:
for chapdiv in chapters.findAll('li', {'class':'part'}): for chapdiv in chapters.find_all('li', {'class':'part'}):
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$")) chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
churl='https://'+self.host+chapter['href'] churl='https://'+self.host+chapter['href']
@ -179,7 +179,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
# pr=soup.find('a', href=re.compile(r'/printfic/\w+')) # pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
# pr='https://'+self.host+pr['href'] # pr='https://'+self.host+pr['href']
# pr = self.make_soup(self.get_request(pr)) # pr = self.make_soup(self.get_request(pr))
# pr=pr.findAll('div', {'class' : 'part_text'}) # pr=pr.find_all('div', {'class' : 'part_text'})
# i=0 # i=0
# for part in pr: # for part in pr:
# i=i+len(stripHTML(part).split(' ')) # i=i+len(stripHTML(part).split(' '))
@ -196,7 +196,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href')) self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))
i=0 i=0
fandoms = dlinfo.select_one('div:not([class])').findAll('a', href=re.compile(r'/fanfiction/\w+')) fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
for fandom in fandoms: for fandom in fandoms:
self.story.addToList('category',fandom.string) self.story.addToList('category',fandom.string)
i=i+1 i=i+1
@ -205,13 +205,13 @@ class FicBookNetAdapter(BaseSiteAdapter):
tags = soup.find('div',{'class':'tags'}) tags = soup.find('div',{'class':'tags'})
if tags: if tags:
for genre in tags.findAll('a',href=re.compile(r'/tags/')): for genre in tags.find_all('a',href=re.compile(r'/tags/')):
self.story.addToList('genre',stripHTML(genre)) self.story.addToList('genre',stripHTML(genre))
ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')}) ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
self.story.setMetadata('rating', stripHTML(ratingdt.find('span'))) self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))
# meta=table.findAll('a', href=re.compile(r'/ratings/')) # meta=table.find_all('a', href=re.compile(r'/ratings/'))
# i=0 # i=0
# for m in meta: # for m in meta:
# if i == 0: # if i == 0:

View file

@ -201,10 +201,10 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
# epubutils.py # epubutils.py
# Yes, this still applies to fictionalley-archive. # Yes, this still applies to fictionalley-archive.
for tag in chaptext.findAll('head') + chaptext.findAll('meta') + chaptext.findAll('script'): for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
tag.extract() tag.extract()
for tag in chaptext.findAll('body') + chaptext.findAll('html'): for tag in chaptext.find_all('body') + chaptext.find_all('html'):
tag.name = 'div' tag.name = 'div'
if self.getConfig('include_author_notes'): if self.getConfig('include_author_notes'):

View file

@ -136,7 +136,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span> # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
spanreq = metap.find("span",{"class":"story-warnings"}) spanreq = metap.find("span",{"class":"story-warnings"})
if spanreq: # can be no warnings. if spanreq: # can be no warnings.
for a in spanreq.findAll("a"): for a in spanreq.find_all("a"):
self.story.addToList('warnings',a['title']) self.story.addToList('warnings',a['title'])
## perhaps not the most efficient way to parse this, using ## perhaps not the most efficient way to parse this, using
@ -186,7 +186,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# no list found, so it's a one-chapter story. # no list found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
chapterlistlis = storylistul.findAll('li') chapterlistlis = storylistul.find_all('li')
for chapterli in chapterlistlis: for chapterli in chapterlistlis:
if "blocked" in chapterli['class']: if "blocked" in chapterli['class']:
# paranoia check. We should already be logged in by now. # paranoia check. We should already be logged in by now.

View file

@ -161,7 +161,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -178,7 +178,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -199,22 +199,22 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -238,7 +238,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -125,7 +125,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url,usecache=False)) soup = self.make_soup(self.get_request(url,usecache=False))
# removing all of the scripts # removing all of the scripts
for tag in soup.findAll('script'): for tag in soup.find_all('script'):
tag.extract() tag.extract()

View file

@ -144,13 +144,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# Find authorid and URL from... author urls. # Find authorid and URL from... author urls.
pagetitle = soup.find('div',id='pagetitle') pagetitle = soup.find('div',id='pagetitle')
for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")): for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
self.story.addToList('authorId',a['href'].split('=')[1]) self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href']) self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
self.story.addToList('author',stripHTML(a)) self.story.addToList('author',stripHTML(a))
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -166,7 +166,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = stripHTML(labelspan) label = stripHTML(labelspan)
@ -193,7 +193,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [stripHTML(cat) for cat in cats] catstext = [stripHTML(cat) for cat in cats]
for cat in catstext: for cat in catstext:
# ran across one story with an empty <a href="browse.php?type=categories&amp;catid=1"></a> # ran across one story with an empty <a href="browse.php?type=categories&amp;catid=1"></a>
@ -204,7 +204,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
if 'Characters' in label: if 'Characters' in label:
self.story.addToList('characters','Kirk') self.story.addToList('characters','Kirk')
self.story.addToList('characters','Spock') self.story.addToList('characters','Spock')
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [stripHTML(char) for char in chars] charstext = [stripHTML(char) for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',stripHTML(char)) self.story.addToList('characters',stripHTML(char))
@ -213,7 +213,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [stripHTML(genre) for genre in genres] genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -223,7 +223,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## has 'Story Type', which is much more what most sites ## has 'Story Type', which is much more what most sites
## call genre. ## call genre.
if 'Story Type' in label: if 'Story Type' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
genrestext = [stripHTML(genre) for genre in genres] genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -233,21 +233,21 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [stripHTML(warning) for warning in warnings] warningstext = [stripHTML(warning) for warning in warnings]
self.warning = ', '.join(warningstext) self.warning = ', '.join(warningstext)
for warning in warningstext: for warning in warningstext:
self.story.addToList('warnings',stripHTML(warning)) self.story.addToList('warnings',stripHTML(warning))
if 'Universe' in label: if 'Universe' in label:
universes = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX universes = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
universestext = [stripHTML(universe) for universe in universes] universestext = [stripHTML(universe) for universe in universes]
self.universe = ', '.join(universestext) self.universe = ', '.join(universestext)
for universe in universestext: for universe in universestext:
self.story.addToList('universe',stripHTML(universe)) self.story.addToList('universe',stripHTML(universe))
if 'Crossover Fandom' in label: if 'Crossover Fandom' in label:
crossoverfandoms = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX crossoverfandoms = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms] crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms]
self.crossoverfandom = ', '.join(crossoverfandomstext) self.crossoverfandom = ', '.join(crossoverfandomstext)
for crossoverfandom in crossoverfandomstext: for crossoverfandom in crossoverfandomstext:
@ -274,7 +274,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href'] series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -236,7 +236,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
breadcrumbs = soup.find('div', id='BreadCrumbComponent') breadcrumbs = soup.find('div', id='BreadCrumbComponent')
if not breadcrumbs: if not breadcrumbs:
breadcrumbs = soup.select_one('ul[class^="_breadcrumbs_list_"]') breadcrumbs = soup.select_one('ul[class^="_breadcrumbs_list_"]')
self.story.addToList('category', breadcrumbs.findAll('a')[1].string) self.story.addToList('category', breadcrumbs.find_all('a')[1].string)
## one-shot chapter ## one-shot chapter
self.add_chapter(self.story.getMetadata('title'), self.url) self.add_chapter(self.story.getMetadata('title'), self.url)
@ -356,7 +356,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>') raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
# logger.debug("\tChapter text: %s" % raw_page) # logger.debug("\tChapter text: %s" % raw_page)
page_soup = self.make_soup(raw_page) page_soup = self.make_soup(raw_page)
[comment.extract() for comment in page_soup.findAll(string=lambda text:isinstance(text, Comment))] [comment.extract() for comment in page_soup.find_all(string=lambda text:isinstance(text, Comment))]
fullhtml = "" fullhtml = ""
for aa_ht_div in page_soup.find_all('div', 'aa_ht') + page_soup.select('div[class^="_article__content_"]'): for aa_ht_div in page_soup.find_all('div', 'aa_ht') + page_soup.select('div[class^="_article__content_"]'):
if aa_ht_div.div: if aa_ht_div.div:

View file

@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -134,7 +134,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling value = labels[0].previousSibling
svalue = "" svalue = ""
@ -154,22 +154,22 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0]) self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -194,7 +194,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -678,7 +678,7 @@ class Chapter(object):
def _excludeEditorSignature(self, root): def _excludeEditorSignature(self, root):
"""Exclude editor signature from within `root' element.""" """Exclude editor signature from within `root' element."""
for stringNode in root.findAll(string=True): for stringNode in root.find_all(string=True):
if re.match(self.SIGNED_PATTERN, textNode.string): if re.match(self.SIGNED_PATTERN, textNode.string):
editorLink = textNode.findNext('a') editorLink = textNode.findNext('a')
if editorLink: if editorLink:

View file

@ -148,12 +148,12 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
# category # category
# <a href="/fanfic/src.php/a/567">Ranma 1/2</a> # <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
for a in soup.findAll('a',href=re.compile(r"^/fanfic/a/")): for a in soup.find_all('a',href=re.compile(r"^/fanfic/a/")):
self.story.addToList('category',a.string) self.story.addToList('category',a.string)
# genre # genre
# <a href="/fanfic/src.php/g/567">Ranma 1/2</a> # <a href="/fanfic/src.php/g/567">Ranma 1/2</a>
for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")): for a in soup.find_all('a',href=re.compile(r"^/fanfic/src.php/g/")):
self.story.addToList('genre',a.string) self.story.addToList('genre',a.string)
metasoup = soup.find("div",{"class":"post-meta"}) metasoup = soup.find("div",{"class":"post-meta"})

View file

@ -154,7 +154,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -170,7 +170,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -191,13 +191,13 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats] catstext = [cat.string for cat in cats]
for cat in catstext: for cat in catstext:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars] charstext = [char.string for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
@ -206,7 +206,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
genrestext = [genre.string for genre in genres] genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -216,7 +216,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [warning.string for warning in warnings] warningstext = [warning.string for warning in warnings]
self.warning = ', '.join(warningstext) self.warning = ', '.join(warningstext)
for warning in warningstext: for warning in warningstext:
@ -243,7 +243,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href'] series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -195,7 +195,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
[a.extract() for a in story.find_all('a')] [a.extract() for a in story.find_all('a')]
# Some tags have non-standard tag name. # Some tags have non-standard tag name.
for tag in story.findAll(recursive=True): for tag in story.find_all(recursive=True):
if tag.name not in HTML_TAGS: if tag.name not in HTML_TAGS:
tag.name = 'span' tag.name = 'span'

View file

@ -137,14 +137,14 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
try: try:
# in case link points somewhere other than the first chapter # in case link points somewhere other than the first chapter
a = soup.findAll('option')[1]['value'] a = soup.find_all('option')[1]['value']
self.story.setMetadata('storyId',a.split('=',)[1]) self.story.setMetadata('storyId',a.split('=',)[1])
url = 'http://'+self.host+'/'+a url = 'http://'+self.host+'/'+a
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
except: except:
pass pass
for info in asoup.findAll('table', {'class' : 'border'}): for info in asoup.find_all('table', {'class' : 'border'}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None: if a != None:
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
@ -152,7 +152,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# Find the chapters: # Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
if len(chapters) == 0: if len(chapters) == 0:
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
@ -171,7 +171,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
except: except:
return "" return ""
cats = info.findAll('a',href=re.compile('categories.php')) cats = info.find_all('a',href=re.compile('categories.php'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
@ -188,7 +188,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
self.setDescription(url,svalue) self.setDescription(url,svalue)
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = info.findAll('b') labels = info.find_all('b')
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = stripHTML(labelspan) label = stripHTML(labelspan)

View file

@ -93,26 +93,26 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
chapters = soup.find('select') chapters = soup.find('select')
if chapters == None: if chapters == None:
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
for b in soup.findAll('b'): for b in soup.find_all('b'):
if b.text == "Updated": if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',') date = b.nextSibling.string.split(': ')[1].split(',')
self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat)) self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat)) self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat))
else: else:
i = 0 i = 0
chapters = chapters.findAll('option') chapters = chapters.find_all('option')
for chapter in chapters: for chapter in chapters:
self.add_chapter(chapter,'https://'+self.host+chapter['value']) self.add_chapter(chapter,'https://'+self.host+chapter['value'])
if i == 0: if i == 0:
self.story.setMetadata('storyId',chapter['value'].split('/')[3]) self.story.setMetadata('storyId',chapter['value'].split('/')[3])
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b') head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
for b in head: for b in head:
if b.text == "Updated": if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',') date = b.nextSibling.string.split(': ')[1].split(',')
self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat)) self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
if i == (len(chapters)-1): if i == (len(chapters)-1):
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b') head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
for b in head: for b in head:
if b.text == "Updated": if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',') date = b.nextSibling.string.split(': ')[1].split(',')
@ -160,20 +160,20 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url)) soup = self.make_soup(self.get_request(url))
chapter=self.make_soup('<div class="story"></div>') chapter=self.make_soup('<div class="story"></div>')
for p in soup.findAll(['p','blockquote']): for p in soup.find_all(['p','blockquote']):
if "This is for problems with the formatting or the layout of the chapter." in stripHTML(p): if "This is for problems with the formatting or the layout of the chapter." in stripHTML(p):
break break
chapter.append(p) chapter.append(p)
for a in chapter.findAll('div'): for a in chapter.find_all('div'):
a.extract() a.extract()
for a in chapter.findAll('table'): for a in chapter.find_all('table'):
a.extract() a.extract()
for a in chapter.findAll('script'): for a in chapter.find_all('script'):
a.extract() a.extract()
for a in chapter.findAll('form'): for a in chapter.find_all('form'):
a.extract() a.extract()
for a in chapter.findAll('textarea'): for a in chapter.find_all('textarea'):
a.extract() a.extract()

View file

@ -80,7 +80,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/fanfiction/'+chapter['href']) self.add_chapter(chapter,'http://'+self.host+'/fanfiction/'+chapter['href'])
@ -92,7 +92,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -116,13 +116,13 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('reads', value) self.story.setMetadata('reads', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats] catstext = [cat.string for cat in cats]
for cat in catstext: for cat in catstext:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars] charstext = [char.string for char in chars]
for char in charstext: for char in charstext:
if "Snape and Harry (required)" in char: if "Snape and Harry (required)" in char:
@ -132,27 +132,27 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Warning' in label: if 'Warning' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',stripHTML(warning)) self.story.addToList('warnings',stripHTML(warning))
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for genre in genres: for genre in genres:
self.story.addToList('genre',stripHTML(genre)) self.story.addToList('genre',stripHTML(genre))
if 'Takes Place' in label: if 'Takes Place' in label:
takesplaces = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) takesplaces = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for takesplace in takesplaces: for takesplace in takesplaces:
self.story.addToList('takesplaces',stripHTML(takesplace)) self.story.addToList('takesplaces',stripHTML(takesplace))
if 'Snape flavour' in label: if 'Snape flavour' in label:
snapeflavours = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) snapeflavours = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for snapeflavour in snapeflavours: for snapeflavour in snapeflavours:
self.story.addToList('snapeflavours',stripHTML(snapeflavour)) self.story.addToList('snapeflavours',stripHTML(snapeflavour))
if 'Tags' in label: if 'Tags' in label:
sitetags = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) sitetags = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for sitetag in sitetags: for sitetag in sitetags:
self.story.addToList('sitetags',stripHTML(sitetag)) self.story.addToList('sitetags',stripHTML(sitetag))
@ -176,7 +176,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/fanfiction/'+a['href'] series_url = 'http://'+self.host+'/fanfiction/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -121,7 +121,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/missingpieces/'+chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host+'/missingpieces/'+chapter['href']+addurl)
@ -138,7 +138,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -159,22 +159,22 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -198,7 +198,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -111,7 +111,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -126,7 +126,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
except: except:
return "" return ""
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -147,22 +147,22 @@ class PsychFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -186,7 +186,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -187,7 +187,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
chapters = soup.find('table',{'id':'chapters'}).find('tbody') chapters = soup.find('table',{'id':'chapters'}).find('tbody')
tds = [tr.findAll('td') for tr in chapters.findAll('tr')] tds = [tr.find_all('td') for tr in chapters.find_all('tr')]
if not tds: if not tds:
raise exceptions.FailedToDownload( raise exceptions.FailedToDownload(

View file

@ -193,7 +193,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
# (fetch multiple authors) # (fetch multiple authors)
alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")) alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
for a in alist: for a in alist:
self.story.addToList('authorId',a['href'].split('=')[1]) self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href']) self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href'])
@ -201,11 +201,11 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
# Reviews # Reviews
reviewdata = soup.find('div', {'id' : 'sort'}) reviewdata = soup.find('div', {'id' : 'sort'})
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one. a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
self.story.setMetadata('reviews',stripHTML(a)) self.story.setMetadata('reviews',stripHTML(a))
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/fanfics/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/fanfics/'+chapter['href']+addurl)
@ -222,7 +222,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -237,13 +237,13 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats] catstext = [cat.string for cat in cats]
for cat in catstext: for cat in catstext:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars] charstext = [char.string for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
@ -252,7 +252,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [genre.string for genre in genres] genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -262,7 +262,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [warning.string for warning in warnings] warningstext = [warning.string for warning in warnings]
self.warning = ', '.join(warningstext) self.warning = ', '.join(warningstext)
for warning in warningstext: for warning in warningstext:
@ -291,7 +291,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
series_url = 'http://'+self.host+'/fanfics/'+a['href'] series_url = 'http://'+self.host+'/fanfics/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -240,13 +240,13 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
# Categories # Categories
if soup.find('span',{'class': 'wi_fic_showtags_inner'}): if soup.find('span',{'class': 'wi_fic_showtags_inner'}):
categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).findAll('a') categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).find_all('a')
for category in categories: for category in categories:
self.story.addToList('category', stripHTML(category)) self.story.addToList('category', stripHTML(category))
# Genres # Genres
if soup.find('a',{'class': 'fic_genre'}): if soup.find('a',{'class': 'fic_genre'}):
genres = soup.findAll('a',{'class': 'fic_genre'}) genres = soup.find_all('a',{'class': 'fic_genre'})
for genre in genres: for genre in genres:
self.story.addToList('genre', stripHTML(genre)) self.story.addToList('genre', stripHTML(genre))
@ -258,7 +258,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
# Content Warnings # Content Warnings
if soup.find('ul',{'class': 'ul_rate_expand'}): if soup.find('ul',{'class': 'ul_rate_expand'}):
warnings = soup.find('ul',{'class': 'ul_rate_expand'}).findAll('a') warnings = soup.find('ul',{'class': 'ul_rate_expand'}).find_all('a')
for warn in warnings: for warn in warnings:
self.story.addToList('warnings', stripHTML(warn)) self.story.addToList('warnings', stripHTML(warn))
@ -312,7 +312,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata(metadata, stripHTML(row.find('td'))) self.story.setMetadata(metadata, stripHTML(row.find('td')))
if soup.find('table',{'class': 'table_pro_overview'}): if soup.find('table',{'class': 'table_pro_overview'}):
stats_table = soup.find('table',{'class': 'table_pro_overview'}).findAll('tr') stats_table = soup.find('table',{'class': 'table_pro_overview'}).find_all('tr')
for row in stats_table: for row in stats_table:
find_stats_data("Total Views (All)", row, "views") find_stats_data("Total Views (All)", row, "views")
find_stats_data("Word Count", row, "numWords") find_stats_data("Word Count", row, "numWords")

View file

@ -171,7 +171,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
# (fetch multiple authors) # (fetch multiple authors)
alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")) alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
for a in alist: for a in alist:
self.story.addToList('authorId',a['href'].split('=')[1]) self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','https://'+self.host+'/fanfics/'+a['href']) self.story.addToList('authorUrl','https://'+self.host+'/fanfics/'+a['href'])
@ -180,12 +180,12 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
# Reviews # Reviews
reviewdata = soup.find('div', {'id' : 'sort'}) reviewdata = soup.find('div', {'id' : 'sort'})
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one. a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
self.story.setMetadata('reviews',stripHTML(a)) self.story.setMetadata('reviews',stripHTML(a))
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/fanfics/'+chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host+'/fanfics/'+chapter['href']+addurl)
@ -208,7 +208,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
self.setDescription(url,self.make_soup(summarydata)) self.setDescription(url,self.make_soup(summarydata))
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -220,13 +220,13 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats] catstext = [cat.string for cat in cats]
for cat in catstext: for cat in catstext:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars] charstext = [char.string for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
@ -235,7 +235,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [genre.string for genre in genres] genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -245,7 +245,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number ## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific. ## is correct, though--it's site specific.
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [warning.string for warning in warnings] warningstext = [warning.string for warning in warnings]
self.warning = ', '.join(warningstext) self.warning = ', '.join(warningstext)
for warning in warningstext: for warning in warningstext:
@ -273,7 +273,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/fanfics/'+a['href'] series_url = 'https://'+self.host+'/fanfics/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -109,7 +109,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('title',stripHTML(titlea)) self.story.setMetadata('title',stripHTML(titlea))
# Find the chapters (from soup, not authsoup): # Find the chapters (from soup, not authsoup):
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/siye/'+chapter['href']) self.add_chapter(chapter,'https://'+self.host+'/siye/'+chapter['href'])
@ -121,7 +121,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
metatable = soup.find('table',{'width':'95%'}) metatable = soup.find('table',{'width':'95%'})
# Categories # Categories
cat_as = metatable.findAll('a', href=re.compile(r'categories.php')) cat_as = metatable.find_all('a', href=re.compile(r'categories.php'))
for cat_a in cat_as: for cat_a in cat_as:
self.story.addToList('category',stripHTML(cat_a)) self.story.addToList('category',stripHTML(cat_a))
@ -209,7 +209,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href'] series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -99,7 +99,7 @@ class SpiritFanfictionComAdapter(BaseSiteAdapter):
# Authors # Authors
# Find authorid and URL # Find authorid and URL
authors = (title.find_next('div', {'class':'left'})).findAll('span', {'class':'usuario'}) authors = (title.find_next('div', {'class':'left'})).find_all('span', {'class':'usuario'})
for author in authors: for author in authors:
self.story.addToList('authorId', author.find('a')['href'].split('/')[-1]) self.story.addToList('authorId', author.find('a')['href'].split('/')[-1])
@ -114,10 +114,10 @@ class SpiritFanfictionComAdapter(BaseSiteAdapter):
newestChapter = None newestChapter = None
self.newestChapterNum = None # save for comparing during update. self.newestChapterNum = None # save for comparing during update.
# Find the chapters: # Find the chapters:
chapters = soup.findAll('table', {'class':'listagemCapitulos espacamentoTop'}) chapters = soup.find_all('table', {'class':'listagemCapitulos espacamentoTop'})
for chapter in chapters: for chapter in chapters:
for row in chapter.findAll('tr', {'class': 'listagem-textoBg1'}): # Find each row with chapter info for row in chapter.find_all('tr', {'class': 'listagem-textoBg1'}): # Find each row with chapter info
a = row.find('a') # Chapter link a = row.find('a') # Chapter link
# Datetime # Datetime

View file

@ -93,7 +93,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
# Find the chapters: chapterview.asp?sid=7000&cid=30919 # Find the chapters: chapterview.asp?sid=7000&cid=30919
chapters=soup.findAll('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+r"&cid=\d+$")) chapters=soup.find_all('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+r"&cid=\d+$"))
if len(chapters)==1: if len(chapters)==1:
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+chapters[0]['href']) self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+chapters[0]['href'])
else: else:
@ -109,14 +109,14 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# no convenient way to get word count # no convenient way to get word count
for td in asoup.findAll('td', {'colspan' : '3'}): for td in asoup.find_all('td', {'colspan' : '3'}):
if td.find('a', href=re.compile(r'chapterlistview.asp\?SID='+self.story.getMetadata('storyId'))) != None: if td.find('a', href=re.compile(r'chapterlistview.asp\?SID='+self.story.getMetadata('storyId'))) != None:
break break
td=td.nextSibling.nextSibling td=td.nextSibling.nextSibling
self.story.setMetadata('dateUpdated', makeDate(stripHTML(td).split(': ')[1], self.dateformat)) self.story.setMetadata('dateUpdated', makeDate(stripHTML(td).split(': ')[1], self.dateformat))
try: try:
tr=td.parent.nextSibling.nextSibling.nextSibling.nextSibling tr=td.parent.nextSibling.nextSibling.nextSibling.nextSibling
td=tr.findAll('td') td=tr.find_all('td')
self.story.setMetadata('rating', td[0].string.split(': ')[1]) self.story.setMetadata('rating', td[0].string.split(': ')[1])
self.story.setMetadata('status', td[2].string.split(': ')[1]) self.story.setMetadata('status', td[2].string.split(': ')[1])
self.story.setMetadata('datePublished', makeDate(stripHTML(td[4]).split(': ')[1], self.dateformat)) self.story.setMetadata('datePublished', makeDate(stripHTML(td[4]).split(': ')[1], self.dateformat))

View file

@ -230,7 +230,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(a)) self.story.setMetadata('title',stripHTML(a))
authfrom = soup.find('footer') authfrom = soup.find('footer')
alist = authfrom.findAll('a', {'rel' : 'author'}) alist = authfrom.find_all('a', {'rel' : 'author'})
for a in alist: for a in alist:
self.story.addToList('authorId',a['href'].split('/')[2]) self.story.addToList('authorId',a['href'].split('/')[2])
self.story.addToList('authorUrl','https://'+self.host+a['href']) self.story.addToList('authorUrl','https://'+self.host+a['href'])
@ -298,7 +298,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
self.has_universes = False self.has_universes = False
title_cell = story_row.find('td', {'class' : 'lc2'}) title_cell = story_row.find('td', {'class' : 'lc2'})
for cat in title_cell.findAll('div', {'class' : 'typediv'}): for cat in title_cell.find_all('div', {'class' : 'typediv'}):
self.story.addToList('genre',cat.text) self.story.addToList('genre',cat.text)
# in lieu of word count. # in lieu of word count.
@ -382,7 +382,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
universes_soup = self.make_soup(self.get_request(universe_url) ) universes_soup = self.make_soup(self.get_request(universe_url) )
# logger.debug("Universe url='{0}'".format(universe_url)) # logger.debug("Universe url='{0}'".format(universe_url))
if universes_soup: if universes_soup:
universes = universes_soup.findAll('div', {'class' : 'ser-box'}) universes = universes_soup.find_all('div', {'class' : 'ser-box'})
# logger.debug("Number of Universes: %d" % len(universes)) # logger.debug("Number of Universes: %d" % len(universes))
for universe in universes: for universe in universes:
# logger.debug("universe.find('a')={0}".format(universe.find('a'))) # logger.debug("universe.find('a')={0}".format(universe.find('a')))
@ -477,7 +477,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
return value return value
def parseOtherAttributes(self, other_attribute_element): def parseOtherAttributes(self, other_attribute_element):
for b in other_attribute_element.findAll('b'): for b in other_attribute_element.find_all('b'):
#logger.debug('Getting metadata: "%s"' % b) #logger.debug('Getting metadata: "%s"' % b)
label = b.text label = b.text
if label in ['Posted:', 'Concluded:', 'Updated:']: if label in ['Posted:', 'Concluded:', 'Updated:']:
@ -576,7 +576,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
if pager != None: if pager != None:
urls=pager.findAll('a') urls=pager.find_all('a')
urls=urls[:len(urls)-1] urls=urls[:len(urls)-1]
# logger.debug("pager urls:%s"%urls) # logger.debug("pager urls:%s"%urls)
pager.extract() pager.extract()
@ -630,7 +630,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
# putting a 'conTag' at the *top* now, too. So this # putting a 'conTag' at the *top* now, too. So this
# was nuking every page but the first and last. Now # was nuking every page but the first and last. Now
# only if 'Continues' # only if 'Continues'
for contag in pagetag.findAll('span', {'class' : 'conTag'}): for contag in pagetag.find_all('span', {'class' : 'conTag'}):
# remove everything after continues... # remove everything after continues...
if 'Continuation' in contag.text: if 'Continuation' in contag.text:
tag = contag tag = contag
@ -659,7 +659,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
# If it is a chapter, there are dates at the start for when it was posted or modified. These plus # If it is a chapter, there are dates at the start for when it was posted or modified. These plus
# everything before them can be discarded. # everything before them can be discarded.
postedDates = pagetag.findAll('div', {'class' : 'date'}) postedDates = pagetag.find_all('div', {'class' : 'date'})
# logger.debug(postedDates) # logger.debug(postedDates)
if postedDates: if postedDates:
a = postedDates[0].previousSibling a = postedDates[0].previousSibling
@ -668,7 +668,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
b = a.previousSibling b = a.previousSibling
a.extract() a.extract()
a = b a = b
for a in pagetag.findAll('div', {'class' : 'date'}): for a in pagetag.find_all('div', {'class' : 'date'}):
a.extract() a.extract()
# Kill the vote form and everything after it. # Kill the vote form and everything after it.

View file

@ -61,7 +61,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
soup = self.make_soup(data) soup = self.make_soup(data)
title = soup.find('h1') title = soup.find('h1')
for tag in title.findAll('sup'): for tag in title.find_all('sup'):
tag.extract() tag.extract()
self.story.setMetadata('title', stripHTML(title.text)) self.story.setMetadata('title', stripHTML(title.text))
@ -91,7 +91,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
if "NC-18" in rating: if "NC-18" in rating:
raise exceptions.AdultCheckRequired(self.url) raise exceptions.AdultCheckRequired(self.url)
characters = soup.findAll('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png")) characters = soup.find_all('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png"))
logger.debug("numCharacters: (%s)"%str(len(characters))) logger.debug("numCharacters: (%s)"%str(len(characters)))
for x in range(0,len(characters)): for x in range(0,len(characters)):
@ -119,7 +119,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
self.story.setMetadata('language','Russian') self.story.setMetadata('language','Russian')
chapters=chapters_table.findAll('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+")) chapters=chapters_table.find_all('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+"))
self.story.setMetadata('numChapters', len(chapters)) self.story.setMetadata('numChapters', len(chapters))
logger.debug("numChapters: (%s)"%str(self.story.getMetadata('numChapters'))) logger.debug("numChapters: (%s)"%str(self.story.getMetadata('numChapters')))

View file

@ -131,7 +131,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -143,7 +143,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -164,19 +164,19 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats] catstext = [cat.string for cat in cats]
for cat in catstext: for cat in catstext:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars] charstext = [char.string for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
genrestext = [genre.string for genre in genres] genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext) self.genre = ', '.join(genrestext)
for genre in genrestext: for genre in genrestext:
@ -203,7 +203,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href'] series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -168,7 +168,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host + self.section + chapter['href']+addurl) self.add_chapter(chapter,'https://'+self.host + self.section + chapter['href']+addurl)
@ -186,7 +186,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# summary, rated, word count, categories, characters, genre, warnings, completed, published, updated, seires # summary, rated, word count, categories, characters, genre, warnings, completed, published, updated, seires
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.text label = labelspan.text
@ -207,22 +207,22 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)

View file

@ -199,14 +199,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
infodata = self.get_request(infourl) infodata = self.get_request(infourl)
infosoup = self.make_soup(infodata) infosoup = self.make_soup(infodata)
# for a in infosoup.findAll('a',href=re.compile(r"^/Author-\d+")): # for a in infosoup.find_all('a',href=re.compile(r"^/Author-\d+")):
# self.story.addToList('authorId',a['href'].split('/')[1].split('-')[1]) # self.story.addToList('authorId',a['href'].split('/')[1].split('-')[1])
# self.story.addToList('authorUrl','https://'+self.host+a['href'].replace("/Author-","/AuthorStories-")) # self.story.addToList('authorUrl','https://'+self.host+a['href'].replace("/Author-","/AuthorStories-"))
# self.story.addToList('author',stripHTML(a)) # self.story.addToList('author',stripHTML(a))
# second verticaltable is the chapter list. # second verticaltable is the chapter list.
table = infosoup.findAll('table',{'class':'verticaltable'})[1] table = infosoup.find_all('table',{'class':'verticaltable'})[1]
for a in table.findAll('a',href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))): for a in table.find_all('a',href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))):
autha = a.findNext('a',href=re.compile(r"^/Author-\d+")) autha = a.findNext('a',href=re.compile(r"^/Author-\d+"))
self.story.addToList('authorId',autha['href'].split('/')[1].split('-')[1]) self.story.addToList('authorId',autha['href'].split('/')[1].split('-')[1])
self.story.addToList('authorUrl','https://'+self.host+autha['href'].replace("/Author-","/AuthorStories-")) self.story.addToList('authorUrl','https://'+self.host+autha['href'].replace("/Author-","/AuthorStories-"))
@ -224,7 +224,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story. # no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
allOptions = select.findAll('option') allOptions = select.find_all('option')
for o in allOptions: for o in allOptions:
url = "https://"+self.host+o['value'] url = "https://"+self.host+o['value']
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
@ -237,7 +237,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
BtVSNonX = False BtVSNonX = False
char=None char=None
romance=False romance=False
for cat in verticaltable.findAll('a', href=re.compile(r"^/Category-")): for cat in verticaltable.find_all('a', href=re.compile(r"^/Category-")):
# assumes only one -Centered and one Pairing: cat can ever # assumes only one -Centered and one Pairing: cat can ever
# be applied to one story. # be applied to one story.
# Seen at least once: incorrect (empty) cat link, thus "and cat.string" # Seen at least once: incorrect (empty) cat link, thus "and cat.string"
@ -265,7 +265,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
if 'BtVS/AtS Non-Crossover' == cat.string: if 'BtVS/AtS Non-Crossover' == cat.string:
BtVSNonX = True BtVSNonX = True
verticaltabletds = verticaltable.findAll('td') verticaltabletds = verticaltable.find_all('td')
self.story.setMetadata('rating', verticaltabletds[2].string) self.story.setMetadata('rating', verticaltabletds[2].string)
self.story.setMetadata('numWords', verticaltabletds[4].string) self.story.setMetadata('numWords', verticaltabletds[4].string)
@ -279,7 +279,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('datePublished',makeDate(stripHTML(verticaltabletds[8].string), self.dateformat)) self.story.setMetadata('datePublished',makeDate(stripHTML(verticaltabletds[8].string), self.dateformat))
self.story.setMetadata('dateUpdated',makeDate(stripHTML(verticaltabletds[9].string), self.dateformat)) self.story.setMetadata('dateUpdated',makeDate(stripHTML(verticaltabletds[9].string), self.dateformat))
for icon in storydiv.find('span',{'class':'storyicons'}).findAll('img'): for icon in storydiv.find('span',{'class':'storyicons'}).find_all('img'):
if( icon['title'] not in ['Non-Crossover'] ) : if( icon['title'] not in ['Non-Crossover'] ) :
self.story.addToList('genre',icon['title']) self.story.addToList('genre',icon['title'])
else: else:

View file

@ -127,7 +127,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']) self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
@ -139,7 +139,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
return "" return ""
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -159,20 +159,20 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats] catstext = [cat.string for cat in cats]
for cat in catstext: for cat in catstext:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars] charstext = [char.string for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
## twilighted.net doesn't use genre. ## twilighted.net doesn't use genre.
# if 'Genre' in label: # if 'Genre' in label:
# genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) # genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
# genrestext = [genre.string for genre in genres] # genrestext = [genre.string for genre in genres]
# self.genre = ', '.join(genrestext) # self.genre = ', '.join(genrestext)
# for genre in genrestext: # for genre in genrestext:
@ -199,7 +199,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href'] series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -111,7 +111,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string) self.story.setMetadata('author',a.string)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/archive/'+chapter['href']+addurl) self.add_chapter(chapter,'http://'+self.host+'/archive/'+chapter['href']+addurl)
@ -126,7 +126,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
except: except:
return "" return ""
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
for labelspan in labels: for labelspan in labels:
value = labelspan.nextSibling value = labelspan.nextSibling
label = labelspan.string label = labelspan.string
@ -150,24 +150,24 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('reads', value) self.story.setMetadata('reads', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats] catstext = [cat.string for cat in cats]
for cat in catstext: for cat in catstext:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars] charstext = [char.string for char in chars]
for char in charstext: for char in charstext:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -190,7 +190,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/archive/'+a['href'] series_url = 'http://'+self.host+'/archive/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -80,7 +80,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story. # no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url) self.add_chapter(self.story.getMetadata('title'),url)
else: else:
allOptions = select.findAll('option') allOptions = select.find_all('option')
for o in allOptions: for o in allOptions:
url = self.url + "&chapter=%s" % o['value'] url = self.url + "&chapter=%s" % o['value']
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
@ -178,7 +178,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href'] series_url = 'https://'+self.host+'/'+a['href']
try: try:
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1 i=1
for a in storyas: for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -100,7 +100,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating) self.story.setMetadata('rating', rating)
# Find the chapters: # Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")): for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles. # just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/wrfa/'+chapter['href']) self.add_chapter(chapter,'https://'+self.host+'/wrfa/'+chapter['href'])
@ -110,7 +110,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc # <span class="label">Rated:</span> NC-17<br /> etc
content=soup.find('div',{'class' : 'content'}) content=soup.find('div',{'class' : 'content'})
labels = soup.findAll('span',{'class':'label'}) labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling value = labels[0].previousSibling
svalue = "" svalue = ""
@ -134,22 +134,22 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0]) self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats: for cat in cats:
self.story.addToList('category',cat.string) self.story.addToList('category',cat.string)
if 'Characters' in label: if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
if 'Genre' in label: if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres: for genre in genres:
self.story.addToList('genre',genre.string) self.story.addToList('genre',genre.string)
if 'Warnings' in label: if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
@ -173,7 +173,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url)) seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href. # can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+')) storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1 i=1
for a in storyas: for a in storyas:
# skip 'report this' and 'TOC' links # skip 'report this' and 'TOC' links

View file

@ -268,7 +268,7 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
tag.extract() tag.extract()
# Some tags have non-standard tag name. # Some tags have non-standard tag name.
for tag in story.findAll(recursive=True): for tag in story.find_all(recursive=True):
if tag.name not in HTML_TAGS: if tag.name not in HTML_TAGS:
tag.name = 'span' tag.name = 'span'

View file

@ -127,7 +127,7 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
self.story.setMetadata('status', 'Completed') self.story.setMetadata('status', 'Completed')
for detail in soup.findAll('li'): for detail in soup.find_all('li'):
det = unicode(detail).replace(u"\xa0",'') det = unicode(detail).replace(u"\xa0",'')
heading = stripHTML(det).split(' - ')[0] heading = stripHTML(det).split(' - ')[0]
text = stripHTML(det).replace(heading+' - ','') text = stripHTML(det).replace(heading+' - ','')
@ -180,18 +180,18 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
logger.debug('Using the html retrieved previously from: %s' % url) logger.debug('Using the html retrieved previously from: %s' % url)
story = self.html.findAll('table')[0].findAll('td')[0].find('div') story = self.html.find_all('table')[0].find_all('td')[0].find('div')
if None == story: if None == story:
raise exceptions.FailedToDownload( raise exceptions.FailedToDownload(
"Error downloading Chapter: %s! Missing required element!" % url) "Error downloading Chapter: %s! Missing required element!" % url)
## Removing the scripts, tables, links and divs from the story ## Removing the scripts, tables, links and divs from the story
for tag in (story.findAll('script') + story.findAll('table') + story.findAll('a') + for tag in (story.find_all('script') + story.find_all('table') + story.find_all('a') +
story.findAll('div')): story.find_all('div')):
tag.extract() tag.extract()
#strip comments from story #strip comments from story
[comment.extract() for comment in story.findAll(string=lambda text:isinstance(text, Comment))] [comment.extract() for comment in story.find_all(string=lambda text:isinstance(text, Comment))]
return self.utf8FromSoup(url,story) return self.utf8FromSoup(url,story)

View file

@ -803,7 +803,7 @@ class BaseSiteAdapter(Requestable):
# show up differently and doing stripHTML() also # show up differently and doing stripHTML() also
# catches <br> etc. # catches <br> etc.
soup = BeautifulSoup(unicode(soup),'html5lib') soup = BeautifulSoup(unicode(soup),'html5lib')
for t in soup.findAll(recursive=True): for t in soup.find_all(recursive=True):
for attr in self.get_attr_keys(t): for attr in self.get_attr_keys(t):
if attr not in acceptable_attributes: if attr not in acceptable_attributes:
del t[attr] ## strip all tag attributes except acceptable_attributes del t[attr] ## strip all tag attributes except acceptable_attributes

View file

@ -235,7 +235,7 @@ class BaseEfictionAdapter(BaseSiteAdapter):
soup = self.make_soup(html) soup = self.make_soup(html)
## fix all local image 'src' to absolute ## fix all local image 'src' to absolute
for img in soup.findAll("img", {"src": _REGEX_DOESNT_START_WITH_HTTP}): for img in soup.find_all("img", {"src": _REGEX_DOESNT_START_WITH_HTTP}):
# TODO handle '../../' and so on # TODO handle '../../' and so on
if img['src'].startswith('/'): if img['src'].startswith('/'):
img['src'] = img['src'][1:] img['src'] = img['src'][1:]
@ -410,13 +410,13 @@ class BaseEfictionAdapter(BaseSiteAdapter):
if pagetitleDiv.find('a') is None: if pagetitleDiv.find('a') is None:
raise exceptions.FailedToDownload("Couldn't find title and author") raise exceptions.FailedToDownload("Couldn't find title and author")
self.story.setMetadata('title', stripHTML(pagetitleDiv.find("a"))) self.story.setMetadata('title', stripHTML(pagetitleDiv.find("a")))
authorLink = pagetitleDiv.findAll("a")[1] authorLink = pagetitleDiv.find_all("a")[1]
self.story.setMetadata('author', stripHTML(authorLink)) self.story.setMetadata('author', stripHTML(authorLink))
self.story.setMetadata('authorId', re.search(r"\d+", authorLink['href']).group(0)) self.story.setMetadata('authorId', re.search(r"\d+", authorLink['href']).group(0))
self.story.setMetadata('authorUrl', self.getViewUserUrl(self.story.getMetadata('authorId'))) self.story.setMetadata('authorUrl', self.getViewUserUrl(self.story.getMetadata('authorId')))
## Parse the infobox ## Parse the infobox
labelSpans = soup.find("div", "infobox").find("div", "content").findAll("span", "label") labelSpans = soup.find("div", "infobox").find("div", "content").find_all("span", "label")
for labelSpan in labelSpans: for labelSpan in labelSpans:
valueStr = "" valueStr = ""
nextEl = labelSpan.nextSibling nextEl = labelSpan.nextSibling

View file

@ -190,10 +190,10 @@ class BaseOTWAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload('Site says: "Sorry, you don\'t have permission to access the page you were trying to reach."') raise exceptions.FailedToDownload('Site says: "Sorry, you don\'t have permission to access the page you were trying to reach."')
soup = self.make_soup(data) soup = self.make_soup(data)
for tag in soup.findAll('div',id='admin-banner'): for tag in soup.find_all('div',id='admin-banner'):
tag.extract() tag.extract()
metasoup = self.make_soup(meta) metasoup = self.make_soup(meta)
for tag in metasoup.findAll('div',id='admin-banner'): for tag in metasoup.find_all('div',id='admin-banner'):
tag.extract() tag.extract()
@ -234,7 +234,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
self.story.setMetadata('restricted','Restricted') self.story.setMetadata('restricted','Restricted')
# Find authorid and URL from... author url. # Find authorid and URL from... author url.
alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/.+")) alist = soup.find_all('a', href=re.compile(r"/users/\w+/pseuds/.+"))
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link. if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
self.story.setMetadata('author','Anonymous') self.story.setMetadata('author','Anonymous')
self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/') self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
@ -267,7 +267,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
# change the dates of earlier ones by editing them--That WILL # change the dates of earlier ones by editing them--That WILL
# break epub update. # break epub update.
# Find the chapters: # Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$")) chapters=soup.find_all('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
self.story.setMetadata('numChapters',len(chapters)) self.story.setMetadata('numChapters',len(chapters))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
if len(chapters)==1: if len(chapters)==1:
@ -300,50 +300,50 @@ class BaseOTWAdapter(BaseSiteAdapter):
a = metasoup.find('dd',{'class':"fandom tags"}) a = metasoup.find('dd',{'class':"fandom tags"})
if a != None: if a != None:
fandoms = a.findAll('a',{'class':"tag"}) fandoms = a.find_all('a',{'class':"tag"})
for fandom in fandoms: for fandom in fandoms:
self.story.addToList('fandoms',fandom.string) self.story.addToList('fandoms',fandom.string)
a = metasoup.find('dd',{'class':"warning tags"}) a = metasoup.find('dd',{'class':"warning tags"})
if a != None: if a != None:
warnings = a.findAll('a',{'class':"tag"}) warnings = a.find_all('a',{'class':"tag"})
for warning in warnings: for warning in warnings:
self.story.addToList('warnings',warning.string) self.story.addToList('warnings',warning.string)
a = metasoup.find('dd',{'class':"freeform tags"}) a = metasoup.find('dd',{'class':"freeform tags"})
if a != None: if a != None:
genres = a.findAll('a',{'class':"tag"}) genres = a.find_all('a',{'class':"tag"})
for genre in genres: for genre in genres:
self.story.addToList('freeformtags',genre.string) self.story.addToList('freeformtags',genre.string)
a = metasoup.find('dd',{'class':"category tags"}) a = metasoup.find('dd',{'class':"category tags"})
if a != None: if a != None:
genres = a.findAll('a',{'class':"tag"}) genres = a.find_all('a',{'class':"tag"})
for genre in genres: for genre in genres:
if genre != "Gen": if genre != "Gen":
self.story.addToList('ao3categories',genre.string) self.story.addToList('ao3categories',genre.string)
a = metasoup.find('dd',{'class':"character tags"}) a = metasoup.find('dd',{'class':"character tags"})
if a != None: if a != None:
chars = a.findAll('a',{'class':"tag"}) chars = a.find_all('a',{'class':"tag"})
for char in chars: for char in chars:
self.story.addToList('characters',char.string) self.story.addToList('characters',char.string)
a = metasoup.find('dd',{'class':"relationship tags"}) a = metasoup.find('dd',{'class':"relationship tags"})
if a != None: if a != None:
ships = a.findAll('a',{'class':"tag"}) ships = a.find_all('a',{'class':"tag"})
for ship in ships: for ship in ships:
self.story.addToList('ships',ship.string) self.story.addToList('ships',ship.string)
a = metasoup.find('dd',{'class':"collections"}) a = metasoup.find('dd',{'class':"collections"})
if a != None: if a != None:
collections = a.findAll('a') collections = a.find_all('a')
for collection in collections: for collection in collections:
self.story.addToList('collections',collection.string) self.story.addToList('collections',collection.string)
stats = metasoup.find('dl',{'class':'stats'}) stats = metasoup.find('dl',{'class':'stats'})
dt = stats.findAll('dt') dt = stats.find_all('dt')
dd = stats.findAll('dd') dd = stats.find_all('dd')
for x in range(0,len(dt)): for x in range(0,len(dt)):
label = dt[x].text label = dt[x].text
value = dd[x].text value = dd[x].text
@ -386,7 +386,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
ddseries = metasoup.find('dd',{'class':"series"}) ddseries = metasoup.find('dd',{'class':"series"})
if ddseries: if ddseries:
for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))): for i, a in enumerate(ddseries.find_all('a', href=re.compile(r"/series/\d+"))):
series_name = stripHTML(a) series_name = stripHTML(a)
series_url = 'https://'+self.host+a['href'] series_url = 'https://'+self.host+a['href']
series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #" series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"

View file

@ -377,7 +377,7 @@ class BaseXenForo2ForumAdapter(BaseSiteAdapter):
return return
def get_forumtags(self,topsoup): def get_forumtags(self,topsoup):
return topsoup.find('div',{'class':'p-description'}).findAll('a',{'class':'tagItem'}) return topsoup.find('div',{'class':'p-description'}).find_all('a',{'class':'tagItem'})
def parse_author(self,souptag): def parse_author(self,souptag):
user = souptag.find('section',{'class':'message-user'}) user = souptag.find('section',{'class':'message-user'})

View file

@ -73,11 +73,11 @@ def get_update_data(inputio,
break break
soup = make_soup(oldcoverhtmldata.decode("utf-8")) soup = make_soup(oldcoverhtmldata.decode("utf-8"))
# first img or image tag. # first img or image tag.
imgs = soup.findAll('img') imgs = soup.find_all('img')
if imgs: if imgs:
src = get_path_part(href)+imgs[0]['src'] src = get_path_part(href)+imgs[0]['src']
else: else:
imgs = soup.findAll('image') imgs = soup.find_all('image')
if imgs: if imgs:
src=get_path_part(href)+imgs[0]['xlink:href'] src=get_path_part(href)+imgs[0]['xlink:href']
@ -128,7 +128,7 @@ def get_update_data(inputio,
# 3/OEBPS/file0005_u3.xhtml etc. # 3/OEBPS/file0005_u3.xhtml etc.
if getsoups: if getsoups:
soup = make_soup(epub.read(href).decode("utf-8")) soup = make_soup(epub.read(href).decode("utf-8"))
for img in soup.findAll('img'): for img in soup.find_all('img'):
newsrc='' newsrc=''
longdesc='' longdesc=''
## skip <img src="data:image..." ## skip <img src="data:image..."
@ -159,7 +159,7 @@ def get_update_data(inputio,
if h2: if h2:
h2.extract() h2.extract()
for skip in bodysoup.findAll(attrs={'class':'skip_on_ffdl_update'}): for skip in bodysoup.find_all(attrs={'class':'skip_on_ffdl_update'}):
skip.extract() skip.extract()
## <meta name="chapterurl" content="${url}"></meta> ## <meta name="chapterurl" content="${url}"></meta>
@ -232,7 +232,7 @@ def get_story_url_from_epub_html(inputio,_is_good_url=None):
if( item.getAttribute("media-type") == "application/xhtml+xml" ): if( item.getAttribute("media-type") == "application/xhtml+xml" ):
filehref=relpath+item.getAttribute("href") filehref=relpath+item.getAttribute("href")
soup = make_soup(epub.read(filehref).decode("utf-8")) soup = make_soup(epub.read(filehref).decode("utf-8"))
for link in soup.findAll('a',href=re.compile(r'^http.*')): for link in soup.find_all('a',href=re.compile(r'^http.*')):
ahref=link['href'] ahref=link['href']
# print("href:(%s)"%ahref) # print("href:(%s)"%ahref)
# hack for bad ficsaver ffnet URLs. # hack for bad ficsaver ffnet URLs.
@ -277,7 +277,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
except UnicodeDecodeError: except UnicodeDecodeError:
# calibre converted to html zip fails with decode. # calibre converted to html zip fails with decode.
soup = make_soup(zipf.read(item)) soup = make_soup(zipf.read(item))
for link in soup.findAll('a',href=re.compile(r'^http.*')): for link in soup.find_all('a',href=re.compile(r'^http.*')):
ahref=link['href'] ahref=link['href']
# print("href:(%s)"%ahref) # print("href:(%s)"%ahref)
if _is_good_url == None or _is_good_url(ahref): if _is_good_url == None or _is_good_url(ahref):

View file

@ -71,7 +71,7 @@ def get_urls_from_html(data,url=None,configuration=None,normalize=False,foremail
# logger.debug("dbl souping") # logger.debug("dbl souping")
soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib") soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib")
for a in soup.findAll('a'): for a in soup.find_all('a'):
if a.has_attr('href'): if a.has_attr('href'):
# logger.debug("a['href']:%s"%a['href']) # logger.debug("a['href']:%s"%a['href'])
href = form_url(url,a['href']) href = form_url(url,a['href'])

View file

@ -59,9 +59,9 @@ class HtmlProcessor:
self._anchor_references = [] self._anchor_references = []
anchor_num = 0 anchor_num = 0
# anchor links # anchor links
anchorlist = self._soup.findAll('a', href=re.compile('^#')) anchorlist = self._soup.find_all('a', href=re.compile('^#'))
# treat reference tags like a tags for TOCTOP. # treat reference tags like a tags for TOCTOP.
anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#'))) anchorlist.extend(self._soup.find_all('reference', href=re.compile('^#')))
for anchor in anchorlist: for anchor in anchorlist:
self._anchor_references.append((anchor_num, anchor['href'])) self._anchor_references.append((anchor_num, anchor['href']))
anchor['filepos'] = '%.10d' % anchor_num anchor['filepos'] = '%.10d' % anchor_num
@ -99,7 +99,7 @@ class HtmlProcessor:
def _FixPreTags(self): def _FixPreTags(self):
'''Replace <pre> tags with HTML-ified text.''' '''Replace <pre> tags with HTML-ified text.'''
pres = self._soup.findAll('pre') pres = self._soup.find_all('pre')
for pre in pres: for pre in pres:
pre.replaceWith(self._FixPreContents(unicode(pre.contents[0]))) pre.replaceWith(self._FixPreContents(unicode(pre.contents[0])))
@ -120,15 +120,15 @@ class HtmlProcessor:
# TODO(chatham): <link> tags to script? # TODO(chatham): <link> tags to script?
unsupported_tags = ('script', 'style') unsupported_tags = ('script', 'style')
for tag_type in unsupported_tags: for tag_type in unsupported_tags:
for element in self._soup.findAll(tag_type): for element in self._soup.find_all(tag_type):
element.extract() element.extract()
def RenameAnchors(self, prefix): def RenameAnchors(self, prefix):
'''Rename every internal anchor to have the given prefix, then '''Rename every internal anchor to have the given prefix, then
return the contents of the body tag.''' return the contents of the body tag.'''
for anchor in self._soup.findAll('a', href=re.compile('^#')): for anchor in self._soup.find_all('a', href=re.compile('^#')):
anchor['href'] = '#' + prefix + anchor['href'][1:] anchor['href'] = '#' + prefix + anchor['href'][1:]
for a in self._soup.findAll('a'): for a in self._soup.find_all('a'):
if a.get('name'): if a.get('name'):
a['name'] = prefix + a['name'] a['name'] = prefix + a['name']