Migrate to new bs4 API

Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
This commit is contained in:
Emmanuel Ferdman 2025-05-06 13:12:18 -07:00 committed by Jim Miller
parent 3edd3c3e7b
commit aca07bbf59
57 changed files with 291 additions and 291 deletions

View file

@ -171,7 +171,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# params['submit'] = 'Login'
# # copy all hidden input tags to pick up appropriate tokens.
# for tag in soup.findAll('input',{'type':'hidden'}):
# for tag in soup.find_all('input',{'type':'hidden'}):
# params[tag['name']] = tag['value']
# logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))
@ -218,7 +218,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# Find the chapters:
chapters = soup.find('ul',{'class':'dropdown-content'})
for i, chapter in enumerate(chapters.findAll('a')):
for i, chapter in enumerate(chapters.find_all('a')):
self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))
@ -262,7 +262,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
pages = 0
pagination=asoup.find('ul',{'class' : 'pagination'})
if pagination:
pages = pagination.findAll('li')[-1].find('a')
pages = pagination.find_all('li')[-1].find('a')
if not pages == None:
pages = pages['href'].split('=')[-1]
else:
@ -271,7 +271,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
storya = None
##If there is only 1 page of stories, check it to get the Metadata,
if pages == 0:
a = asoup.findAll('li')
a = asoup.find_all('li')
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
storya = lc2
@ -294,7 +294,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
# we look for the li element that has the story here
asoup = self.make_soup(adata)
a = asoup.findAll('li')
a = asoup.find_all('li')
for lc2 in a:
if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
i=1

View file

@ -92,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
self.story.setMetadata('title', title.string)
# Author
author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a')
author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
authorurl = author['href']
self.story.setMetadata('author', author.string)
self.story.setMetadata('authorUrl', authorurl)
@ -112,7 +112,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
### add it before the rest of the pages, if any
self.add_chapter('1', self.url)
chapterTable = soup1.find('div',{'class':'pages'}).findAll('a')
chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')
if chapterTable is not None:
# Multi-chapter story
@ -124,7 +124,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
self.add_chapter(chapterTitle, chapterUrl)
rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
self.story.setMetadata('rating',rated)
self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))

View file

@ -136,14 +136,14 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
try:
# in case link points somewhere other than the first chapter
a = soup.findAll('option')[1]['value']
a = soup.find_all('option')[1]['value']
self.story.setMetadata('storyId',a.split('=',)[1])
url = 'http://'+self.host+'/'+a
soup = self.make_soup(self.get_request(url))
except:
pass
for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None:
self.story.setMetadata('title',stripHTML(a))
@ -151,7 +151,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
if len(chapters) == 0:
self.add_chapter(self.story.getMetadata('title'),url)
else:
@ -170,7 +170,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
except:
return ""
cats = info.findAll('a',href=re.compile('categories.php'))
cats = info.find_all('a',href=re.compile('categories.php'))
for cat in cats:
self.story.addToList('category',cat.string)
@ -188,7 +188,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
## <td><span class="sb"><b>Published:</b> 04/08/2007</td>
## one story had <b>Updated...</b> in the description. Restrict to sub-table
labels = info.find('table').findAll('b')
labels = info.find('table').find_all('b')
for labelspan in labels:
value = labelspan.nextSibling
label = stripHTML(labelspan)

View file

@ -147,7 +147,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# Find authorid and URL from... author url.
mainmeta = soup.find('footer', {'class': 'main-meta'})
alist = mainmeta.find('span', string='Author(s)')
alist = alist.parent.findAll('a', href=re.compile(r"/profile/u/[^/]+"))
alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
for a in alist:
self.story.addToList('authorId',a['href'].split('/')[-1])
self.story.addToList('authorUrl','https://'+self.host+a['href'])
@ -159,10 +159,10 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
chapters=soup.find('select',{'name':'chapter-nav'})
hrefattr=None
if chapters:
chapters=chapters.findAll('option')
chapters=chapters.find_all('option')
hrefattr='value'
else: # didn't find <select name='chapter-nav', look for alternative
chapters=soup.find('div',{'class':'widget--chapters'}).findAll('a')
chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
hrefattr='href'
for index, chapter in enumerate(chapters):
if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:
@ -202,7 +202,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# story tags
a = mainmeta.find('span',string='Tags')
if a:
tags = a.parent.findAll('a')
tags = a.parent.find_all('a')
for tag in tags:
self.story.addToList('tags', tag.text)
@ -230,7 +230,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# upvote, subs, and views
a = soup.find('div',{'class':'title-meta'})
spans = a.findAll('span', recursive=False)
spans = a.find_all('span', recursive=False)
self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
if len(spans) > 2: # views can be private

View file

@ -126,7 +126,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
# Find the chapters:
# The update date is with the chapter links... so we will update it here as well
for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])
@ -134,11 +134,11 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
# Get the MetaData
# Erotica Tags
tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode'))
tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
for tag in tags:
self.story.addToList('eroticatags',tag.text)
for td in soup.findAll('td'):
for td in soup.find_all('td'):
if len(td.text)>0:
if 'Added on:' in td.text and '<table' not in unicode(td):
value = td.text.replace('Added on:','').strip()
@ -169,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
#strip comments from soup
[comment.extract() for comment in chaptertag.findAll(string=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]
# BDSM Library basically wraps its own html around the document,
# so we will be removing the script, title and meta content from the
# storyblock
for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'):
for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
tag.extract()
for tag in chaptertag.findAll('o:smarttagtype'):
for tag in chaptertag.find_all('o:smarttagtype'):
tag.name = 'span'
## I'm going to take the attributes off all of the tags
## because they usually refer to the style that we removed above.
for tag in chaptertag.findAll(True):
for tag in chaptertag.find_all(True):
tag.attrs = None
return self.utf8FromSoup(url,chaptertag)

View file

@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -134,7 +134,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling
svalue = ""
@ -154,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -194,7 +194,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -88,8 +88,8 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
intro = stripHTML(info.select_one('.inform-inform-txt').span)
self.setDescription(self.url, intro)
for content in soup.findAll('div', {'id': 'content'}):
for a in content.findAll('a'):
for content in soup.find_all('div', {'id': 'content'}):
for a in content.find_all('a'):
self.add_chapter(a.get_text(), a['href'])

View file

@ -98,7 +98,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
## Title
## Some stories have a banner that has its own a tag before the actual text title...
## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
@ -110,7 +110,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
#self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))
@ -127,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
val = labelspan.nextSibling
value = unicode('')
@ -149,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', stripHTML(value))
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Pairing' in label:
ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
for ship in ships:
self.story.addToList('ships',ship.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -196,7 +196,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# this site has several links to each story.

View file

@ -95,7 +95,7 @@ class DokugaComAdapter(BaseSiteAdapter):
params['Submit'] = 'Submit'
# copy all hidden input tags to pick up appropriate tokens.
for tag in soup.findAll('input',{'type':'hidden'}):
for tag in soup.find_all('input',{'type':'hidden'}):
params[tag['name']] = tag['value']
loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'
@ -153,7 +153,7 @@ class DokugaComAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(a))
# Find the chapters:
chapters = soup.find('select').findAll('option')
chapters = soup.find('select').find_all('option')
if len(chapters)==1:
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
else:
@ -168,7 +168,7 @@ class DokugaComAdapter(BaseSiteAdapter):
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
#grab the rest of the metadata from the author's page
for div in asoup.findAll('div'):
for div in asoup.find_all('div'):
nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
if nav != None:
break
@ -208,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):
else:
asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
for div in asoup.findAll('div'):
for div in asoup.find_all('div'):
nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
if nav != None:
break

View file

@ -161,7 +161,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -181,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.setDescription(url,content.find('blockquote'))
for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
self.story.addToList('genre',genre.string)
for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
self.story.addToList('warnings',warning.string)
labels = content.findAll('b')
labels = content.find_all('b')
for labelspan in labels:
value = labelspan.nextSibling
@ -208,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -247,7 +247,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

View file

@ -138,7 +138,7 @@ class EFPFanFicNet(BaseSiteAdapter):
# no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url)
else:
allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')})
allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
for o in allOptions:
url = u'https://%s/%s' % ( self.getSiteDomain(),
o['value'])
@ -170,14 +170,14 @@ class EFPFanFicNet(BaseSiteAdapter):
if authsoup != None:
# last author link with offset should be the 'next' link.
authurl = u'https://%s/%s' % ( self.getSiteDomain(),
authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
# Need author page for most of the metadata.
logger.debug("fetching author page: (%s)"%authurl)
authsoup = self.make_soup(self.get_request(authurl))
#print("authsoup:%s"%authsoup)
storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
for storya in storyas:
#print("======storya:%s"%storya)
storyblock = storya.findParent('div',{'class':'storybloc'})
@ -194,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
# Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
# Categoria: <a href="categories.php?catid=1&amp;parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&amp;parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&amp;a=">3</a> recensioni</div>
cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
@ -262,7 +262,7 @@ class EFPFanFicNet(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':
@ -288,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
# remove any header and 'o:p' tags.
for tag in div.findAll("head") + div.findAll("o:p"):
for tag in div.find_all("head") + div.find_all("o:p"):
tag.extract()
# change any html and body tags to div.
for tag in div.findAll("html") + div.findAll("body"):
for tag in div.find_all("html") + div.find_all("body"):
tag.name='div'
# remove extra bogus doctype.

View file

@ -126,7 +126,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -144,7 +144,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling
svalue = ""
@ -164,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -204,7 +204,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

View file

@ -163,7 +163,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
# Find the chapters:
# The published and update dates are with the chapter links...
# so we have to get them from there.
chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata(
chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
'storyId')+'/([a-zA-Z0-9_]+)/'))
# Here we are getting the published date. It is the date the first chapter was "updated"
@ -241,8 +241,8 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
"Error downloading Chapter: '{0}'! Missing required element!".format(url))
#Now, there are a lot of extraneous tags within the story division.. so we will remove them.
for tag in story.findAll('ul',{'class':'pager'}) + story.findAll(
'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}):
for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
tag.extract()
return self.utf8FromSoup(url,story)

View file

@ -167,7 +167,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## the first chapter. It generates another server request and
## doesn't seem to be needed lately, so now default it to off.
try:
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
# get chapter part of url.
except:
chapcount = 1
@ -212,7 +212,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## For 1, use the second link.
## For 2, fetch the crossover page and pull the two categories from there.
pre_links = soup.find('div',{'id':'pre_story_links'})
categories = pre_links.findAll('a',{'class':'xcontrast_txt'})
categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
#print("xcontrast_txt a:%s"%categories)
if len(categories) > 1:
# Strangely, the ones with *two* links are the
@ -251,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
# for b in grayspan.findAll('button'):
# for b in grayspan.find_all('button'):
# b.extract()
metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
#logger.debug("metatext:(%s)"%metatext)
@ -290,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
# Published: <span data-xutime='1384358726'>8m ago</span>
dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
if len(dates) > 1 :
# updated gets set to the same as published upstream if not found.
self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))
@ -395,7 +395,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url)
else:
allOptions = select.findAll('option')
allOptions = select.find_all('option')
for o in allOptions:
## title URL will be put back on chapter URL during
## normalize_chapterurl() anyway, but also here for

View file

@ -134,7 +134,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
self.story.setMetadata('author',stripHTML(a))
# Find the chapters:
for chapter in soup.find('select').findAll('option'):
for chapter in soup.find('select').find_all('option'):
self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
## title="Wörter" failed with max_zalgo:1
@ -181,13 +181,13 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
# #find metadata on the author's page
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
# tr=asoup.findAll('tr')
# tr=asoup.find_all('tr')
# for i in range(1,len(tr)):
# a = tr[i].find('a')
# if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
# break
# td = tr[i].findAll('td')
# td = tr[i].find_all('td')
# self.story.addToList('category',stripHTML(td[2]))
# self.story.setMetadata('rating', stripHTML(td[5]))
# self.story.setMetadata('numWords', stripHTML(td[6]))
@ -204,7 +204,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'storytext'})
for a in div.findAll('script'):
for a in div.find_all('script'):
a.extract()
if None == div:

View file

@ -146,7 +146,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
pubdate = None
chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
if chapters != None:
for chapdiv in chapters.findAll('li', {'class':'part'}):
for chapdiv in chapters.find_all('li', {'class':'part'}):
chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
churl='https://'+self.host+chapter['href']
@ -179,7 +179,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
# pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
# pr='https://'+self.host+pr['href']
# pr = self.make_soup(self.get_request(pr))
# pr=pr.findAll('div', {'class' : 'part_text'})
# pr=pr.find_all('div', {'class' : 'part_text'})
# i=0
# for part in pr:
# i=i+len(stripHTML(part).split(' '))
@ -196,7 +196,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))
i=0
fandoms = dlinfo.select_one('div:not([class])').findAll('a', href=re.compile(r'/fanfiction/\w+'))
fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
for fandom in fandoms:
self.story.addToList('category',fandom.string)
i=i+1
@ -205,13 +205,13 @@ class FicBookNetAdapter(BaseSiteAdapter):
tags = soup.find('div',{'class':'tags'})
if tags:
for genre in tags.findAll('a',href=re.compile(r'/tags/')):
for genre in tags.find_all('a',href=re.compile(r'/tags/')):
self.story.addToList('genre',stripHTML(genre))
ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))
# meta=table.findAll('a', href=re.compile(r'/ratings/'))
# meta=table.find_all('a', href=re.compile(r'/ratings/'))
# i=0
# for m in meta:
# if i == 0:

View file

@ -201,10 +201,10 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
# epubutils.py
# Yes, this still applies to fictionalley-archive.
for tag in chaptext.findAll('head') + chaptext.findAll('meta') + chaptext.findAll('script'):
for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
tag.extract()
for tag in chaptext.findAll('body') + chaptext.findAll('html'):
for tag in chaptext.find_all('body') + chaptext.find_all('html'):
tag.name = 'div'
if self.getConfig('include_author_notes'):

View file

@ -136,7 +136,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
spanreq = metap.find("span",{"class":"story-warnings"})
if spanreq: # can be no warnings.
for a in spanreq.findAll("a"):
for a in spanreq.find_all("a"):
self.story.addToList('warnings',a['title'])
## perhaps not the most efficient way to parse this, using
@ -186,7 +186,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# no list found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url)
else:
chapterlistlis = storylistul.findAll('li')
chapterlistlis = storylistul.find_all('li')
for chapterli in chapterlistlis:
if "blocked" in chapterli['class']:
# paranoia check. We should already be logged in by now.

View file

@ -161,7 +161,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -178,7 +178,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -199,22 +199,22 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -238,7 +238,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

View file

@ -125,7 +125,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url,usecache=False))
# removing all of the scripts
for tag in soup.findAll('script'):
for tag in soup.find_all('script'):
tag.extract()

View file

@ -144,13 +144,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# Find authorid and URL from... author urls.
pagetitle = soup.find('div',id='pagetitle')
for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")):
for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
self.story.addToList('author',stripHTML(a))
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -166,7 +166,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = stripHTML(labelspan)
@ -193,7 +193,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [stripHTML(cat) for cat in cats]
for cat in catstext:
# ran across one story with an empty <a href="browse.php?type=categories&amp;catid=1"></a>
@ -204,7 +204,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
if 'Characters' in label:
self.story.addToList('characters','Kirk')
self.story.addToList('characters','Spock')
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [stripHTML(char) for char in chars]
for char in charstext:
self.story.addToList('characters',stripHTML(char))
@ -213,7 +213,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:
@ -223,7 +223,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## has 'Story Type', which is much more what most sites
## call genre.
if 'Story Type' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
genrestext = [stripHTML(genre) for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:
@ -233,21 +233,21 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [stripHTML(warning) for warning in warnings]
self.warning = ', '.join(warningstext)
for warning in warningstext:
self.story.addToList('warnings',stripHTML(warning))
if 'Universe' in label:
universes = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
universes = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
universestext = [stripHTML(universe) for universe in universes]
self.universe = ', '.join(universestext)
for universe in universestext:
self.story.addToList('universe',stripHTML(universe))
if 'Crossover Fandom' in label:
crossoverfandoms = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
crossoverfandoms = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms]
self.crossoverfandom = ', '.join(crossoverfandomstext)
for crossoverfandom in crossoverfandomstext:
@ -274,7 +274,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

View file

@ -236,7 +236,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
breadcrumbs = soup.find('div', id='BreadCrumbComponent')
if not breadcrumbs:
breadcrumbs = soup.select_one('ul[class^="_breadcrumbs_list_"]')
self.story.addToList('category', breadcrumbs.findAll('a')[1].string)
self.story.addToList('category', breadcrumbs.find_all('a')[1].string)
## one-shot chapter
self.add_chapter(self.story.getMetadata('title'), self.url)
@ -356,7 +356,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
# logger.debug("\tChapter text: %s" % raw_page)
page_soup = self.make_soup(raw_page)
[comment.extract() for comment in page_soup.findAll(string=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in page_soup.find_all(string=lambda text:isinstance(text, Comment))]
fullhtml = ""
for aa_ht_div in page_soup.find_all('div', 'aa_ht') + page_soup.select('div[class^="_article__content_"]'):
if aa_ht_div.div:

View file

@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -134,7 +134,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling
svalue = ""
@ -154,22 +154,22 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -194,7 +194,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -678,7 +678,7 @@ class Chapter(object):
def _excludeEditorSignature(self, root):
"""Exclude editor signature from within `root' element."""
for stringNode in root.findAll(string=True):
for stringNode in root.find_all(string=True):
if re.match(self.SIGNED_PATTERN, textNode.string):
editorLink = textNode.findNext('a')
if editorLink:

View file

@ -148,12 +148,12 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
# category
# <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
for a in soup.findAll('a',href=re.compile(r"^/fanfic/a/")):
for a in soup.find_all('a',href=re.compile(r"^/fanfic/a/")):
self.story.addToList('category',a.string)
# genre
# <a href="/fanfic/src.php/g/567">Ranma 1/2</a>
for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")):
for a in soup.find_all('a',href=re.compile(r"^/fanfic/src.php/g/")):
self.story.addToList('genre',a.string)
metasoup = soup.find("div",{"class":"post-meta"})

View file

@ -154,7 +154,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)
@ -170,7 +170,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -191,13 +191,13 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
self.story.addToList('characters',char.string)
@ -206,7 +206,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:
@ -216,7 +216,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [warning.string for warning in warnings]
self.warning = ', '.join(warningstext)
for warning in warningstext:
@ -243,7 +243,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

View file

@ -195,7 +195,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
[a.extract() for a in story.find_all('a')]
# Some tags have non-standard tag name.
for tag in story.findAll(recursive=True):
for tag in story.find_all(recursive=True):
if tag.name not in HTML_TAGS:
tag.name = 'span'

View file

@ -137,14 +137,14 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
try:
# in case link points somewhere other than the first chapter
a = soup.findAll('option')[1]['value']
a = soup.find_all('option')[1]['value']
self.story.setMetadata('storyId',a.split('=',)[1])
url = 'http://'+self.host+'/'+a
soup = self.make_soup(self.get_request(url))
except:
pass
for info in asoup.findAll('table', {'class' : 'border'}):
for info in asoup.find_all('table', {'class' : 'border'}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
if a != None:
self.story.setMetadata('title',stripHTML(a))
@ -152,7 +152,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
if len(chapters) == 0:
self.add_chapter(self.story.getMetadata('title'),url)
else:
@ -171,7 +171,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
except:
return ""
cats = info.findAll('a',href=re.compile('categories.php'))
cats = info.find_all('a',href=re.compile('categories.php'))
for cat in cats:
self.story.addToList('category',cat.string)
@ -188,7 +188,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
self.setDescription(url,svalue)
# <span class="label">Rated:</span> NC-17<br /> etc
labels = info.findAll('b')
labels = info.find_all('b')
for labelspan in labels:
value = labelspan.nextSibling
label = stripHTML(labelspan)

View file

@ -93,26 +93,26 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
chapters = soup.find('select')
if chapters == None:
self.add_chapter(self.story.getMetadata('title'),url)
for b in soup.findAll('b'):
for b in soup.find_all('b'):
if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',')
self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat))
else:
i = 0
chapters = chapters.findAll('option')
chapters = chapters.find_all('option')
for chapter in chapters:
self.add_chapter(chapter,'https://'+self.host+chapter['value'])
if i == 0:
self.story.setMetadata('storyId',chapter['value'].split('/')[3])
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
for b in head:
if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',')
self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
if i == (len(chapters)-1):
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
for b in head:
if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',')
@ -160,20 +160,20 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url))
chapter=self.make_soup('<div class="story"></div>')
for p in soup.findAll(['p','blockquote']):
for p in soup.find_all(['p','blockquote']):
if "This is for problems with the formatting or the layout of the chapter." in stripHTML(p):
break
chapter.append(p)
for a in chapter.findAll('div'):
for a in chapter.find_all('div'):
a.extract()
for a in chapter.findAll('table'):
for a in chapter.find_all('table'):
a.extract()
for a in chapter.findAll('script'):
for a in chapter.find_all('script'):
a.extract()
for a in chapter.findAll('form'):
for a in chapter.find_all('form'):
a.extract()
for a in chapter.findAll('textarea'):
for a in chapter.find_all('textarea'):
a.extract()

View file

@ -80,7 +80,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/fanfiction/'+chapter['href'])
@ -92,7 +92,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -116,13 +116,13 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('reads', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
if "Snape and Harry (required)" in char:
@ -132,27 +132,27 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
self.story.addToList('characters',char.string)
if 'Warning' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for warning in warnings:
self.story.addToList('warnings',stripHTML(warning))
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for genre in genres:
self.story.addToList('genre',stripHTML(genre))
if 'Takes Place' in label:
takesplaces = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
takesplaces = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for takesplace in takesplaces:
self.story.addToList('takesplaces',stripHTML(takesplace))
if 'Snape flavour' in label:
snapeflavours = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
snapeflavours = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for snapeflavour in snapeflavours:
self.story.addToList('snapeflavours',stripHTML(snapeflavour))
if 'Tags' in label:
sitetags = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
sitetags = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
for sitetag in sitetags:
self.story.addToList('sitetags',stripHTML(sitetag))
@ -176,7 +176,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/fanfiction/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -121,7 +121,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/missingpieces/'+chapter['href']+addurl)
@ -138,7 +138,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -159,22 +159,22 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -198,7 +198,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

View file

@ -111,7 +111,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -126,7 +126,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
except:
return ""
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -147,22 +147,22 @@ class PsychFicComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -186,7 +186,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -187,7 +187,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
chapters = soup.find('table',{'id':'chapters'}).find('tbody')
tds = [tr.findAll('td') for tr in chapters.findAll('tr')]
tds = [tr.find_all('td') for tr in chapters.find_all('tr')]
if not tds:
raise exceptions.FailedToDownload(

View file

@ -193,7 +193,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
# Find authorid and URL from... author url.
# (fetch multiple authors)
alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
for a in alist:
self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href'])
@ -201,11 +201,11 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
# Reviews
reviewdata = soup.find('div', {'id' : 'sort'})
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
self.story.setMetadata('reviews',stripHTML(a))
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/fanfics/'+chapter['href']+addurl)
@ -222,7 +222,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -237,13 +237,13 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
self.story.addToList('characters',char.string)
@ -252,7 +252,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:
@ -262,7 +262,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [warning.string for warning in warnings]
self.warning = ', '.join(warningstext)
for warning in warningstext:
@ -291,7 +291,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
series_url = 'http://'+self.host+'/fanfics/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -240,13 +240,13 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
# Categories
if soup.find('span',{'class': 'wi_fic_showtags_inner'}):
categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).findAll('a')
categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).find_all('a')
for category in categories:
self.story.addToList('category', stripHTML(category))
# Genres
if soup.find('a',{'class': 'fic_genre'}):
genres = soup.findAll('a',{'class': 'fic_genre'})
genres = soup.find_all('a',{'class': 'fic_genre'})
for genre in genres:
self.story.addToList('genre', stripHTML(genre))
@ -258,7 +258,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
# Content Warnings
if soup.find('ul',{'class': 'ul_rate_expand'}):
warnings = soup.find('ul',{'class': 'ul_rate_expand'}).findAll('a')
warnings = soup.find('ul',{'class': 'ul_rate_expand'}).find_all('a')
for warn in warnings:
self.story.addToList('warnings', stripHTML(warn))
@ -312,7 +312,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata(metadata, stripHTML(row.find('td')))
if soup.find('table',{'class': 'table_pro_overview'}):
stats_table = soup.find('table',{'class': 'table_pro_overview'}).findAll('tr')
stats_table = soup.find('table',{'class': 'table_pro_overview'}).find_all('tr')
for row in stats_table:
find_stats_data("Total Views (All)", row, "views")
find_stats_data("Word Count", row, "numWords")

View file

@ -171,7 +171,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
# Find authorid and URL from... author url.
# (fetch multiple authors)
alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
for a in alist:
self.story.addToList('authorId',a['href'].split('=')[1])
self.story.addToList('authorUrl','https://'+self.host+'/fanfics/'+a['href'])
@ -180,12 +180,12 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
# Reviews
reviewdata = soup.find('div', {'id' : 'sort'})
a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
self.story.setMetadata('reviews',stripHTML(a))
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/fanfics/'+chapter['href']+addurl)
@ -208,7 +208,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
self.setDescription(url,self.make_soup(summarydata))
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -220,13 +220,13 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
self.story.addToList('characters',char.string)
@ -235,7 +235,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:
@ -245,7 +245,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
## leaving it in. Check to make sure the type_id number
## is correct, though--it's site specific.
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warningstext = [warning.string for warning in warnings]
self.warning = ', '.join(warningstext)
for warning in warningstext:
@ -273,7 +273,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/fanfics/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -109,7 +109,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('title',stripHTML(titlea))
# Find the chapters (from soup, not authsoup):
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/siye/'+chapter['href'])
@ -121,7 +121,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
metatable = soup.find('table',{'width':'95%'})
# Categories
cat_as = metatable.findAll('a', href=re.compile(r'categories.php'))
cat_as = metatable.find_all('a', href=re.compile(r'categories.php'))
for cat_a in cat_as:
self.story.addToList('category',stripHTML(cat_a))
@ -209,7 +209,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -99,7 +99,7 @@ class SpiritFanfictionComAdapter(BaseSiteAdapter):
# Authors
# Find authorid and URL
authors = (title.find_next('div', {'class':'left'})).findAll('span', {'class':'usuario'})
authors = (title.find_next('div', {'class':'left'})).find_all('span', {'class':'usuario'})
for author in authors:
self.story.addToList('authorId', author.find('a')['href'].split('/')[-1])
@ -114,10 +114,10 @@ class SpiritFanfictionComAdapter(BaseSiteAdapter):
newestChapter = None
self.newestChapterNum = None # save for comparing during update.
# Find the chapters:
chapters = soup.findAll('table', {'class':'listagemCapitulos espacamentoTop'})
chapters = soup.find_all('table', {'class':'listagemCapitulos espacamentoTop'})
for chapter in chapters:
for row in chapter.findAll('tr', {'class': 'listagem-textoBg1'}): # Find each row with chapter info
for row in chapter.find_all('tr', {'class': 'listagem-textoBg1'}): # Find each row with chapter info
a = row.find('a') # Chapter link
# Datetime

View file

@ -93,7 +93,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(a))
# Find the chapters: chapterview.asp?sid=7000&cid=30919
chapters=soup.findAll('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+r"&cid=\d+$"))
chapters=soup.find_all('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+r"&cid=\d+$"))
if len(chapters)==1:
self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+chapters[0]['href'])
else:
@ -109,14 +109,14 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# no convenient way to get word count
for td in asoup.findAll('td', {'colspan' : '3'}):
for td in asoup.find_all('td', {'colspan' : '3'}):
if td.find('a', href=re.compile(r'chapterlistview.asp\?SID='+self.story.getMetadata('storyId'))) != None:
break
td=td.nextSibling.nextSibling
self.story.setMetadata('dateUpdated', makeDate(stripHTML(td).split(': ')[1], self.dateformat))
try:
tr=td.parent.nextSibling.nextSibling.nextSibling.nextSibling
td=tr.findAll('td')
td=tr.find_all('td')
self.story.setMetadata('rating', td[0].string.split(': ')[1])
self.story.setMetadata('status', td[2].string.split(': ')[1])
self.story.setMetadata('datePublished', makeDate(stripHTML(td[4]).split(': ')[1], self.dateformat))

View file

@ -230,7 +230,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
self.story.setMetadata('title',stripHTML(a))
authfrom = soup.find('footer')
alist = authfrom.findAll('a', {'rel' : 'author'})
alist = authfrom.find_all('a', {'rel' : 'author'})
for a in alist:
self.story.addToList('authorId',a['href'].split('/')[2])
self.story.addToList('authorUrl','https://'+self.host+a['href'])
@ -298,7 +298,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
self.has_universes = False
title_cell = story_row.find('td', {'class' : 'lc2'})
for cat in title_cell.findAll('div', {'class' : 'typediv'}):
for cat in title_cell.find_all('div', {'class' : 'typediv'}):
self.story.addToList('genre',cat.text)
# in lieu of word count.
@ -382,7 +382,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
universes_soup = self.make_soup(self.get_request(universe_url) )
# logger.debug("Universe url='{0}'".format(universe_url))
if universes_soup:
universes = universes_soup.findAll('div', {'class' : 'ser-box'})
universes = universes_soup.find_all('div', {'class' : 'ser-box'})
# logger.debug("Number of Universes: %d" % len(universes))
for universe in universes:
# logger.debug("universe.find('a')={0}".format(universe.find('a')))
@ -477,7 +477,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
return value
def parseOtherAttributes(self, other_attribute_element):
for b in other_attribute_element.findAll('b'):
for b in other_attribute_element.find_all('b'):
#logger.debug('Getting metadata: "%s"' % b)
label = b.text
if label in ['Posted:', 'Concluded:', 'Updated:']:
@ -576,7 +576,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
if pager != None:
urls=pager.findAll('a')
urls=pager.find_all('a')
urls=urls[:len(urls)-1]
# logger.debug("pager urls:%s"%urls)
pager.extract()
@ -630,7 +630,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
# putting a 'conTag' at the *top* now, too. So this
# was nuking every page but the first and last. Now
# only if 'Continues'
for contag in pagetag.findAll('span', {'class' : 'conTag'}):
for contag in pagetag.find_all('span', {'class' : 'conTag'}):
# remove everything after continues...
if 'Continuation' in contag.text:
tag = contag
@ -659,7 +659,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
# If it is a chapter, there are dates at the start for when it was posted or modified. These plus
# everything before them can be discarded.
postedDates = pagetag.findAll('div', {'class' : 'date'})
postedDates = pagetag.find_all('div', {'class' : 'date'})
# logger.debug(postedDates)
if postedDates:
a = postedDates[0].previousSibling
@ -668,7 +668,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
b = a.previousSibling
a.extract()
a = b
for a in pagetag.findAll('div', {'class' : 'date'}):
for a in pagetag.find_all('div', {'class' : 'date'}):
a.extract()
# Kill the vote form and everything after it.

View file

@ -61,7 +61,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
soup = self.make_soup(data)
title = soup.find('h1')
for tag in title.findAll('sup'):
for tag in title.find_all('sup'):
tag.extract()
self.story.setMetadata('title', stripHTML(title.text))
@ -91,7 +91,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
if "NC-18" in rating:
raise exceptions.AdultCheckRequired(self.url)
characters = soup.findAll('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png"))
characters = soup.find_all('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png"))
logger.debug("numCharacters: (%s)"%str(len(characters)))
for x in range(0,len(characters)):
@ -119,7 +119,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
self.story.setMetadata('language','Russian')
chapters=chapters_table.findAll('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+"))
chapters=chapters_table.find_all('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+"))
self.story.setMetadata('numChapters', len(chapters))
logger.debug("numChapters: (%s)"%str(self.story.getMetadata('numChapters')))

View file

@ -131,7 +131,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)
@ -143,7 +143,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -164,19 +164,19 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
genrestext = [genre.string for genre in genres]
self.genre = ', '.join(genrestext)
for genre in genrestext:
@ -203,7 +203,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -168,7 +168,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host + self.section + chapter['href']+addurl)
@ -186,7 +186,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# summary, rated, word count, categories, characters, genre, warnings, completed, published, updated, seires
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.text
@ -207,22 +207,22 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)

View file

@ -199,14 +199,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
infodata = self.get_request(infourl)
infosoup = self.make_soup(infodata)
# for a in infosoup.findAll('a',href=re.compile(r"^/Author-\d+")):
# for a in infosoup.find_all('a',href=re.compile(r"^/Author-\d+")):
# self.story.addToList('authorId',a['href'].split('/')[1].split('-')[1])
# self.story.addToList('authorUrl','https://'+self.host+a['href'].replace("/Author-","/AuthorStories-"))
# self.story.addToList('author',stripHTML(a))
# second verticaltable is the chapter list.
table = infosoup.findAll('table',{'class':'verticaltable'})[1]
for a in table.findAll('a',href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))):
table = infosoup.find_all('table',{'class':'verticaltable'})[1]
for a in table.find_all('a',href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))):
autha = a.findNext('a',href=re.compile(r"^/Author-\d+"))
self.story.addToList('authorId',autha['href'].split('/')[1].split('-')[1])
self.story.addToList('authorUrl','https://'+self.host+autha['href'].replace("/Author-","/AuthorStories-"))
@ -224,7 +224,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url)
else:
allOptions = select.findAll('option')
allOptions = select.find_all('option')
for o in allOptions:
url = "https://"+self.host+o['value']
# just in case there's tags, like <i> in chapter titles.
@ -237,7 +237,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
BtVSNonX = False
char=None
romance=False
for cat in verticaltable.findAll('a', href=re.compile(r"^/Category-")):
for cat in verticaltable.find_all('a', href=re.compile(r"^/Category-")):
# assumes only one -Centered and one Pairing: cat can ever
# be applied to one story.
# Seen at least once: incorrect (empty) cat link, thus "and cat.string"
@ -265,7 +265,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
if 'BtVS/AtS Non-Crossover' == cat.string:
BtVSNonX = True
verticaltabletds = verticaltable.findAll('td')
verticaltabletds = verticaltable.find_all('td')
self.story.setMetadata('rating', verticaltabletds[2].string)
self.story.setMetadata('numWords', verticaltabletds[4].string)
@ -279,7 +279,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('datePublished',makeDate(stripHTML(verticaltabletds[8].string), self.dateformat))
self.story.setMetadata('dateUpdated',makeDate(stripHTML(verticaltabletds[9].string), self.dateformat))
for icon in storydiv.find('span',{'class':'storyicons'}).findAll('img'):
for icon in storydiv.find('span',{'class':'storyicons'}).find_all('img'):
if( icon['title'] not in ['Non-Crossover'] ) :
self.story.addToList('genre',icon['title'])
else:

View file

@ -127,7 +127,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])
@ -139,7 +139,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -159,20 +159,20 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
self.story.addToList('characters',char.string)
## twilighted.net doesn't use genre.
# if 'Genre' in label:
# genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
# genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
# genrestext = [genre.string for genre in genres]
# self.genre = ', '.join(genrestext)
# for genre in genrestext:
@ -199,7 +199,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -111,7 +111,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'http://'+self.host+'/archive/'+chapter['href']+addurl)
@ -126,7 +126,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
except:
return ""
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
for labelspan in labels:
value = labelspan.nextSibling
label = labelspan.string
@ -150,24 +150,24 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('reads', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
charstext = [char.string for char in chars]
for char in charstext:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -190,7 +190,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/archive/'+a['href']
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -80,7 +80,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# no selector found, so it's a one-chapter story.
self.add_chapter(self.story.getMetadata('title'),url)
else:
allOptions = select.findAll('option')
allOptions = select.find_all('option')
for o in allOptions:
url = self.url + "&chapter=%s" % o['value']
# just in case there's tags, like <i> in chapter titles.
@ -178,7 +178,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
try:
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

View file

@ -100,7 +100,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
self.story.setMetadata('rating', rating)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.add_chapter(chapter,'https://'+self.host+'/wrfa/'+chapter['href'])
@ -110,7 +110,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# <span class="label">Rated:</span> NC-17<br /> etc
content=soup.find('div',{'class' : 'content'})
labels = soup.findAll('span',{'class':'label'})
labels = soup.find_all('span',{'class':'label'})
value = labels[0].previousSibling
svalue = ""
@ -134,22 +134,22 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
self.story.setMetadata('numWords', value.split(' -')[0])
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
for char in chars:
self.story.addToList('characters',char.string)
if 'Genre' in label:
genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
for genre in genres:
self.story.addToList('genre',genre.string)
if 'Warnings' in label:
warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
for warning in warnings:
self.story.addToList('warnings',warning.string)
@ -173,7 +173,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
# skip 'report this' and 'TOC' links

View file

@ -268,7 +268,7 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
tag.extract()
# Some tags have non-standard tag name.
for tag in story.findAll(recursive=True):
for tag in story.find_all(recursive=True):
if tag.name not in HTML_TAGS:
tag.name = 'span'

View file

@ -127,7 +127,7 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
self.story.setMetadata('status', 'Completed')
for detail in soup.findAll('li'):
for detail in soup.find_all('li'):
det = unicode(detail).replace(u"\xa0",'')
heading = stripHTML(det).split(' - ')[0]
text = stripHTML(det).replace(heading+' - ','')
@ -180,18 +180,18 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
logger.debug('Using the html retrieved previously from: %s' % url)
story = self.html.findAll('table')[0].findAll('td')[0].find('div')
story = self.html.find_all('table')[0].find_all('td')[0].find('div')
if None == story:
raise exceptions.FailedToDownload(
"Error downloading Chapter: %s! Missing required element!" % url)
## Removing the scripts, tables, links and divs from the story
for tag in (story.findAll('script') + story.findAll('table') + story.findAll('a') +
story.findAll('div')):
for tag in (story.find_all('script') + story.find_all('table') + story.find_all('a') +
story.find_all('div')):
tag.extract()
#strip comments from story
[comment.extract() for comment in story.findAll(string=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in story.find_all(string=lambda text:isinstance(text, Comment))]
return self.utf8FromSoup(url,story)

View file

@ -803,7 +803,7 @@ class BaseSiteAdapter(Requestable):
# show up differently and doing stripHTML() also
# catches <br> etc.
soup = BeautifulSoup(unicode(soup),'html5lib')
for t in soup.findAll(recursive=True):
for t in soup.find_all(recursive=True):
for attr in self.get_attr_keys(t):
if attr not in acceptable_attributes:
del t[attr] ## strip all tag attributes except acceptable_attributes

View file

@ -235,7 +235,7 @@ class BaseEfictionAdapter(BaseSiteAdapter):
soup = self.make_soup(html)
## fix all local image 'src' to absolute
for img in soup.findAll("img", {"src": _REGEX_DOESNT_START_WITH_HTTP}):
for img in soup.find_all("img", {"src": _REGEX_DOESNT_START_WITH_HTTP}):
# TODO handle '../../' and so on
if img['src'].startswith('/'):
img['src'] = img['src'][1:]
@ -410,13 +410,13 @@ class BaseEfictionAdapter(BaseSiteAdapter):
if pagetitleDiv.find('a') is None:
raise exceptions.FailedToDownload("Couldn't find title and author")
self.story.setMetadata('title', stripHTML(pagetitleDiv.find("a")))
authorLink = pagetitleDiv.findAll("a")[1]
authorLink = pagetitleDiv.find_all("a")[1]
self.story.setMetadata('author', stripHTML(authorLink))
self.story.setMetadata('authorId', re.search(r"\d+", authorLink['href']).group(0))
self.story.setMetadata('authorUrl', self.getViewUserUrl(self.story.getMetadata('authorId')))
## Parse the infobox
labelSpans = soup.find("div", "infobox").find("div", "content").findAll("span", "label")
labelSpans = soup.find("div", "infobox").find("div", "content").find_all("span", "label")
for labelSpan in labelSpans:
valueStr = ""
nextEl = labelSpan.nextSibling

View file

@ -190,10 +190,10 @@ class BaseOTWAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload('Site says: "Sorry, you don\'t have permission to access the page you were trying to reach."')
soup = self.make_soup(data)
for tag in soup.findAll('div',id='admin-banner'):
for tag in soup.find_all('div',id='admin-banner'):
tag.extract()
metasoup = self.make_soup(meta)
for tag in metasoup.findAll('div',id='admin-banner'):
for tag in metasoup.find_all('div',id='admin-banner'):
tag.extract()
@ -234,7 +234,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
self.story.setMetadata('restricted','Restricted')
# Find authorid and URL from... author url.
alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/.+"))
alist = soup.find_all('a', href=re.compile(r"/users/\w+/pseuds/.+"))
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
self.story.setMetadata('author','Anonymous')
self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
@ -267,7 +267,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
# change the dates of earlier ones by editing them--That WILL
# break epub update.
# Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
chapters=soup.find_all('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
self.story.setMetadata('numChapters',len(chapters))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
if len(chapters)==1:
@ -300,50 +300,50 @@ class BaseOTWAdapter(BaseSiteAdapter):
a = metasoup.find('dd',{'class':"fandom tags"})
if a != None:
fandoms = a.findAll('a',{'class':"tag"})
fandoms = a.find_all('a',{'class':"tag"})
for fandom in fandoms:
self.story.addToList('fandoms',fandom.string)
a = metasoup.find('dd',{'class':"warning tags"})
if a != None:
warnings = a.findAll('a',{'class':"tag"})
warnings = a.find_all('a',{'class':"tag"})
for warning in warnings:
self.story.addToList('warnings',warning.string)
a = metasoup.find('dd',{'class':"freeform tags"})
if a != None:
genres = a.findAll('a',{'class':"tag"})
genres = a.find_all('a',{'class':"tag"})
for genre in genres:
self.story.addToList('freeformtags',genre.string)
a = metasoup.find('dd',{'class':"category tags"})
if a != None:
genres = a.findAll('a',{'class':"tag"})
genres = a.find_all('a',{'class':"tag"})
for genre in genres:
if genre != "Gen":
self.story.addToList('ao3categories',genre.string)
a = metasoup.find('dd',{'class':"character tags"})
if a != None:
chars = a.findAll('a',{'class':"tag"})
chars = a.find_all('a',{'class':"tag"})
for char in chars:
self.story.addToList('characters',char.string)
a = metasoup.find('dd',{'class':"relationship tags"})
if a != None:
ships = a.findAll('a',{'class':"tag"})
ships = a.find_all('a',{'class':"tag"})
for ship in ships:
self.story.addToList('ships',ship.string)
a = metasoup.find('dd',{'class':"collections"})
if a != None:
collections = a.findAll('a')
collections = a.find_all('a')
for collection in collections:
self.story.addToList('collections',collection.string)
stats = metasoup.find('dl',{'class':'stats'})
dt = stats.findAll('dt')
dd = stats.findAll('dd')
dt = stats.find_all('dt')
dd = stats.find_all('dd')
for x in range(0,len(dt)):
label = dt[x].text
value = dd[x].text
@ -386,7 +386,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
ddseries = metasoup.find('dd',{'class':"series"})
if ddseries:
for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))):
for i, a in enumerate(ddseries.find_all('a', href=re.compile(r"/series/\d+"))):
series_name = stripHTML(a)
series_url = 'https://'+self.host+a['href']
series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"

View file

@ -377,7 +377,7 @@ class BaseXenForo2ForumAdapter(BaseSiteAdapter):
return
def get_forumtags(self,topsoup):
return topsoup.find('div',{'class':'p-description'}).findAll('a',{'class':'tagItem'})
return topsoup.find('div',{'class':'p-description'}).find_all('a',{'class':'tagItem'})
def parse_author(self,souptag):
user = souptag.find('section',{'class':'message-user'})

View file

@ -73,11 +73,11 @@ def get_update_data(inputio,
break
soup = make_soup(oldcoverhtmldata.decode("utf-8"))
# first img or image tag.
imgs = soup.findAll('img')
imgs = soup.find_all('img')
if imgs:
src = get_path_part(href)+imgs[0]['src']
else:
imgs = soup.findAll('image')
imgs = soup.find_all('image')
if imgs:
src=get_path_part(href)+imgs[0]['xlink:href']
@ -128,7 +128,7 @@ def get_update_data(inputio,
# 3/OEBPS/file0005_u3.xhtml etc.
if getsoups:
soup = make_soup(epub.read(href).decode("utf-8"))
for img in soup.findAll('img'):
for img in soup.find_all('img'):
newsrc=''
longdesc=''
## skip <img src="data:image..."
@ -159,7 +159,7 @@ def get_update_data(inputio,
if h2:
h2.extract()
for skip in bodysoup.findAll(attrs={'class':'skip_on_ffdl_update'}):
for skip in bodysoup.find_all(attrs={'class':'skip_on_ffdl_update'}):
skip.extract()
## <meta name="chapterurl" content="${url}"></meta>
@ -232,7 +232,7 @@ def get_story_url_from_epub_html(inputio,_is_good_url=None):
if( item.getAttribute("media-type") == "application/xhtml+xml" ):
filehref=relpath+item.getAttribute("href")
soup = make_soup(epub.read(filehref).decode("utf-8"))
for link in soup.findAll('a',href=re.compile(r'^http.*')):
for link in soup.find_all('a',href=re.compile(r'^http.*')):
ahref=link['href']
# print("href:(%s)"%ahref)
# hack for bad ficsaver ffnet URLs.
@ -277,7 +277,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
except UnicodeDecodeError:
# calibre converted to html zip fails with decode.
soup = make_soup(zipf.read(item))
for link in soup.findAll('a',href=re.compile(r'^http.*')):
for link in soup.find_all('a',href=re.compile(r'^http.*')):
ahref=link['href']
# print("href:(%s)"%ahref)
if _is_good_url == None or _is_good_url(ahref):

View file

@ -71,7 +71,7 @@ def get_urls_from_html(data,url=None,configuration=None,normalize=False,foremail
# logger.debug("dbl souping")
soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib")
for a in soup.findAll('a'):
for a in soup.find_all('a'):
if a.has_attr('href'):
# logger.debug("a['href']:%s"%a['href'])
href = form_url(url,a['href'])

View file

@ -59,9 +59,9 @@ class HtmlProcessor:
self._anchor_references = []
anchor_num = 0
# anchor links
anchorlist = self._soup.findAll('a', href=re.compile('^#'))
anchorlist = self._soup.find_all('a', href=re.compile('^#'))
# treat reference tags like a tags for TOCTOP.
anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#')))
anchorlist.extend(self._soup.find_all('reference', href=re.compile('^#')))
for anchor in anchorlist:
self._anchor_references.append((anchor_num, anchor['href']))
anchor['filepos'] = '%.10d' % anchor_num
@ -99,7 +99,7 @@ class HtmlProcessor:
def _FixPreTags(self):
'''Replace <pre> tags with HTML-ified text.'''
pres = self._soup.findAll('pre')
pres = self._soup.find_all('pre')
for pre in pres:
pre.replaceWith(self._FixPreContents(unicode(pre.contents[0])))
@ -120,15 +120,15 @@ class HtmlProcessor:
# TODO(chatham): <link> tags to script?
unsupported_tags = ('script', 'style')
for tag_type in unsupported_tags:
for element in self._soup.findAll(tag_type):
for element in self._soup.find_all(tag_type):
element.extract()
def RenameAnchors(self, prefix):
'''Rename every internal anchor to have the given prefix, then
return the contents of the body tag.'''
for anchor in self._soup.findAll('a', href=re.compile('^#')):
for anchor in self._soup.find_all('a', href=re.compile('^#')):
anchor['href'] = '#' + prefix + anchor['href'][1:]
for a in self._soup.findAll('a'):
for a in self._soup.find_all('a'):
if a.get('name'):
a['name'] = prefix + a['name']