Mirror of https://github.com/JimmXinu/FanFicFare.git, synced 2025-12-06 08:52:55 +01:00

Migrate to new bs4 API
Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
parent 3edd3c3e7b
commit aca07bbf59
57 changed files with 291 additions and 291 deletions
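
BeautifulSoup 4 uses PEP 8 snake_case method names; the camelCase names carried over from BeautifulSoup 3, such as findAll, survive only as deprecated aliases, and recent bs4 releases warn when they are used. The diff below is a mechanical rename of findAll(...) to find_all(...) across the site adapters; arguments and behavior are unchanged. A minimal sketch of the rename (the markup and variable names here are illustrative, not taken from the repository):

    from bs4 import BeautifulSoup

    html = '<ul class="dropdown-content"><li><a href="?chapter=1">One</a></li></ul>'
    soup = BeautifulSoup(html, 'html.parser')
    chapters = soup.find('ul', {'class': 'dropdown-content'})

    # Old spelling, a deprecated BeautifulSoup 3-era alias:
    #   links = chapters.findAll('a')
    # New bs4 spelling; same signature, same result:
    links = chapters.find_all('a')

    for i, a in enumerate(links):
        print(i + 1, a['href'], a.get_text())
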
@@ -171,7 +171,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
         # params['submit'] = 'Login'

         # # copy all hidden input tags to pick up appropriate tokens.
-        # for tag in soup.findAll('input',{'type':'hidden'}):
+        # for tag in soup.find_all('input',{'type':'hidden'}):
         #     params[tag['name']] = tag['value']

         # logger.debug("Will now login to URL {0} as {1} with password: {2}".format(url, params['email'],params['pass1']))

@@ -218,7 +218,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):

         # Find the chapters:
         chapters = soup.find('ul',{'class':'dropdown-content'})
-        for i, chapter in enumerate(chapters.findAll('a')):
+        for i, chapter in enumerate(chapters.find_all('a')):
             self.add_chapter(chapter,self.url+'&chapter='+unicode(i+1))

@@ -262,7 +262,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
         pages = 0
         pagination=asoup.find('ul',{'class' : 'pagination'})
         if pagination:
-            pages = pagination.findAll('li')[-1].find('a')
+            pages = pagination.find_all('li')[-1].find('a')
             if not pages == None:
                 pages = pages['href'].split('=')[-1]
             else:

@@ -271,7 +271,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
         storya = None
         ##If there is only 1 page of stories, check it to get the Metadata,
         if pages == 0:
-            a = asoup.findAll('li')
+            a = asoup.find_all('li')
             for lc2 in a:
                 if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
                     storya = lc2

@@ -294,7 +294,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
             # we look for the li element that has the story here
             asoup = self.make_soup(adata)

-            a = asoup.findAll('li')
+            a = asoup.find_all('li')
             for lc2 in a:
                 if lc2.find('a', href=re.compile(r'story.php\?no='+self.story.getMetadata('storyId')+"$")):
                     i=1

@@ -92,7 +92,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
         self.story.setMetadata('title', title.string)

         # Author
-        author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a')
+        author = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl'})[1].find('a')
         authorurl = author['href']
         self.story.setMetadata('author', author.string)
         self.story.setMetadata('authorUrl', authorurl)

@@ -112,7 +112,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
         ### add it before the rest of the pages, if any
         self.add_chapter('1', self.url)

-        chapterTable = soup1.find('div',{'class':'pages'}).findAll('a')
+        chapterTable = soup1.find('div',{'class':'pages'}).find_all('a')

         if chapterTable is not None:
             # Multi-chapter story

@@ -124,7 +124,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
                 self.add_chapter(chapterTitle, chapterUrl)


-        rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
+        rated = soup1.find('div',{'class':'story-info'}).find_all('div',{'class':'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
         self.story.setMetadata('rating',rated)

         self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))

@@ -136,14 +136,14 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):

         try:
             # in case link points somewhere other than the first chapter
-            a = soup.findAll('option')[1]['value']
+            a = soup.find_all('option')[1]['value']
             self.story.setMetadata('storyId',a.split('=',)[1])
             url = 'http://'+self.host+'/'+a
             soup = self.make_soup(self.get_request(url))
         except:
             pass

-        for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
+        for info in asoup.find_all('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
             a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
             if a != None:
                 self.story.setMetadata('title',stripHTML(a))

@@ -151,7 +151,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):


         # Find the chapters:
-        chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
+        chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
         if len(chapters) == 0:
             self.add_chapter(self.story.getMetadata('title'),url)
         else:

@@ -170,7 +170,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
         except:
             return ""

-        cats = info.findAll('a',href=re.compile('categories.php'))
+        cats = info.find_all('a',href=re.compile('categories.php'))
         for cat in cats:
             self.story.addToList('category',cat.string)

@@ -188,7 +188,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
         ## <td><span class="sb"><b>Published:</b> 04/08/2007</td>

         ## one story had <b>Updated...</b> in the description. Restrict to sub-table
-        labels = info.find('table').findAll('b')
+        labels = info.find('table').find_all('b')
         for labelspan in labels:
             value = labelspan.nextSibling
             label = stripHTML(labelspan)

@@ -147,7 +147,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
         # Find authorid and URL from... author url.
         mainmeta = soup.find('footer', {'class': 'main-meta'})
         alist = mainmeta.find('span', string='Author(s)')
-        alist = alist.parent.findAll('a', href=re.compile(r"/profile/u/[^/]+"))
+        alist = alist.parent.find_all('a', href=re.compile(r"/profile/u/[^/]+"))
         for a in alist:
             self.story.addToList('authorId',a['href'].split('/')[-1])
             self.story.addToList('authorUrl','https://'+self.host+a['href'])

@@ -159,10 +159,10 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
         chapters=soup.find('select',{'name':'chapter-nav'})
         hrefattr=None
         if chapters:
-            chapters=chapters.findAll('option')
+            chapters=chapters.find_all('option')
             hrefattr='value'
         else: # didn't find <select name='chapter-nav', look for alternative
-            chapters=soup.find('div',{'class':'widget--chapters'}).findAll('a')
+            chapters=soup.find('div',{'class':'widget--chapters'}).find_all('a')
             hrefattr='href'
         for index, chapter in enumerate(chapters):
             if chapter.text != 'Foreword' and 'Collapse chapters' not in chapter.text:

@@ -202,7 +202,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
         # story tags
         a = mainmeta.find('span',string='Tags')
         if a:
-            tags = a.parent.findAll('a')
+            tags = a.parent.find_all('a')
             for tag in tags:
                 self.story.addToList('tags', tag.text)

@@ -230,7 +230,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):

         # upvote, subs, and views
         a = soup.find('div',{'class':'title-meta'})
-        spans = a.findAll('span', recursive=False)
+        spans = a.find_all('span', recursive=False)
         self.story.setMetadata('upvotes', re.search(r'\(([^)]+)', spans[0].find('span').text).group(1))
         self.story.setMetadata('subscribers', re.search(r'\(([^)]+)', spans[1].find('span').text).group(1))
         if len(spans) > 2: # views can be private

@@ -126,7 +126,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):

         # Find the chapters:
         # The update date is with the chapter links... so we will update it here as well
-        for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+r"&chapterid=\d+$")):
             value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
             self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
             self.add_chapter(chapter,'https://'+self.getSiteDomain()+chapter['href'])

@@ -134,11 +134,11 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):

         # Get the MetaData
         # Erotia Tags
-        tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode'))
+        tags = soup.find_all('a',href=re.compile(r'/stories/search.php\?selectedcode'))
         for tag in tags:
             self.story.addToList('eroticatags',tag.text)

-        for td in soup.findAll('td'):
+        for td in soup.find_all('td'):
             if len(td.text)>0:
                 if 'Added on:' in td.text and '<table' not in unicode(td):
                     value = td.text.replace('Added on:','').strip()

@@ -169,20 +169,20 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
             raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))

         #strip comments from soup
-        [comment.extract() for comment in chaptertag.findAll(string=lambda text:isinstance(text, Comment))]
+        [comment.extract() for comment in chaptertag.find_all(string=lambda text:isinstance(text, Comment))]

         # BDSM Library basically wraps it's own html around the document,
         # so we will be removing the script, title and meta content from the
         # storyblock
-        for tag in chaptertag.findAll('head') + chaptertag.findAll('style') + chaptertag.findAll('title') + chaptertag.findAll('meta') + chaptertag.findAll('o:p') + chaptertag.findAll('link'):
+        for tag in chaptertag.find_all('head') + chaptertag.find_all('style') + chaptertag.find_all('title') + chaptertag.find_all('meta') + chaptertag.find_all('o:p') + chaptertag.find_all('link'):
             tag.extract()

-        for tag in chaptertag.findAll('o:smarttagtype'):
+        for tag in chaptertag.find_all('o:smarttagtype'):
             tag.name = 'span'

         ## I'm going to take the attributes off all of the tags
         ## because they usually refer to the style that we removed above.
-        for tag in chaptertag.findAll(True):
+        for tag in chaptertag.find_all(True):
             tag.attrs = None

         return self.utf8FromSoup(url,chaptertag)

@@ -116,7 +116,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
         self.story.setMetadata('rating', rating)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -134,7 +134,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):

         # <span class="label">Rated:</span> NC-17<br /> etc

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})

         value = labels[0].previousSibling
         svalue = ""

@@ -154,22 +154,22 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value.split(' -')[0])

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -194,7 +194,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

@@ -88,8 +88,8 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
         intro = stripHTML(info.select_one('.inform-inform-txt').span)
         self.setDescription(self.url, intro)

-        for content in soup.findAll('div', {'id': 'content'}):
-            for a in content.findAll('a'):
+        for content in soup.find_all('div', {'id': 'content'}):
+            for a in content.find_all('a'):
                 self.add_chapter(a.get_text(), a['href'])

@@ -98,7 +98,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
         ## Title
         ## Some stories have a banner that has it's own a tag before the actual text title...
         ## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
-        a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
+        a = soup.find('div',{'id':'pagetitle'}).find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
         self.story.setMetadata('title',stripHTML(a))

         # Find authorid and URL from... author url.

@@ -110,7 +110,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             #self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href'])
             self.add_chapter(chapter,'https://{0}/{1}{2}'.format(self.host, chapter['href'],addURL))

@@ -127,7 +127,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
             return ""

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             val = labelspan.nextSibling
             value = unicode('')

@@ -149,27 +149,27 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', stripHTML(value))

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Pairing' in label:
-                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
+                ships = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                 for ship in ships:
                     self.story.addToList('ships',ship.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -196,7 +196,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # this site has several links to each story.

@@ -95,7 +95,7 @@ class DokugaComAdapter(BaseSiteAdapter):
         params['Submit'] = 'Submit'

         # copy all hidden input tags to pick up appropriate tokens.
-        for tag in soup.findAll('input',{'type':'hidden'}):
+        for tag in soup.find_all('input',{'type':'hidden'}):
             params[tag['name']] = tag['value']

         loginUrl = 'http://' + self.getSiteDomain() + '/fanfiction'

@@ -153,7 +153,7 @@ class DokugaComAdapter(BaseSiteAdapter):
         self.story.setMetadata('title',stripHTML(a))

         # Find the chapters:
-        chapters = soup.find('select').findAll('option')
+        chapters = soup.find('select').find_all('option')
         if len(chapters)==1:
             self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/1')
         else:

@@ -168,7 +168,7 @@ class DokugaComAdapter(BaseSiteAdapter):
             asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')

             #grab the rest of the metadata from the author's page
-            for div in asoup.findAll('div'):
+            for div in asoup.find_all('div'):
                 nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$"))
                 if nav != None:
                     break

@@ -208,7 +208,7 @@ class DokugaComAdapter(BaseSiteAdapter):

         else:
             asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'})
-            for div in asoup.findAll('div'):
+            for div in asoup.find_all('div'):
                 nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$"))
                 if nav != None:
                     break

@@ -161,7 +161,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -181,13 +181,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):

         self.setDescription(url,content.find('blockquote'))

-        for genre in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
+        for genre in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')):
             self.story.addToList('genre',genre.string)

-        for warning in content.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
+        for warning in content.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')):
             self.story.addToList('warnings',warning.string)

-        labels = content.findAll('b')
+        labels = content.find_all('b')

         for labelspan in labels:
             value = labelspan.nextSibling

@@ -208,22 +208,22 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('rating', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -247,7 +247,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links

@@ -138,7 +138,7 @@ class EFPFanFicNet(BaseSiteAdapter):
             # no selector found, so it's a one-chapter story.
             self.add_chapter(self.story.getMetadata('title'),url)
         else:
-            allOptions = select.findAll('option', {'value' : re.compile(r'viewstory')})
+            allOptions = select.find_all('option', {'value' : re.compile(r'viewstory')})
             for o in allOptions:
                 url = u'https://%s/%s' % ( self.getSiteDomain(),
                                            o['value'])

@@ -170,14 +170,14 @@ class EFPFanFicNet(BaseSiteAdapter):
         if authsoup != None:
             # last author link with offset should be the 'next' link.
             authurl = u'https://%s/%s' % ( self.getSiteDomain(),
-                                           authsoup.findAll('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )
+                                           authsoup.find_all('a',href=re.compile(r'viewuser\.php\?uid=\d+&catid=&offset='))[-1]['href'] )

             # Need author page for most of the metadata.
             logger.debug("fetching author page: (%s)"%authurl)
             authsoup = self.make_soup(self.get_request(authurl))
             #print("authsoup:%s"%authsoup)

-        storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
+        storyas = authsoup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
         for storya in storyas:
             #print("======storya:%s"%storya)
             storyblock = storya.findParent('div',{'class':'storybloc'})

@@ -194,7 +194,7 @@ class EFPFanFicNet(BaseSiteAdapter):
         # Tipo di coppia: Het | Personaggi: Akasuna no Sasori , Akatsuki, Nuovo Personaggio | Note: OOC | Avvertimenti: Tematiche delicate<br />
         # Categoria: <a href="categories.php?catid=1&parentcatid=1">Anime & Manga</a> > <a href="categories.php?catid=108&parentcatid=108">Naruto</a> | Contesto: Naruto Shippuuden | Leggi le <a href="reviews.php?sid=1331275&a=">3</a> recensioni</div>

-        cats = noteblock.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+        cats = noteblock.find_all('a',href=re.compile(r'browse.php\?type=categories'))
         for cat in cats:
             self.story.addToList('category',cat.string)

@@ -262,7 +262,7 @@ class EFPFanFicNet(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId'))+'&i=1':

@@ -288,11 +288,11 @@ class EFPFanFicNet(BaseSiteAdapter):
             raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

         # remove any header and 'o:p' tags.
-        for tag in div.findAll("head") + div.findAll("o:p"):
+        for tag in div.find_all("head") + div.find_all("o:p"):
             tag.extract()

         # change any html and body tags to div.
-        for tag in div.findAll("html") + div.findAll("body"):
+        for tag in div.find_all("html") + div.find_all("body"):
             tag.name='div'

         # remove extra bogus doctype.

@@ -126,7 +126,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
         self.story.setMetadata('rating', rating)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -144,7 +144,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):

         # <span class="label">Rated:</span> NC-17<br /> etc

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})

         value = labels[0].previousSibling
         svalue = ""

@@ -164,22 +164,22 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value.split(' -')[0])

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -204,7 +204,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links

@@ -163,7 +163,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
         # Find the chapters:
         # The published and update dates are with the chapter links...
         # so we have to get them from there.
-        chapters = soup.findAll('a', href=re.compile('/'+self.story.getMetadata(
+        chapters = soup.find_all('a', href=re.compile('/'+self.story.getMetadata(
             'storyId')+'/([a-zA-Z0-9_]+)/'))

         # Here we are getting the published date. It is the date the first chapter was "updated"

@@ -241,8 +241,8 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
                 "Error downloading Chapter: '{0}'! Missing required element!".format(url))

         #Now, there are a lot of extranious tags within the story division.. so we will remove them.
-        for tag in story.findAll('ul',{'class':'pager'}) + story.findAll(
-            'div',{'class':'alert'}) + story.findAll('div', {'class':'btn-group'}):
+        for tag in story.find_all('ul',{'class':'pager'}) + story.find_all(
+            'div',{'class':'alert'}) + story.find_all('div', {'class':'btn-group'}):
             tag.extract()

         return self.utf8FromSoup(url,story)

@@ -167,7 +167,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
         ## the first chapter. It generates another server request and
         ## doesn't seem to be needed lately, so now default it to off.
         try:
-            chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
+            chapcount = len(soup.find('select', { 'name' : 'chapter' } ).find_all('option'))
             # get chapter part of url.
         except:
             chapcount = 1

@@ -212,7 +212,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
         ## For 1, use the second link.
         ## For 2, fetch the crossover page and pull the two categories from there.
         pre_links = soup.find('div',{'id':'pre_story_links'})
-        categories = pre_links.findAll('a',{'class':'xcontrast_txt'})
+        categories = pre_links.find_all('a',{'class':'xcontrast_txt'})
         #print("xcontrast_txt a:%s"%categories)
         if len(categories) > 1:
             # Strangely, the ones with *two* links are the

@@ -251,7 +251,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):


         grayspan = gui_table1i.find('span', {'class':'xgray xcontrast_txt'})
-        # for b in grayspan.findAll('button'):
+        # for b in grayspan.find_all('button'):
         #     b.extract()
         metatext = stripHTML(grayspan).replace('Hurt/Comfort','Hurt-Comfort')
         #logger.debug("metatext:(%s)"%metatext)

@@ -290,7 +290,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):

         # Updated: <span data-xutime='1368059198'>5/8</span> - Published: <span data-xutime='1278984264'>7/12/2010</span>
         # Published: <span data-xutime='1384358726'>8m ago</span>
-        dates = soup.findAll('span',{'data-xutime':re.compile(r'^\d+$')})
+        dates = soup.find_all('span',{'data-xutime':re.compile(r'^\d+$')})
         if len(dates) > 1 :
             # updated get set to the same as published upstream if not found.
             self.story.setMetadata('dateUpdated',datetime.fromtimestamp(float(dates[0]['data-xutime'])))

@@ -395,7 +395,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
             # no selector found, so it's a one-chapter story.
             self.add_chapter(self.story.getMetadata('title'),url)
         else:
-            allOptions = select.findAll('option')
+            allOptions = select.find_all('option')
             for o in allOptions:
                 ## title URL will be put back on chapter URL during
                 ## normalize_chapterurl() anyway, but also here for

@@ -134,7 +134,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',stripHTML(a))

         # Find the chapters:
-        for chapter in soup.find('select').findAll('option'):
+        for chapter in soup.find('select').find_all('option'):
             self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])

         ## title="Wörter" failed with max_zalgo:1

@@ -181,13 +181,13 @@ class FanFiktionDeAdapter(BaseSiteAdapter):

         # #find metadata on the author's page
         # asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
-        # tr=asoup.findAll('tr')
+        # tr=asoup.find_all('tr')
         # for i in range(1,len(tr)):
         #     a = tr[i].find('a')
         #     if '/s/'+self.story.getMetadata('storyId')+'/1/' in a['href']:
         #         break

-        # td = tr[i].findAll('td')
+        # td = tr[i].find_all('td')
         # self.story.addToList('category',stripHTML(td[2]))
         # self.story.setMetadata('rating', stripHTML(td[5]))
         # self.story.setMetadata('numWords', stripHTML(td[6]))

@@ -204,7 +204,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
         soup = self.make_soup(self.get_request(url))

         div = soup.find('div', {'id' : 'storytext'})
-        for a in div.findAll('script'):
+        for a in div.find_all('script'):
             a.extract()

         if None == div:

@@ -146,7 +146,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
         pubdate = None
         chapters = soup.find('ul', {'class' : 'list-of-fanfic-parts'})
         if chapters != None:
-            for chapdiv in chapters.findAll('li', {'class':'part'}):
+            for chapdiv in chapters.find_all('li', {'class':'part'}):
                 chapter=chapdiv.find('a',href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+r"/\d+#part_content$"))
                 churl='https://'+self.host+chapter['href']

@@ -179,7 +179,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
         # pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
         # pr='https://'+self.host+pr['href']
         # pr = self.make_soup(self.get_request(pr))
-        # pr=pr.findAll('div', {'class' : 'part_text'})
+        # pr=pr.find_all('div', {'class' : 'part_text'})
         # i=0
         # for part in pr:
         #     i=i+len(stripHTML(part).split(' '))

@@ -196,7 +196,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
             self.story.setMetadata('seriesUrl','https://' + self.getSiteDomain() + series_div.a.get('href'))

         i=0
-        fandoms = dlinfo.select_one('div:not([class])').findAll('a', href=re.compile(r'/fanfiction/\w+'))
+        fandoms = dlinfo.select_one('div:not([class])').find_all('a', href=re.compile(r'/fanfiction/\w+'))
         for fandom in fandoms:
             self.story.addToList('category',fandom.string)
             i=i+1

@@ -205,13 +205,13 @@ class FicBookNetAdapter(BaseSiteAdapter):

         tags = soup.find('div',{'class':'tags'})
         if tags:
-            for genre in tags.findAll('a',href=re.compile(r'/tags/')):
+            for genre in tags.find_all('a',href=re.compile(r'/tags/')):
                 self.story.addToList('genre',stripHTML(genre))

         ratingdt = dlinfo.find('div',{'class':re.compile(r'badge-rating-.*')})
         self.story.setMetadata('rating', stripHTML(ratingdt.find('span')))

-        # meta=table.findAll('a', href=re.compile(r'/ratings/'))
+        # meta=table.find_all('a', href=re.compile(r'/ratings/'))
         # i=0
         # for m in meta:
         #     if i == 0:

@@ -201,10 +201,10 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
         # epubutils.py
         # Yes, this still applies to fictionalley-archive.

-        for tag in chaptext.findAll('head') + chaptext.findAll('meta') + chaptext.findAll('script'):
+        for tag in chaptext.find_all('head') + chaptext.find_all('meta') + chaptext.find_all('script'):
             tag.extract()

-        for tag in chaptext.findAll('body') + chaptext.findAll('html'):
+        for tag in chaptext.find_all('body') + chaptext.find_all('html'):
             tag.name = 'div'

         if self.getConfig('include_author_notes'):

@@ -136,7 +136,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
         # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
         spanreq = metap.find("span",{"class":"story-warnings"})
         if spanreq: # can be no warnings.
-            for a in spanreq.findAll("a"):
+            for a in spanreq.find_all("a"):
                 self.story.addToList('warnings',a['title'])

         ## perhaps not the most efficient way to parse this, using

@@ -186,7 +186,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
             # no list found, so it's a one-chapter story.
             self.add_chapter(self.story.getMetadata('title'),url)
         else:
-            chapterlistlis = storylistul.findAll('li')
+            chapterlistlis = storylistul.find_all('li')
             for chapterli in chapterlistlis:
                 if "blocked" in chapterli['class']:
                     # paranoia check. We should already be logged in by now.

@@ -161,7 +161,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

@@ -178,7 +178,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):


         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -199,22 +199,22 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -238,7 +238,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links

@@ -125,7 +125,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
         soup = self.make_soup(self.get_request(url,usecache=False))

         # removing all of the scripts
-        for tag in soup.findAll('script'):
+        for tag in soup.find_all('script'):
             tag.extract()

@@ -144,13 +144,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX

         # Find authorid and URL from... author urls.
         pagetitle = soup.find('div',id='pagetitle')
-        for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")):
+        for a in pagetitle.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+")):
             self.story.addToList('authorId',a['href'].split('=')[1])
             self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
             self.story.addToList('author',stripHTML(a))

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

@@ -166,7 +166,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
             return ""

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = stripHTML(labelspan)

@@ -193,7 +193,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
             self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [stripHTML(cat) for cat in cats]
                 for cat in catstext:
                     # ran across one story with an empty <a href="browse.php?type=categories&catid=1"></a>

@@ -204,7 +204,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
             if 'Characters' in label:
                 self.story.addToList('characters','Kirk')
                 self.story.addToList('characters','Spock')
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [stripHTML(char) for char in chars]
                 for char in charstext:
                     self.story.addToList('characters',stripHTML(char))

@@ -213,7 +213,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                 genrestext = [stripHTML(genre) for genre in genres]
                 self.genre = ', '.join(genrestext)
                 for genre in genrestext:

@@ -223,7 +223,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
             ## has 'Story Type', which is much more what most sites
             ## call genre.
             if 'Story Type' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=5')) # XXX
                 genrestext = [stripHTML(genre) for genre in genres]
                 self.genre = ', '.join(genrestext)
                 for genre in genrestext:

@@ -233,21 +233,21 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 warningstext = [stripHTML(warning) for warning in warnings]
                 self.warning = ', '.join(warningstext)
                 for warning in warningstext:
                     self.story.addToList('warnings',stripHTML(warning))

             if 'Universe' in label:
-                universes = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
+                universes = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=3')) # XXX
                 universestext = [stripHTML(universe) for universe in universes]
                 self.universe = ', '.join(universestext)
                 for universe in universestext:
                     self.story.addToList('universe',stripHTML(universe))

             if 'Crossover Fandom' in label:
-                crossoverfandoms = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
+                crossoverfandoms = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=4')) # XXX
                 crossoverfandomstext = [stripHTML(crossoverfandom) for crossoverfandom in crossoverfandoms]
                 self.crossoverfandom = ', '.join(crossoverfandomstext)
                 for crossoverfandom in crossoverfandomstext:

@@ -274,7 +274,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
             series_url = 'https://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links

@@ -236,7 +236,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
             breadcrumbs = soup.find('div', id='BreadCrumbComponent')
             if not breadcrumbs:
                 breadcrumbs = soup.select_one('ul[class^="_breadcrumbs_list_"]')
-            self.story.addToList('category', breadcrumbs.findAll('a')[1].string)
+            self.story.addToList('category', breadcrumbs.find_all('a')[1].string)

             ## one-shot chapter
             self.add_chapter(self.story.getMetadata('title'), self.url)

@@ -356,7 +356,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
         raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
         # logger.debug("\tChapter text: %s" % raw_page)
         page_soup = self.make_soup(raw_page)
-        [comment.extract() for comment in page_soup.findAll(string=lambda text:isinstance(text, Comment))]
+        [comment.extract() for comment in page_soup.find_all(string=lambda text:isinstance(text, Comment))]
         fullhtml = ""
         for aa_ht_div in page_soup.find_all('div', 'aa_ht') + page_soup.select('div[class^="_article__content_"]'):
             if aa_ht_div.div:

@@ -116,7 +116,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
         self.story.setMetadata('rating', rating)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -134,7 +134,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):

         # <span class="label">Rated:</span> NC-17<br /> etc

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})

         value = labels[0].previousSibling
         svalue = ""

@@ -154,22 +154,22 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value.split(' -')[0])

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -194,7 +194,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):

@@ -678,7 +678,7 @@ class Chapter(object):

    def _excludeEditorSignature(self, root):
        """Exclude editor signature from within `root' element."""
-        for stringNode in root.findAll(string=True):
+        for stringNode in root.find_all(string=True):
            if re.match(self.SIGNED_PATTERN, textNode.string):
                editorLink = textNode.findNext('a')
                if editorLink:

@@ -148,12 +148,12 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):

         # category
         # <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
-        for a in soup.findAll('a',href=re.compile(r"^/fanfic/a/")):
+        for a in soup.find_all('a',href=re.compile(r"^/fanfic/a/")):
             self.story.addToList('category',a.string)

         # genre
         # <a href="/fanfic/src.php/g/567">Ranma 1/2</a>
-        for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")):
+        for a in soup.find_all('a',href=re.compile(r"^/fanfic/src.php/g/")):
             self.story.addToList('genre',a.string)

         metasoup = soup.find("div",{"class":"post-meta"})

@@ -154,7 +154,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href']+addurl)

@@ -170,7 +170,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
             return ""

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -191,13 +191,13 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
             self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [cat.string for cat in cats]
                 for cat in catstext:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [char.string for char in chars]
                 for char in charstext:
                     self.story.addToList('characters',char.string)

@@ -206,7 +206,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 genrestext = [genre.string for genre in genres]
                 self.genre = ', '.join(genrestext)
                 for genre in genrestext:

@@ -216,7 +216,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 warningstext = [warning.string for warning in warnings]
                 self.warning = ', '.join(warningstext)
                 for warning in warningstext:

@@ -243,7 +243,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
             series_url = 'https://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links

@@ -195,7 +195,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
         [a.extract() for a in story.find_all('a')]

         # Some tags have non-standard tag name.
-        for tag in story.findAll(recursive=True):
+        for tag in story.find_all(recursive=True):
             if tag.name not in HTML_TAGS:
                 tag.name = 'span'

@@ -137,14 +137,14 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):

         try:
             # in case link points somewhere other than the first chapter
-            a = soup.findAll('option')[1]['value']
+            a = soup.find_all('option')[1]['value']
             self.story.setMetadata('storyId',a.split('=',)[1])
             url = 'http://'+self.host+'/'+a
             soup = self.make_soup(self.get_request(url))
         except:
             pass

-        for info in asoup.findAll('table', {'class' : 'border'}):
+        for info in asoup.find_all('table', {'class' : 'border'}):
             a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
             if a != None:
                 self.story.setMetadata('title',stripHTML(a))

@@ -152,7 +152,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):


         # Find the chapters:
-        chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
+        chapters=soup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
         if len(chapters) == 0:
             self.add_chapter(self.story.getMetadata('title'),url)
         else:

@ -171,7 +171,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
except:
|
||||
return ""
|
||||
|
||||
cats = info.findAll('a',href=re.compile('categories.php'))
|
||||
cats = info.find_all('a',href=re.compile('categories.php'))
|
||||
for cat in cats:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
|
|
@ -188,7 +188,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
self.setDescription(url,svalue)
|
||||
|
||||
# <span class="label">Rated:</span> NC-17<br /> etc
|
||||
labels = info.findAll('b')
|
||||
labels = info.find_all('b')
|
||||
for labelspan in labels:
|
||||
value = labelspan.nextSibling
|
||||
label = stripHTML(labelspan)
|
||||
|
|
|
|||
|
|
@@ -93,26 +93,26 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
         chapters = soup.find('select')
         if chapters == None:
             self.add_chapter(self.story.getMetadata('title'),url)
-            for b in soup.findAll('b'):
+            for b in soup.find_all('b'):
                 if b.text == "Updated":
                     date = b.nextSibling.string.split(': ')[1].split(',')
                     self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
                     self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat))
         else:
             i = 0
-            chapters = chapters.findAll('option')
+            chapters = chapters.find_all('option')
             for chapter in chapters:
                 self.add_chapter(chapter,'https://'+self.host+chapter['value'])
                 if i == 0:
                     self.story.setMetadata('storyId',chapter['value'].split('/')[3])
-                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
+                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
                     for b in head:
                         if b.text == "Updated":
                             date = b.nextSibling.string.split(': ')[1].split(',')
                             self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))

                 if i == (len(chapters)-1):
-                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
+                    head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).find_all('b')
                     for b in head:
                         if b.text == "Updated":
                             date = b.nextSibling.string.split(': ')[1].split(',')

@@ -160,20 +160,20 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
         soup = self.make_soup(self.get_request(url))

         chapter=self.make_soup('<div class="story"></div>')
-        for p in soup.findAll(['p','blockquote']):
+        for p in soup.find_all(['p','blockquote']):
             if "This is for problems with the formatting or the layout of the chapter." in stripHTML(p):
                 break
             chapter.append(p)

-        for a in chapter.findAll('div'):
+        for a in chapter.find_all('div'):
             a.extract()
-        for a in chapter.findAll('table'):
+        for a in chapter.find_all('table'):
             a.extract()
-        for a in chapter.findAll('script'):
+        for a in chapter.find_all('script'):
             a.extract()
-        for a in chapter.findAll('form'):
+        for a in chapter.find_all('form'):
             a.extract()
-        for a in chapter.findAll('textarea'):
+        for a in chapter.find_all('textarea'):
             a.extract()
@@ -80,7 +80,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/fanfiction/'+chapter['href'])

@@ -92,7 +92,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
             return ""

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -116,13 +116,13 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
                 self.story.setMetadata('reads', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [cat.string for cat in cats]
                 for cat in catstext:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [char.string for char in chars]
                 for char in charstext:
                     if "Snape and Harry (required)" in char:

@@ -132,27 +132,27 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
                     self.story.addToList('characters',char.string)

             if 'Warning' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                 for warning in warnings:
                     self.story.addToList('warnings',stripHTML(warning))

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                 for genre in genres:
                     self.story.addToList('genre',stripHTML(genre))

             if 'Takes Place' in label:
-                takesplaces = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                takesplaces = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                 for takesplace in takesplaces:
                     self.story.addToList('takesplaces',stripHTML(takesplace))

             if 'Snape flavour' in label:
-                snapeflavours = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                snapeflavours = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                 for snapeflavour in snapeflavours:
                     self.story.addToList('snapeflavours',stripHTML(snapeflavour))

             if 'Tags' in label:
-                sitetags = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                sitetags = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                 for sitetag in sitetags:
                     self.story.addToList('sitetags',stripHTML(sitetag))

@@ -176,7 +176,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/fanfiction/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
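The hunks above all follow the same eFiction metadata pattern: each field is a <span class="label">Name:</span> followed by either a bare text node or a run of <a> links inside the same parent. A small self-contained sketch of that pattern (the HTML is invented for illustration; next_sibling is the snake_case counterpart of the nextSibling attribute used in the hunks):

    import re
    from bs4 import BeautifulSoup

    html = ('<div><span class="label">Rated:</span> NC-17<br/>'
            '<span class="label">Genre:</span> '
            '<a href="browse.php?type=class">Drama</a></div>')
    soup = BeautifulSoup(html, 'html.parser')

    for labelspan in soup.find_all('span', {'class': 'label'}):
        label = labelspan.string        # e.g. 'Rated:'
        value = labelspan.next_sibling  # the text node right after the span
        links = labelspan.parent.find_all('a', href=re.compile(r'browse.php'))
        print(label, repr(str(value)), [a.string for a in links])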
@@ -121,7 +121,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/missingpieces/'+chapter['href']+addurl)

@@ -138,7 +138,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):


         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -159,22 +159,22 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -198,7 +198,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links
@@ -111,7 +111,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -126,7 +126,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
         except:
             return ""

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -147,22 +147,22 @@ class PsychFicComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -186,7 +186,7 @@ class PsychFicComAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -187,7 +187,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):


         chapters = soup.find('table',{'id':'chapters'}).find('tbody')
-        tds = [tr.findAll('td') for tr in chapters.findAll('tr')]
+        tds = [tr.find_all('td') for tr in chapters.find_all('tr')]

        if not tds:
            raise exceptions.FailedToDownload(
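The changed line above flattens the chapter table into one list of <td> cells per row. A runnable sketch of the same idiom (the sample HTML is invented):

    from bs4 import BeautifulSoup

    html = ('<table id="chapters"><tbody>'
            '<tr><td>Ch 1</td><td>2020</td></tr>'
            '</tbody></table>')
    tbody = BeautifulSoup(html, 'html.parser').find('table', {'id': 'chapters'}).find('tbody')
    tds = [tr.find_all('td') for tr in tbody.find_all('tr')]  # one list of <td> per row
    assert tds and tds[0][0].get_text() == 'Ch 1'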
@@ -193,7 +193,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX

         # Find authorid and URL from... author url.
         # (fetch multiple authors)
-        alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
         for a in alist:
             self.story.addToList('authorId',a['href'].split('=')[1])
             self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href'])

@@ -201,11 +201,11 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX

         # Reviews
         reviewdata = soup.find('div', {'id' : 'sort'})
-        a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
+        a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
         self.story.setMetadata('reviews',stripHTML(a))

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/fanfics/'+chapter['href']+addurl)

@@ -222,7 +222,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX


         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -237,13 +237,13 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [cat.string for cat in cats]
                 for cat in catstext:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [char.string for char in chars]
                 for char in charstext:
                     self.story.addToList('characters',char.string)

@@ -252,7 +252,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                 genrestext = [genre.string for genre in genres]
                 self.genre = ', '.join(genrestext)
                 for genre in genrestext:

@@ -262,7 +262,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 warningstext = [warning.string for warning in warnings]
                 self.warning = ', '.join(warningstext)
                 for warning in warningstext:

@@ -291,7 +291,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
             series_url = 'http://'+self.host+'/fanfics/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -240,13 +240,13 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX

         # Categories
         if soup.find('span',{'class': 'wi_fic_showtags_inner'}):
-            categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).findAll('a')
+            categories = soup.find('span',{'class': 'wi_fic_showtags_inner'}).find_all('a')
             for category in categories:
                 self.story.addToList('category', stripHTML(category))

         # Genres
         if soup.find('a',{'class': 'fic_genre'}):
-            genres = soup.findAll('a',{'class': 'fic_genre'})
+            genres = soup.find_all('a',{'class': 'fic_genre'})
             for genre in genres:
                 self.story.addToList('genre', stripHTML(genre))

@@ -258,7 +258,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX

         # Content Warnings
         if soup.find('ul',{'class': 'ul_rate_expand'}):
-            warnings = soup.find('ul',{'class': 'ul_rate_expand'}).findAll('a')
+            warnings = soup.find('ul',{'class': 'ul_rate_expand'}).find_all('a')
             for warn in warnings:
                 self.story.addToList('warnings', stripHTML(warn))

@@ -312,7 +312,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
                 self.story.setMetadata(metadata, stripHTML(row.find('td')))

         if soup.find('table',{'class': 'table_pro_overview'}):
-            stats_table = soup.find('table',{'class': 'table_pro_overview'}).findAll('tr')
+            stats_table = soup.find('table',{'class': 'table_pro_overview'}).find_all('tr')
             for row in stats_table:
                 find_stats_data("Total Views (All)", row, "views")
                 find_stats_data("Word Count", row, "numWords")
@@ -171,7 +171,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX

         # Find authorid and URL from... author url.
         # (fetch multiple authors)
-        alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        alist = soup.find_all('a', href=re.compile(r"viewuser.php\?uid=\d+"))
         for a in alist:
             self.story.addToList('authorId',a['href'].split('=')[1])
             self.story.addToList('authorUrl','https://'+self.host+'/fanfics/'+a['href'])

@@ -180,12 +180,12 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX

         # Reviews
         reviewdata = soup.find('div', {'id' : 'sort'})
-        a = reviewdata.findAll('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
+        a = reviewdata.find_all('a', href=re.compile(r'reviews.php\?type=ST&(amp;)?item='+self.story.getMetadata('storyId')+"$"))[1] # second one.
         self.story.setMetadata('reviews',stripHTML(a))


         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/fanfics/'+chapter['href']+addurl)

@@ -208,7 +208,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
         self.setDescription(url,self.make_soup(summarydata))

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -220,13 +220,13 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [cat.string for cat in cats]
                 for cat in catstext:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [char.string for char in chars]
                 for char in charstext:
                     self.story.addToList('characters',char.string)

@@ -235,7 +235,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                 genrestext = [genre.string for genre in genres]
                 self.genre = ', '.join(genrestext)
                 for genre in genrestext:

@@ -245,7 +245,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
             ## leaving it in. Check to make sure the type_id number
             ## is correct, though--it's site specific.
             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 warningstext = [warning.string for warning in warnings]
                 self.warning = ', '.join(warningstext)
                 for warning in warningstext:

@@ -273,7 +273,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
             series_url = 'https://'+self.host+'/fanfics/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -109,7 +109,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
         self.story.setMetadata('title',stripHTML(titlea))

         # Find the chapters (from soup, not authsoup):
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/siye/'+chapter['href'])

@@ -121,7 +121,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
         metatable = soup.find('table',{'width':'95%'})

         # Categories
-        cat_as = metatable.findAll('a', href=re.compile(r'categories.php'))
+        cat_as = metatable.find_all('a', href=re.compile(r'categories.php'))
         for cat_a in cat_as:
             self.story.addToList('category',stripHTML(cat_a))

@@ -209,7 +209,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
             series_url = 'https://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -99,7 +99,7 @@ class SpiritFanfictionComAdapter(BaseSiteAdapter):

         # Authors
         # Find authorid and URL
-        authors = (title.find_next('div', {'class':'left'})).findAll('span', {'class':'usuario'})
+        authors = (title.find_next('div', {'class':'left'})).find_all('span', {'class':'usuario'})

         for author in authors:
             self.story.addToList('authorId', author.find('a')['href'].split('/')[-1])

@@ -114,10 +114,10 @@ class SpiritFanfictionComAdapter(BaseSiteAdapter):
         newestChapter = None
         self.newestChapterNum = None # save for comparing during update.
         # Find the chapters:
-        chapters = soup.findAll('table', {'class':'listagemCapitulos espacamentoTop'})
+        chapters = soup.find_all('table', {'class':'listagemCapitulos espacamentoTop'})
         for chapter in chapters:

-            for row in chapter.findAll('tr', {'class': 'listagem-textoBg1'}): # Find each row with chapter info
+            for row in chapter.find_all('tr', {'class': 'listagem-textoBg1'}): # Find each row with chapter info
                 a = row.find('a') # Chapter link

                 # Datetime
@@ -93,7 +93,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
         self.story.setMetadata('title',stripHTML(a))

         # Find the chapters: chapterview.asp?sid=7000&cid=30919
-        chapters=soup.findAll('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+r"&cid=\d+$"))
+        chapters=soup.find_all('a', href=re.compile(r'chapterview.asp\?sid='+self.story.getMetadata('storyId')+r"&cid=\d+$"))
         if len(chapters)==1:
             self.add_chapter(self.story.getMetadata('title'),'http://'+self.host+'/'+chapters[0]['href'])
         else:

@@ -109,14 +109,14 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):

         # no convenient way to get word count

-        for td in asoup.findAll('td', {'colspan' : '3'}):
+        for td in asoup.find_all('td', {'colspan' : '3'}):
             if td.find('a', href=re.compile(r'chapterlistview.asp\?SID='+self.story.getMetadata('storyId'))) != None:
                 break
         td=td.nextSibling.nextSibling
         self.story.setMetadata('dateUpdated', makeDate(stripHTML(td).split(': ')[1], self.dateformat))
         try:
             tr=td.parent.nextSibling.nextSibling.nextSibling.nextSibling
-            td=tr.findAll('td')
+            td=tr.find_all('td')
             self.story.setMetadata('rating', td[0].string.split(': ')[1])
             self.story.setMetadata('status', td[2].string.split(': ')[1])
             self.story.setMetadata('datePublished', makeDate(stripHTML(td[4]).split(': ')[1], self.dateformat))
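The td.nextSibling.nextSibling hops above step over the whitespace text nodes bs4 keeps between tags; two hops reach the next element. Illustrated with invented HTML (next_sibling is the snake_case spelling of the nextSibling attribute used in the hunk):

    from bs4 import BeautifulSoup

    td = BeautifulSoup('<tr><td>a</td> <td>b</td></tr>', 'html.parser').find('td')
    print(repr(td.next_sibling))          # ' '  -- the whitespace text node
    print(td.next_sibling.next_sibling)   # <td>b</td>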
@@ -230,7 +230,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
         self.story.setMetadata('title',stripHTML(a))

         authfrom = soup.find('footer')
-        alist = authfrom.findAll('a', {'rel' : 'author'})
+        alist = authfrom.find_all('a', {'rel' : 'author'})
         for a in alist:
             self.story.addToList('authorId',a['href'].split('/')[2])
             self.story.addToList('authorUrl','https://'+self.host+a['href'])

@@ -298,7 +298,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
         self.has_universes = False

         title_cell = story_row.find('td', {'class' : 'lc2'})
-        for cat in title_cell.findAll('div', {'class' : 'typediv'}):
+        for cat in title_cell.find_all('div', {'class' : 'typediv'}):
             self.story.addToList('genre',cat.text)

         # in lieu of word count.

@@ -382,7 +382,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
             universes_soup = self.make_soup(self.get_request(universe_url) )
             # logger.debug("Universe url='{0}'".format(universe_url))
             if universes_soup:
-                universes = universes_soup.findAll('div', {'class' : 'ser-box'})
+                universes = universes_soup.find_all('div', {'class' : 'ser-box'})
                 # logger.debug("Number of Universes: %d" % len(universes))
                 for universe in universes:
                     # logger.debug("universe.find('a')={0}".format(universe.find('a')))

@@ -477,7 +477,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
         return value

     def parseOtherAttributes(self, other_attribute_element):
-        for b in other_attribute_element.findAll('b'):
+        for b in other_attribute_element.find_all('b'):
             #logger.debug('Getting metadata: "%s"' % b)
             label = b.text
             if label in ['Posted:', 'Concluded:', 'Updated:']:

@@ -576,7 +576,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):

         if pager != None:

-            urls=pager.findAll('a')
+            urls=pager.find_all('a')
             urls=urls[:len(urls)-1]
             # logger.debug("pager urls:%s"%urls)
             pager.extract()

@@ -630,7 +630,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
             # putting a 'conTag' at the *top* now, too. So this
             # was nuking every page but the first and last. Now
             # only if 'Continues'
-            for contag in pagetag.findAll('span', {'class' : 'conTag'}):
+            for contag in pagetag.find_all('span', {'class' : 'conTag'}):
                 # remove everything after continues...
                 if 'Continuation' in contag.text:
                     tag = contag

@@ -659,7 +659,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):

         # If it is a chapter, there are dates at the start for when it was posted or modified. These plus
         # everything before them can be discarded.
-        postedDates = pagetag.findAll('div', {'class' : 'date'})
+        postedDates = pagetag.find_all('div', {'class' : 'date'})
         # logger.debug(postedDates)
         if postedDates:
             a = postedDates[0].previousSibling

@@ -668,7 +668,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
                 b = a.previousSibling
                 a.extract()
                 a = b
-        for a in pagetag.findAll('div', {'class' : 'date'}):
+        for a in pagetag.find_all('div', {'class' : 'date'}):
             a.extract()

         # Kill the vote form and everything after it.
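For the pager handling in the -576 hunk above: collect the page links, drop the trailing one, then detach the pager element so it doesn't end up in the chapter text. A sketch with invented HTML (that the dropped final link is a 'next'-style entry is an assumption; the hunk itself doesn't say why it's skipped):

    from bs4 import BeautifulSoup

    html = '<div><a href="p1">1</a><a href="p2">2</a><a href="next">&gt;</a></div>'
    pager = BeautifulSoup(html, 'html.parser').find('div')
    urls = pager.find_all('a')
    urls = urls[:len(urls)-1]   # drop the final, non-page link
    pager.extract()             # detach the pager from the document
    print([a['href'] for a in urls])  # ['p1', 'p2']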
@@ -61,7 +61,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
         soup = self.make_soup(data)

         title = soup.find('h1')
-        for tag in title.findAll('sup'):
+        for tag in title.find_all('sup'):
             tag.extract()

         self.story.setMetadata('title', stripHTML(title.text))

@@ -91,7 +91,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
         if "NC-18" in rating:
             raise exceptions.AdultCheckRequired(self.url)

-        characters = soup.findAll('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png"))
+        characters = soup.find_all('img', src=re.compile(r"/mlp-fim/img/chars/\d+.png"))
         logger.debug("numCharacters: (%s)"%str(len(characters)))

         for x in range(0,len(characters)):

@@ -119,7 +119,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):

         self.story.setMetadata('language','Russian')

-        chapters=chapters_table.findAll('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+"))
+        chapters=chapters_table.find_all('a', href=re.compile(r'/mlp-fim/story/'+self.story.getMetadata('storyId')+r"/chapter\d+"))
         self.story.setMetadata('numChapters', len(chapters))
         logger.debug("numChapters: (%s)"%str(self.story.getMetadata('numChapters')))
@@ -131,7 +131,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/'+chapter['href']+addurl)

@@ -143,7 +143,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
             return ""

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -164,19 +164,19 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [cat.string for cat in cats]
                 for cat in catstext:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [char.string for char in chars]
                 for char in charstext:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
                 genrestext = [genre.string for genre in genres]
                 self.genre = ', '.join(genrestext)
                 for genre in genrestext:

@@ -203,7 +203,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -168,7 +168,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host + self.section + chapter['href']+addurl)

@@ -186,7 +186,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
         # summary, rated, word count, categories, characters, genre, warnings, completed, published, updated, seires

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.text

@@ -207,22 +207,22 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)
@@ -199,14 +199,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
         infodata = self.get_request(infourl)
         infosoup = self.make_soup(infodata)

-        # for a in infosoup.findAll('a',href=re.compile(r"^/Author-\d+")):
+        # for a in infosoup.find_all('a',href=re.compile(r"^/Author-\d+")):
         #     self.story.addToList('authorId',a['href'].split('/')[1].split('-')[1])
         #     self.story.addToList('authorUrl','https://'+self.host+a['href'].replace("/Author-","/AuthorStories-"))
         #     self.story.addToList('author',stripHTML(a))

         # second verticaltable is the chapter list.
-        table = infosoup.findAll('table',{'class':'verticaltable'})[1]
-        for a in table.findAll('a',href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))):
+        table = infosoup.find_all('table',{'class':'verticaltable'})[1]
+        for a in table.find_all('a',href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))):
             autha = a.findNext('a',href=re.compile(r"^/Author-\d+"))
             self.story.addToList('authorId',autha['href'].split('/')[1].split('-')[1])
             self.story.addToList('authorUrl','https://'+self.host+autha['href'].replace("/Author-","/AuthorStories-"))

@@ -224,7 +224,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
             # no selector found, so it's a one-chapter story.
             self.add_chapter(self.story.getMetadata('title'),url)
         else:
-            allOptions = select.findAll('option')
+            allOptions = select.find_all('option')
             for o in allOptions:
                 url = "https://"+self.host+o['value']
                 # just in case there's tags, like <i> in chapter titles.

@@ -237,7 +237,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
         BtVSNonX = False
         char=None
         romance=False
-        for cat in verticaltable.findAll('a', href=re.compile(r"^/Category-")):
+        for cat in verticaltable.find_all('a', href=re.compile(r"^/Category-")):
             # assumes only one -Centered and one Pairing: cat can ever
             # be applied to one story.
             # Seen at least once: incorrect (empty) cat link, thus "and cat.string"

@@ -265,7 +265,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
             if 'BtVS/AtS Non-Crossover' == cat.string:
                 BtVSNonX = True

-        verticaltabletds = verticaltable.findAll('td')
+        verticaltabletds = verticaltable.find_all('td')
         self.story.setMetadata('rating', verticaltabletds[2].string)
         self.story.setMetadata('numWords', verticaltabletds[4].string)

@@ -279,7 +279,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
         self.story.setMetadata('datePublished',makeDate(stripHTML(verticaltabletds[8].string), self.dateformat))
         self.story.setMetadata('dateUpdated',makeDate(stripHTML(verticaltabletds[9].string), self.dateformat))

-        for icon in storydiv.find('span',{'class':'storyicons'}).findAll('img'):
+        for icon in storydiv.find('span',{'class':'storyicons'}).find_all('img'):
            if( icon['title'] not in ['Non-Crossover'] ) :
                self.story.addToList('genre',icon['title'])
            else:
@@ -127,7 +127,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/'+chapter['href'])

@@ -139,7 +139,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
             return ""

         # <span class="label">Rated:</span> NC-17<br /> etc
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -159,20 +159,20 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [cat.string for cat in cats]
                 for cat in catstext:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [char.string for char in chars]
                 for char in charstext:
                     self.story.addToList('characters',char.string)

             ## twilighted.net doesn't use genre.
             # if 'Genre' in label:
-            #     genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class'))
+            #     genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class'))
             #     genrestext = [genre.string for genre in genres]
             #     self.genre = ', '.join(genrestext)
             #     for genre in genrestext:

@@ -199,7 +199,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
             series_url = 'https://'+self.host+'/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -111,7 +111,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
         self.story.setMetadata('author',a.string)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'http://'+self.host+'/archive/'+chapter['href']+addurl)

@@ -126,7 +126,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
         except:
             return ""

-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})
         for labelspan in labels:
             value = labelspan.nextSibling
             label = labelspan.string

@@ -150,24 +150,24 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
                 self.story.setMetadata('reads', value)

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 catstext = [cat.string for cat in cats]
                 for cat in catstext:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 charstext = [char.string for char in chars]
                 for char in charstext:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -190,7 +190,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
             series_url = 'http://'+self.host+'/archive/'+a['href']

             seriessoup = self.make_soup(self.get_request(series_url))
-            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
             i=1
             for a in storyas:
                 if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -80,7 +80,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
             # no selector found, so it's a one-chapter story.
             self.add_chapter(self.story.getMetadata('title'),url)
         else:
-            allOptions = select.findAll('option')
+            allOptions = select.find_all('option')
             for o in allOptions:
                 url = self.url + "&chapter=%s" % o['value']
                 # just in case there's tags, like <i> in chapter titles.

@@ -178,7 +178,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
             series_url = 'https://'+self.host+'/'+a['href']
             try:
                 seriessoup = self.make_soup(self.get_request(series_url))
-                storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+                storyas = seriessoup.find_all('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
                 i=1
                 for a in storyas:
                     if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
@@ -100,7 +100,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
         self.story.setMetadata('rating', rating)

         # Find the chapters:
-        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
+        for chapter in soup.find_all('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
             # just in case there's tags, like <i> in chapter titles.
             self.add_chapter(chapter,'https://'+self.host+'/wrfa/'+chapter['href'])

@@ -110,7 +110,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):

         # <span class="label">Rated:</span> NC-17<br /> etc
         content=soup.find('div',{'class' : 'content'})
-        labels = soup.findAll('span',{'class':'label'})
+        labels = soup.find_all('span',{'class':'label'})

         value = labels[0].previousSibling
         svalue = ""

@@ -134,22 +134,22 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
                 self.story.setMetadata('numWords', value.split(' -')[0])

             if 'Categories' in label:
-                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                cats = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=categories'))
                 for cat in cats:
                     self.story.addToList('category',cat.string)

             if 'Characters' in label:
-                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                chars = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=characters'))
                 for char in chars:
                     self.story.addToList('characters',char.string)

             if 'Genre' in label:
-                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genres = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
                 for genre in genres:
                     self.story.addToList('genre',genre.string)

             if 'Warnings' in label:
-                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warnings = labelspan.parent.find_all('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
                 for warning in warnings:
                     self.story.addToList('warnings',warning.string)

@@ -173,7 +173,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):

             seriessoup = self.make_soup(self.get_request(series_url))
             # can't use ^viewstory...$ in case of higher rated stories with javascript href.
-            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+            storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
             i=1
             for a in storyas:
                 # skip 'report this' and 'TOC' links
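The comment in the last hunk above explains why the series scan drops the ^...$ anchors: on adult-gated stories the viewstory URL can be embedded inside a javascript: href, so only an unanchored pattern still matches. Illustrative only (the hrefs are invented):

    import re

    hrefs = ["viewstory.php?sid=42",
             "javascript:if (confirm('Adult content')) location='viewstory.php?sid=42'"]
    pat = re.compile(r'viewstory.php\?sid=\d+')   # unanchored on purpose
    print([bool(pat.search(h)) for h in hrefs])   # [True, True]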
@@ -268,7 +268,7 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
             tag.extract()

         # Some tags have non-standard tag name.
-        for tag in story.findAll(recursive=True):
+        for tag in story.find_all(recursive=True):
             if tag.name not in HTML_TAGS:
                 tag.name = 'span'
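The loop above (shared with the LightNovelGate hunk earlier) renames unknown elements in place so the extracted chapter stays valid HTML. A sketch with a stand-in HTML_TAGS set (the real list lives elsewhere in the module):

    from bs4 import BeautifulSoup

    HTML_TAGS = {'html', 'body', 'div', 'p', 'span', 'b', 'i', 'br'}  # stand-in

    story = BeautifulSoup('<div><p>ok</p><custom-tag>odd</custom-tag></div>', 'html.parser')
    for tag in story.find_all(recursive=True):
        if tag.name not in HTML_TAGS:
            tag.name = 'span'   # keep the text, neutralize the unknown tag
    print(story)  # <div><p>ok</p><span>odd</span></div>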
@@ -127,7 +127,7 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
             self.story.setMetadata('status', 'Completed')


-        for detail in soup.findAll('li'):
+        for detail in soup.find_all('li'):
             det = unicode(detail).replace(u"\xa0",'')
             heading = stripHTML(det).split(' - ')[0]
             text = stripHTML(det).replace(heading+' - ','')

@@ -180,18 +180,18 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):

         logger.debug('Using the html retrieved previously from: %s' % url)

-        story = self.html.findAll('table')[0].findAll('td')[0].find('div')
+        story = self.html.find_all('table')[0].find_all('td')[0].find('div')

         if None == story:
             raise exceptions.FailedToDownload(
                 "Error downloading Chapter: %s! Missing required element!" % url)

         ## Removing the scripts, tables, links and divs from the story
-        for tag in (story.findAll('script') + story.findAll('table') + story.findAll('a') +
-                    story.findAll('div')):
+        for tag in (story.find_all('script') + story.find_all('table') + story.find_all('a') +
+                    story.find_all('div')):
             tag.extract()

         #strip comments from story
-        [comment.extract() for comment in story.findAll(string=lambda text:isinstance(text, Comment))]
+        [comment.extract() for comment in story.find_all(string=lambda text:isinstance(text, Comment))]

         return self.utf8FromSoup(url,story)
@@ -803,7 +803,7 @@ class BaseSiteAdapter(Requestable):
             # show up differently and doing stripHTML() also
             # catches <br> etc.
             soup = BeautifulSoup(unicode(soup),'html5lib')
-            for t in soup.findAll(recursive=True):
+            for t in soup.find_all(recursive=True):
                 for attr in self.get_attr_keys(t):
                     if attr not in acceptable_attributes:
                         del t[attr] ## strip all tag attributes except acceptable_attributes
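The pass above whitelists tag attributes in place across the whole tree. A standalone sketch, using list(t.attrs.keys()) as a stand-in for the adapter's get_attr_keys helper and an invented acceptable_attributes value:

    from bs4 import BeautifulSoup

    acceptable_attributes = ('href', 'src')   # stand-in for the configured list
    soup = BeautifulSoup('<a href="x" onclick="evil()">link</a>', 'html.parser')
    for t in soup.find_all(recursive=True):
        for attr in list(t.attrs.keys()):     # copy the keys: we delete while iterating
            if attr not in acceptable_attributes:
                del t[attr]
    print(soup)  # <a href="x">link</a>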
@@ -235,7 +235,7 @@ class BaseEfictionAdapter(BaseSiteAdapter):
         soup = self.make_soup(html)

         ## fix all local image 'src' to absolute
-        for img in soup.findAll("img", {"src": _REGEX_DOESNT_START_WITH_HTTP}):
+        for img in soup.find_all("img", {"src": _REGEX_DOESNT_START_WITH_HTTP}):
             # TODO handle '../../' and so on
             if img['src'].startswith('/'):
                 img['src'] = img['src'][1:]

@@ -410,13 +410,13 @@ class BaseEfictionAdapter(BaseSiteAdapter):
         if pagetitleDiv.find('a') is None:
             raise exceptions.FailedToDownload("Couldn't find title and author")
         self.story.setMetadata('title', stripHTML(pagetitleDiv.find("a")))
-        authorLink = pagetitleDiv.findAll("a")[1]
+        authorLink = pagetitleDiv.find_all("a")[1]
         self.story.setMetadata('author', stripHTML(authorLink))
         self.story.setMetadata('authorId', re.search(r"\d+", authorLink['href']).group(0))
         self.story.setMetadata('authorUrl', self.getViewUserUrl(self.story.getMetadata('authorId')))

         ## Parse the infobox
-        labelSpans = soup.find("div", "infobox").find("div", "content").findAll("span", "label")
+        labelSpans = soup.find("div", "infobox").find("div", "content").find_all("span", "label")
         for labelSpan in labelSpans:
             valueStr = ""
             nextEl = labelSpan.nextSibling
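The TODO in the first hunk above ('../../' handling) is the case urllib's urljoin covers; a hedged alternative sketch, with a hypothetical page URL — the adapter's actual URL handling may differ:

    from urllib.parse import urljoin
    from bs4 import BeautifulSoup

    page_url = 'https://example.com/fanfiction/viewstory.php'   # hypothetical
    soup = BeautifulSoup('<img src="../images/cover.jpg"/>', 'html.parser')
    for img in soup.find_all('img'):
        if not img['src'].startswith('http'):
            img['src'] = urljoin(page_url, img['src'])  # resolves '../' segments
    print(soup)  # <img src="https://example.com/images/cover.jpg"/>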
@ -190,10 +190,10 @@ class BaseOTWAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload('Site says: "Sorry, you don\'t have permission to access the page you were trying to reach."')
|
||||
|
||||
soup = self.make_soup(data)
|
||||
for tag in soup.findAll('div',id='admin-banner'):
|
||||
for tag in soup.find_all('div',id='admin-banner'):
|
||||
tag.extract()
|
||||
metasoup = self.make_soup(meta)
|
||||
for tag in metasoup.findAll('div',id='admin-banner'):
|
||||
for tag in metasoup.find_all('div',id='admin-banner'):
|
||||
tag.extract()
|
||||
|
||||
|
||||
|
|
@ -234,7 +234,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('restricted','Restricted')
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/.+"))
|
||||
alist = soup.find_all('a', href=re.compile(r"/users/\w+/pseuds/.+"))
|
||||
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
|
||||
self.story.setMetadata('author','Anonymous')
|
||||
self.story.setMetadata('authorUrl','https://' + self.getSiteDomain() + '/')
|
||||
|
|
@ -267,7 +267,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
|
|||
# change the dates of earlier ones by editing them--That WILL
|
||||
# break epub update.
|
||||
# Find the chapters:
|
||||
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
|
||||
chapters=soup.find_all('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+r"/chapters/\d+$"))
|
||||
self.story.setMetadata('numChapters',len(chapters))
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
if len(chapters)==1:
|
||||
|

@@ -300,50 +300,50 @@ class BaseOTWAdapter(BaseSiteAdapter):

a = metasoup.find('dd',{'class':"fandom tags"})
if a != None:
fandoms = a.findAll('a',{'class':"tag"})
fandoms = a.find_all('a',{'class':"tag"})
for fandom in fandoms:
self.story.addToList('fandoms',fandom.string)

a = metasoup.find('dd',{'class':"warning tags"})
if a != None:
warnings = a.findAll('a',{'class':"tag"})
warnings = a.find_all('a',{'class':"tag"})
for warning in warnings:
self.story.addToList('warnings',warning.string)

a = metasoup.find('dd',{'class':"freeform tags"})
if a != None:
genres = a.findAll('a',{'class':"tag"})
genres = a.find_all('a',{'class':"tag"})
for genre in genres:
self.story.addToList('freeformtags',genre.string)

a = metasoup.find('dd',{'class':"category tags"})
if a != None:
genres = a.findAll('a',{'class':"tag"})
genres = a.find_all('a',{'class':"tag"})
for genre in genres:
if genre != "Gen":
self.story.addToList('ao3categories',genre.string)

a = metasoup.find('dd',{'class':"character tags"})
if a != None:
chars = a.findAll('a',{'class':"tag"})
chars = a.find_all('a',{'class':"tag"})
for char in chars:
self.story.addToList('characters',char.string)

a = metasoup.find('dd',{'class':"relationship tags"})
if a != None:
ships = a.findAll('a',{'class':"tag"})
ships = a.find_all('a',{'class':"tag"})
for ship in ships:
self.story.addToList('ships',ship.string)

a = metasoup.find('dd',{'class':"collections"})
if a != None:
collections = a.findAll('a')
collections = a.find_all('a')
for collection in collections:
self.story.addToList('collections',collection.string)

stats = metasoup.find('dl',{'class':'stats'})
dt = stats.findAll('dt')
dd = stats.findAll('dd')
dt = stats.find_all('dt')
dd = stats.find_all('dd')
for x in range(0,len(dt)):
label = dt[x].text
value = dd[x].text
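
A standalone sketch of the dt/dd stats pairing above, on a made-up stats list; pairing by index assumes exactly one <dd> per <dt>, as the loop above does:

from bs4 import BeautifulSoup

html = ('<dl class="stats"><dt>Words:</dt><dd>1,000</dd>'
        '<dt>Chapters:</dt><dd>3/10</dd></dl>')
soup = BeautifulSoup(html, 'html.parser')
stats = soup.find('dl', {'class': 'stats'})
# zip the two find_all results to read the label/value columns together
for dt, dd in zip(stats.find_all('dt'), stats.find_all('dd')):
    print(dt.text, dd.text)
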

@@ -386,7 +386,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
ddseries = metasoup.find('dd',{'class':"series"})

if ddseries:
for i, a in enumerate(ddseries.findAll('a', href=re.compile(r"/series/\d+"))):
for i, a in enumerate(ddseries.find_all('a', href=re.compile(r"/series/\d+"))):
series_name = stripHTML(a)
series_url = 'https://'+self.host+a['href']
series_index = int(stripHTML(a.previousSibling).replace(', ','').split(' ')[1]) # "Part # of" or ", Part #"
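
A simplified, hypothetical sketch of that series-index parse; the real code uses stripHTML() and also handles the ", Part #" form:

from bs4 import BeautifulSoup

html = '<dd class="series"><span>Part 2 of <a href="/series/99">My Series</a></span></dd>'
soup = BeautifulSoup(html, 'html.parser')
a = soup.find('a', href=True)
# previous_sibling (the new-API spelling of previousSibling) is the text node "Part 2 of "
series_index = int(str(a.previous_sibling).strip().split(' ')[1])
print(series_index)  # -> 2
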

@@ -377,7 +377,7 @@ class BaseXenForo2ForumAdapter(BaseSiteAdapter):
return

def get_forumtags(self,topsoup):
return topsoup.find('div',{'class':'p-description'}).findAll('a',{'class':'tagItem'})
return topsoup.find('div',{'class':'p-description'}).find_all('a',{'class':'tagItem'})

def parse_author(self,souptag):
user = souptag.find('section',{'class':'message-user'})

@@ -73,11 +73,11 @@ def get_update_data(inputio,
break
soup = make_soup(oldcoverhtmldata.decode("utf-8"))
# first img or image tag.
imgs = soup.findAll('img')
imgs = soup.find_all('img')
if imgs:
src = get_path_part(href)+imgs[0]['src']
else:
imgs = soup.findAll('image')
imgs = soup.find_all('image')
if imgs:
src=get_path_part(href)+imgs[0]['xlink:href']
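
The img/image cover fallback above as a standalone sketch, on an invented SVG-wrapped cover page:

from bs4 import BeautifulSoup

html = '<body><svg><image xlink:href="images/cover.jpg"/></svg></body>'
soup = BeautifulSoup(html, 'html.parser')
imgs = soup.find_all('img')          # the plain-HTML form
if not imgs:
    imgs = soup.find_all('image')    # the SVG wrapper form some EPUBs use
if imgs:
    print(imgs[0].get('src') or imgs[0].get('xlink:href'))  # -> images/cover.jpg
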

@@ -128,7 +128,7 @@ def get_update_data(inputio,
# 3/OEBPS/file0005_u3.xhtml etc.
if getsoups:
soup = make_soup(epub.read(href).decode("utf-8"))
for img in soup.findAll('img'):
for img in soup.find_all('img'):
newsrc=''
longdesc=''
## skip <img src="data:image..."

@@ -159,7 +159,7 @@ def get_update_data(inputio,
if h2:
h2.extract()

for skip in bodysoup.findAll(attrs={'class':'skip_on_ffdl_update'}):
for skip in bodysoup.find_all(attrs={'class':'skip_on_ffdl_update'}):
skip.extract()

## <meta name="chapterurl" content="${url}"></meta>

@@ -232,7 +232,7 @@ def get_story_url_from_epub_html(inputio,_is_good_url=None):
if( item.getAttribute("media-type") == "application/xhtml+xml" ):
filehref=relpath+item.getAttribute("href")
soup = make_soup(epub.read(filehref).decode("utf-8"))
for link in soup.findAll('a',href=re.compile(r'^http.*')):
for link in soup.find_all('a',href=re.compile(r'^http.*')):
ahref=link['href']
# print("href:(%s)"%ahref)
# hack for bad ficsaver ffnet URLs.
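
A self-contained sketch of this absolute-URL harvest (the zip variant below uses the same pattern); the example links are made up:

import re
from bs4 import BeautifulSoup

html = ('<a href="https://example.com/story/1">web link</a>'
        '<a href="chapter2.xhtml">internal link</a>')
soup = BeautifulSoup(html, 'html.parser')
# only absolute http(s) URLs match; relative intra-epub hrefs fall through
for link in soup.find_all('a', href=re.compile(r'^http.*')):
    print(link['href'])  # -> https://example.com/story/1
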

@@ -277,7 +277,7 @@ def get_story_url_from_zip_html(inputio,_is_good_url=None):
except UnicodeDecodeError:
# calibre converted to html zip fails with decode.
soup = make_soup(zipf.read(item))
for link in soup.findAll('a',href=re.compile(r'^http.*')):
for link in soup.find_all('a',href=re.compile(r'^http.*')):
ahref=link['href']
# print("href:(%s)"%ahref)
if _is_good_url == None or _is_good_url(ahref):

@@ -71,7 +71,7 @@ def get_urls_from_html(data,url=None,configuration=None,normalize=False,foremail
# logger.debug("dbl souping")
soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib")

for a in soup.findAll('a'):
for a in soup.find_all('a'):
if a.has_attr('href'):
# logger.debug("a['href']:%s"%a['href'])
href = form_url(url,a['href'])
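
The "dbl souping" above serializes and re-parses the document so html5lib can fully repair broken nesting; a Python 3 sketch, assuming html5lib is installed (str() here stands in for the unicode() of the Python 2 code above):

from bs4 import BeautifulSoup

data = '<p>badly <b>nested<a href="https://example.com/s/1">link</a>'
# parse, serialize, and re-parse so html5lib normalizes the malformed markup
soup = BeautifulSoup(str(BeautifulSoup(data, 'html5lib')), 'html5lib')
for a in soup.find_all('a'):
    if a.has_attr('href'):
        print(a['href'])  # -> https://example.com/s/1
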

@@ -59,9 +59,9 @@ class HtmlProcessor:
self._anchor_references = []
anchor_num = 0
# anchor links
anchorlist = self._soup.findAll('a', href=re.compile('^#'))
anchorlist = self._soup.find_all('a', href=re.compile('^#'))
# treat reference tags like a tags for TOCTOP.
anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#')))
anchorlist.extend(self._soup.find_all('reference', href=re.compile('^#')))
for anchor in anchorlist:
self._anchor_references.append((anchor_num, anchor['href']))
anchor['filepos'] = '%.10d' % anchor_num

@@ -99,7 +99,7 @@ class HtmlProcessor:

def _FixPreTags(self):
'''Replace <pre> tags with HTML-ified text.'''
pres = self._soup.findAll('pre')
pres = self._soup.find_all('pre')
for pre in pres:
pre.replaceWith(self._FixPreContents(unicode(pre.contents[0])))
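
A minimal sketch of the <pre> replacement mechanics on invented HTML; the real _FixPreContents() HTML-ifies the text rather than just extracting it, and replace_with is the new-API spelling of replaceWith:

from bs4 import BeautifulSoup

html = '<body><pre>  indented text</pre></body>'
soup = BeautifulSoup(html, 'html.parser')
for pre in soup.find_all('pre'):
    # replace_with swaps the tag for a plain text node in place
    pre.replace_with(pre.get_text())
print(soup)  # -> <body>  indented text</body>
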

@@ -120,15 +120,15 @@ class HtmlProcessor:
# TODO(chatham): <link> tags to script?
unsupported_tags = ('script', 'style')
for tag_type in unsupported_tags:
for element in self._soup.findAll(tag_type):
for element in self._soup.find_all(tag_type):
element.extract()

def RenameAnchors(self, prefix):
'''Rename every internal anchor to have the given prefix, then
return the contents of the body tag.'''
for anchor in self._soup.findAll('a', href=re.compile('^#')):
for anchor in self._soup.find_all('a', href=re.compile('^#')):
anchor['href'] = '#' + prefix + anchor['href'][1:]
for a in self._soup.findAll('a'):
for a in self._soup.find_all('a'):
if a.get('name'):
a['name'] = prefix + a['name']
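
RenameAnchors in miniature, on invented anchors — both the '#' references and their name targets get the prefix:

import re
from bs4 import BeautifulSoup

html = '<a href="#ch1">jump</a><a name="ch1">top</a>'
soup = BeautifulSoup(html, 'html.parser')
prefix = 'file0001_'
# internal links start with '#'; rename the reference, then its target
for anchor in soup.find_all('a', href=re.compile('^#')):
    anchor['href'] = '#' + prefix + anchor['href'][1:]
for a in soup.find_all('a'):
    if a.get('name'):
        a['name'] = prefix + a['name']
print(soup)  # -> <a href="#file0001_ch1">jump</a><a name="file0001_ch1">top</a>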