mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Change bs4.find(text=) to string= for deprecation change.
This commit is contained in:
parent
22534986d3
commit
646693ca3e
23 changed files with 46 additions and 46 deletions
|
|
@ -79,7 +79,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
data1 = self.get_request(self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
if 'Page Not Found.' in data1:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
# Find authorid and URL from... author url.
|
||||
mainmeta = soup.find('footer', {'class': 'main-meta'})
|
||||
alist = mainmeta.find('span', text='Author(s)')
|
||||
alist = mainmeta.find('span', string='Author(s)')
|
||||
alist = alist.parent.findAll('a', href=re.compile(r"/profile/u/[^/]+"))
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('/')[-1])
|
||||
|
|
@ -171,9 +171,9 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# find timestamp
|
||||
a = soup.find('span', text='Updated')
|
||||
a = soup.find('span', string='Updated')
|
||||
if a == None:
|
||||
a = soup.find('span', text='Published') # use published date if work was never updated
|
||||
a = soup.find('span', string='Published') # use published date if work was never updated
|
||||
a = a.parent.find('time')
|
||||
chapterDate = makeDate(a['datetime'],self.dateformat)
|
||||
if newestChapter == None or chapterDate > newestChapter:
|
||||
|
|
@ -181,7 +181,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
self.newestChapterNum = index
|
||||
|
||||
# story status
|
||||
a = mainmeta.find('span', text='Completed')
|
||||
a = mainmeta.find('span', string='Completed')
|
||||
if a:
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
|
|
@ -200,30 +200,30 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
self.setDescription(url,a)
|
||||
|
||||
# story tags
|
||||
a = mainmeta.find('span',text='Tags')
|
||||
a = mainmeta.find('span',string='Tags')
|
||||
if a:
|
||||
tags = a.parent.findAll('a')
|
||||
for tag in tags:
|
||||
self.story.addToList('tags', tag.text)
|
||||
|
||||
# story characters
|
||||
a = mainmeta.find('span',text='Characters')
|
||||
a = mainmeta.find('span',string='Characters')
|
||||
if a:
|
||||
self.story.addToList('characters', a.nextSibling)
|
||||
|
||||
# published on
|
||||
a = soup.find('span', text='Published')
|
||||
a = soup.find('span', string='Published')
|
||||
a = a.parent.find('time')
|
||||
self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))
|
||||
|
||||
# updated on
|
||||
a = soup.find('span', text='Updated')
|
||||
a = soup.find('span', string='Updated')
|
||||
if a:
|
||||
a = a.parent.find('time')
|
||||
self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))
|
||||
|
||||
# word count
|
||||
a = soup.find('span', text='Total Word Count')
|
||||
a = soup.find('span', string='Total Word Count')
|
||||
if a:
|
||||
a = a.find_next('span')
|
||||
self.story.setMetadata('numWords', int(a.text.split()[0]))
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
||||
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in chaptertag.findAll(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in chaptertag.findAll(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
# BDSM Library basically wraps its own html around the document,
|
||||
# so we will be removing the script, title and meta content from the
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
|
||||
summary_div = list_box.find('div', {'class': 'list_summary'})
|
||||
if not self.getConfig('keep_summary_html'):
|
||||
summary = ''.join(summary_div(text=True))
|
||||
summary = ''.join(summary_div(string=True))
|
||||
else:
|
||||
summary = self.utf8FromSoup(author_url, summary_div)
|
||||
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
paircharsdt = soup.find('strong',text='Пэйринг и персонажи:')
|
||||
paircharsdt = soup.find('strong',string='Пэйринг и персонажи:')
|
||||
# site keeps both ships and indiv chars in /pairings/ links.
|
||||
if paircharsdt:
|
||||
for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
|
|||
elif key == 'Words':
|
||||
self.story.setMetadata('numWords',val)
|
||||
|
||||
summary = soup.find('dt',text='Story Summary:')
|
||||
summary = soup.find('dt',string='Story Summary:')
|
||||
if summary:
|
||||
summary = summary.find_next_sibling('dd')
|
||||
summary.name='div'
|
||||
|
|
@ -210,7 +210,7 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
|
|||
if self.getConfig('include_author_notes'):
|
||||
row = chaptext.find_previous_sibling('div',class_='row')
|
||||
logger.debug(row)
|
||||
andt = row.find('dt',text="Author's Note:")
|
||||
andt = row.find('dt',string="Author's Note:")
|
||||
logger.debug(andt)
|
||||
if andt:
|
||||
chaptext.insert(0,andt.parent.extract())
|
||||
|
|
|
|||
|
|
@ -235,7 +235,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
# logger.debug(data)
|
||||
self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))
|
||||
|
||||
summhead = soup.find('h5',text='Summary')
|
||||
summhead = soup.find('h5',string='Summary')
|
||||
self.setDescription(url,summhead.find_next('div'))
|
||||
|
||||
## author:
|
||||
|
|
@ -244,12 +244,12 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorUrl',autha['href'])
|
||||
self.story.setMetadata('author',autha.string)
|
||||
|
||||
updlab = soup.find('label',text='Last Updated:')
|
||||
updlab = soup.find('label',string='Last Updated:')
|
||||
if updlab:
|
||||
update = updlab.find_next('time')['datetime']
|
||||
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
|
||||
|
||||
publab = soup.find('label',text='Published:')
|
||||
publab = soup.find('label',string='Published:')
|
||||
if publab:
|
||||
pubdate = publab.find_next('time')['datetime']
|
||||
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
|
||||
|
|
@ -280,7 +280,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
# logger.debug(meta)
|
||||
|
||||
# Find original ffnet URL
|
||||
a = soup.find('a', text="Source")
|
||||
a = soup.find('a', string="Source")
|
||||
self.story.setMetadata('origin',stripHTML(a))
|
||||
self.story.setMetadata('originUrl',a['href'])
|
||||
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
#sequels
|
||||
for header in soup.find_all('h1', {'class':'header-stories'}):
|
||||
# I don't know why using text=re.compile with find() wouldn't work, but it didn't.
|
||||
# I don't know why using string=re.compile with find() wouldn't work, but it didn't.
|
||||
if header.text.startswith('Sequels'):
|
||||
sequelContainer = header.parent
|
||||
for sequel in sequelContainer.find_all('a', {'class':'story_link'}):
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("set opened url:%s"%self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.findAll(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in soup1.findAll(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
if "This submission is awaiting moderator's approval" in data1:
|
||||
raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url)
|
||||
|
|
@ -154,7 +154,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
dataAuth = self.get_request(authorurl)
|
||||
soupAuth = self.make_soup(dataAuth)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in soupAuth.findAll(string=lambda text:isinstance(text, Comment))]
|
||||
# logger.debug(soupAuth)
|
||||
|
||||
## Find link to url in author's page
|
||||
|
|
@ -336,7 +336,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
|
||||
# logger.debug("\tChapter text: %s" % raw_page)
|
||||
page_soup = self.make_soup(raw_page)
|
||||
[comment.extract() for comment in page_soup.findAll(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in page_soup.findAll(string=lambda text:isinstance(text, Comment))]
|
||||
story2 = page_soup.find('div', 'aa_ht').div
|
||||
# logger.debug('getPageText - story2: %s' % story2)
|
||||
|
||||
|
|
|
|||
|
|
@ -678,7 +678,7 @@ class Chapter(object):
|
|||
|
||||
def _excludeEditorSignature(self, root):
|
||||
"""Exclude editor signature from within `root' element."""
|
||||
for textNode in root.findAll(text=True):
|
||||
for textNode in root.findAll(string=True):
|
||||
if re.match(self.SIGNED_PATTERN, textNode.string):
|
||||
editorLink = textNode.findNext('a')
|
||||
if editorLink:
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
|||
data1 = self.get_request(self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
if 'Page Not Found.' in data1:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -161,7 +161,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
|||
soup1 = self.make_soup(data1)
|
||||
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
# get story text
|
||||
story1 = soup1.find('article', id='mcstories')
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
self.story.setMetadata("title", soup.select_one("h3.title").text)
|
||||
|
||||
for author in soup.find("h3", text="Author:").fetchNextSiblings(
|
||||
for author in soup.find("h3", string="Author:").fetchNextSiblings(
|
||||
"a", href=re.compile("/author/")
|
||||
):
|
||||
self.story.addToList("authorId", author.text)
|
||||
|
|
@ -91,7 +91,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter):
|
|||
content = soup.find(id="chapter-content")
|
||||
|
||||
# Remove chapter header if present
|
||||
chapter_header = content.find(["p", "h3"], text=re.compile(r"Chapter \d+:"))
|
||||
chapter_header = content.find(["p", "h3"], string=re.compile(r"Chapter \d+:"))
|
||||
|
||||
if chapter_header:
|
||||
chapter_header.decompose()
|
||||
|
|
|
|||
|
|
@ -189,7 +189,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
|
|||
"Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
# Some of the comments we get are invalid. Remove them all.
|
||||
[comment.extract() for comment in story.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in story.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
# We don't need links. They have bad CSS and do not work most of the time.
|
||||
[a.extract() for a in story.find_all('a')]
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ class ReadOnlyMindComAdapter(BaseSiteAdapter):
|
|||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
#strip comments from soup
|
||||
# [comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
# [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
# story text
|
||||
story = soup.find('section', id='chapter-content')
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('stars',stars)
|
||||
logger.debug("stars:(%s)"%self.story.getMetadata('stars'))
|
||||
|
||||
warning = soup.find('strong',text='Warning')
|
||||
warning = soup.find('strong',string='Warning')
|
||||
if warning != None:
|
||||
for li in warning.find_next('ul').find_all('li'):
|
||||
self.story.addToList('warnings',stripHTML(li))
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('title', stripHTML(title.text))
|
||||
logger.debug("Title: (%s)"%self.story.getMetadata('title'))
|
||||
|
||||
author_title = soup.find('strong', text = re.compile(u"Автор: "))
|
||||
author_title = soup.find('strong', string = re.compile(u"Автор: "))
|
||||
if author_title == None:
|
||||
raise exceptions.FailedToDownload("Error downloading page: %s! Missing required author_title element!" % url)
|
||||
|
||||
|
|
@ -78,11 +78,11 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('author', author.text)
|
||||
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
|
||||
|
||||
date_pub = soup.find('em', text = re.compile(r'\d{4}.\d{2}.\d{2}'))
|
||||
date_pub = soup.find('em', string = re.compile(r'\d{4}.\d{2}.\d{2}'))
|
||||
if not date_pub == None:
|
||||
self.story.setMetadata('datePublished', makeDate(date_pub.text, self.dateformat))
|
||||
|
||||
rating_label = soup.find('strong', text = re.compile(u"рейтинг:"))
|
||||
rating_label = soup.find('strong', string = re.compile(u"рейтинг:"))
|
||||
if not rating_label == None:
|
||||
rating = rating_label.next_sibling.next_sibling
|
||||
self.story.setMetadata('rating', stripHTML(rating))
|
||||
|
|
@ -98,12 +98,12 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
|
|||
character=characters[x]
|
||||
self.story.addToList('characters', character['title'])
|
||||
|
||||
if soup.find('font', color = r"green", text = u"завершен"):
|
||||
if soup.find('font', color = r"green", string = u"завершен"):
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
else:
|
||||
self.story.setMetadata('status', 'In-Progress')
|
||||
|
||||
categories_label = soup.find('strong', text = u"категории:")
|
||||
categories_label = soup.find('strong', string = u"категории:")
|
||||
if not categories_label == None:
|
||||
categories_element = categories_label.next_sibling.next_sibling
|
||||
categories = re.findall(r'"(.+?)"', categories_element.text)
|
||||
|
|
@ -111,7 +111,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
|
|||
category=categories[x]
|
||||
self.story.addToList('category', category)
|
||||
|
||||
chapters_header = soup.find('h2', text = re.compile(u"Главы:"))
|
||||
chapters_header = soup.find('h2', string = re.compile(u"Главы:"))
|
||||
if chapters_header==None:
|
||||
raise exceptions.FailedToDownload("Error downloading page: %s! Missing required chapters_header element!" % url)
|
||||
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
|
|||
# error.
|
||||
errortext_div = soup.find('div', {'class': 'errortext'})
|
||||
if errortext_div:
|
||||
error_text = ''.join(errortext_div(text=True)).strip()
|
||||
error_text = ''.join(errortext_div(string=True)).strip()
|
||||
if error_text == 'Registered Users Only':
|
||||
if not (self.is_adult or self.getConfig('is_adult')):
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
|
@ -167,7 +167,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
|
|||
if keep_summary_html:
|
||||
contents.append(self.utf8FromSoup(self.url, sibling))
|
||||
else:
|
||||
contents.append(''.join(sibling(text=True)))
|
||||
contents.append(''.join(sibling(string=True)))
|
||||
else:
|
||||
contents.append(sibling)
|
||||
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class WuxiaWorldXyzSiteAdapter(BaseSiteAdapter):
|
|||
## site doesn't have authorUrl links.
|
||||
|
||||
## getting status
|
||||
status_label = soup.find('h3',text='Status:')
|
||||
status_label = soup.find('h3',string='Status:')
|
||||
status = stripHTML(status_label.nextSibling)
|
||||
if status == 'Completed':
|
||||
self.story.setMetadata('status', 'Completed')
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ class WWWAnEroticStoryComAdapter(BaseSiteAdapter):
|
|||
data1 = self.get_request(self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments and scripts from soup
|
||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
|
||||
[script.extract() for script in soup1.find_all('script')]
|
||||
|
||||
url = self.url
|
||||
|
|
|
|||
|
|
@ -142,7 +142,7 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
|
|||
title = title[:-len(" Novel")]
|
||||
self.story.setMetadata('title', title)
|
||||
|
||||
authorspan = soup.find('span',text='Author:')
|
||||
authorspan = soup.find('span',string='Author:')
|
||||
authora = authorspan.find_next_sibling('a')
|
||||
## authors appear to just be comma separated and the only URL
|
||||
## is a search, so this appears to work.
|
||||
|
|
@ -259,7 +259,7 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
|
|||
"Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
# Some of the comments we get are invalid. Remove them all.
|
||||
for comment in story.find_all(text=lambda text:isinstance(text, Comment)):
|
||||
for comment in story.find_all(string=lambda text:isinstance(text, Comment)):
|
||||
comment.extract()
|
||||
|
||||
extract_tags = ('a', 'ins', 'script')
|
||||
|
|
|
|||
|
|
@ -203,6 +203,6 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
|
|||
tag.extract()
|
||||
|
||||
#strip comments from story
|
||||
[comment.extract() for comment in story.findAll(text=lambda text:isinstance(text, Comment))]
|
||||
[comment.extract() for comment in story.findAll(string=lambda text:isinstance(text, Comment))]
|
||||
|
||||
return self.utf8FromSoup(url,story)
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
|
|||
## "?view_adult=true" doesn't work on base story
|
||||
## URL anymore, which means we have to
|
||||
metasoup = self.make_soup(meta)
|
||||
a = metasoup.find('a',text='Proceed')
|
||||
a = metasoup.find('a',string='Proceed')
|
||||
metaurl = 'https://'+self.host+a['href']
|
||||
meta = self.get_request(metaurl)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
|
|||
if title:
|
||||
title.a.decompose() # remove RSS link.
|
||||
self.story.setMetadata("threadmarks_title",stripHTML(title))
|
||||
statusdt = header.find('dt',text="Index progress")
|
||||
statusdt = header.find('dt',string="Index progress")
|
||||
if statusdt:
|
||||
statusdd = statusdt.find_next_sibling('dd')
|
||||
if statusdd:
|
||||
|
|
|
|||
Loading…
Reference in a new issue