Change bs4.find(text=) to string=, since the text= argument is deprecated in BeautifulSoup 4.

This commit is contained in:
Jim Miller 2023-10-19 13:01:47 -05:00
parent 22534986d3
commit 646693ca3e
23 changed files with 46 additions and 46 deletions

View file

@ -79,7 +79,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
data1 = self.get_request(self.url)
soup1 = self.make_soup(data1)
#strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
if 'Page Not Found.' in data1:
raise exceptions.StoryDoesNotExist(self.url)

View file

@ -146,7 +146,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# Find authorid and URL from... author url.
mainmeta = soup.find('footer', {'class': 'main-meta'})
alist = mainmeta.find('span', text='Author(s)')
alist = mainmeta.find('span', string='Author(s)')
alist = alist.parent.findAll('a', href=re.compile(r"/profile/u/[^/]+"))
for a in alist:
self.story.addToList('authorId',a['href'].split('/')[-1])
@ -171,9 +171,9 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# find timestamp
a = soup.find('span', text='Updated')
a = soup.find('span', string='Updated')
if a == None:
a = soup.find('span', text='Published') # use published date if work was never updated
a = soup.find('span', string='Published') # use published date if work was never updated
a = a.parent.find('time')
chapterDate = makeDate(a['datetime'],self.dateformat)
if newestChapter == None or chapterDate > newestChapter:
@ -181,7 +181,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
self.newestChapterNum = index
# story status
a = mainmeta.find('span', text='Completed')
a = mainmeta.find('span', string='Completed')
if a:
self.story.setMetadata('status', 'Completed')
else:
@ -200,30 +200,30 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
self.setDescription(url,a)
# story tags
a = mainmeta.find('span',text='Tags')
a = mainmeta.find('span',string='Tags')
if a:
tags = a.parent.findAll('a')
for tag in tags:
self.story.addToList('tags', tag.text)
# story tags
a = mainmeta.find('span',text='Characters')
a = mainmeta.find('span',string='Characters')
if a:
self.story.addToList('characters', a.nextSibling)
# published on
a = soup.find('span', text='Published')
a = soup.find('span', string='Published')
a = a.parent.find('time')
self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))
# updated on
a = soup.find('span', text='Updated')
a = soup.find('span', string='Updated')
if a:
a = a.parent.find('time')
self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))
# word count
a = soup.find('span', text='Total Word Count')
a = soup.find('span', string='Total Word Count')
if a:
a = a.find_next('span')
self.story.setMetadata('numWords', int(a.text.split()[0]))

View file

@ -169,7 +169,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
#strip comments from soup
[comment.extract() for comment in chaptertag.findAll(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in chaptertag.findAll(string=lambda text:isinstance(text, Comment))]
# BDSM Library basically wraps it's own html around the document,
# so we will be removing the script, title and meta content from the

View file

@ -117,7 +117,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
summary_div = list_box.find('div', {'class': 'list_summary'})
if not self.getConfig('keep_summary_html'):
summary = ''.join(summary_div(text=True))
summary = ''.join(summary_div(string=True))
else:
summary = self.utf8FromSoup(author_url, summary_div)

View file

@ -206,7 +206,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
else:
self.story.setMetadata('status', 'In-Progress')
paircharsdt = soup.find('strong',text='Пэйринг и персонажи:')
paircharsdt = soup.find('strong',string='Пэйринг и персонажи:')
# site keeps both ships and indiv chars in /pairings/ links.
if paircharsdt:
for paira in paircharsdt.find_next('div').find_all('a', href=re.compile(r'/pairings/')):

View file

@ -177,7 +177,7 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
elif key == 'Words':
self.story.setMetadata('numWords',val)
summary = soup.find('dt',text='Story Summary:')
summary = soup.find('dt',string='Story Summary:')
if summary:
summary = summary.find_next_sibling('dd')
summary.name='div'
@ -210,7 +210,7 @@ class FictionAlleyArchiveOrgSiteAdapter(BaseSiteAdapter):
if self.getConfig('include_author_notes'):
row = chaptext.find_previous_sibling('div',class_='row')
logger.debug(row)
andt = row.find('dt',text="Author's Note:")
andt = row.find('dt',string="Author's Note:")
logger.debug(andt)
if andt:
chaptext.insert(0,andt.parent.extract())

View file

@ -235,7 +235,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
# logger.debug(data)
self.story.setMetadata('title',stripHTML(soup.find('h1',{'class':'Story__title'})))
summhead = soup.find('h5',text='Summary')
summhead = soup.find('h5',string='Summary')
self.setDescription(url,summhead.find_next('div'))
## author:
@ -244,12 +244,12 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl',autha['href'])
self.story.setMetadata('author',autha.string)
updlab = soup.find('label',text='Last Updated:')
updlab = soup.find('label',string='Last Updated:')
if updlab:
update = updlab.find_next('time')['datetime']
self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat))
publab = soup.find('label',text='Published:')
publab = soup.find('label',string='Published:')
if publab:
pubdate = publab.find_next('time')['datetime']
self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat))
@ -280,7 +280,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
# logger.debug(meta)
# Find original ffnet URL
a = soup.find('a', text="Source")
a = soup.find('a', string="Source")
self.story.setMetadata('origin',stripHTML(a))
self.story.setMetadata('originUrl',a['href'])

View file

@ -304,7 +304,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
#sequels
for header in soup.find_all('h1', {'class':'header-stories'}):
# I don't know why using text=re.compile with find() wouldn't work, but it didn't.
# I don't know why using string=re.compile with find() wouldn't work, but it didn't.
if header.text.startswith('Sequels'):
sequelContainer = header.parent
for sequel in sequelContainer.find_all('a', {'class':'story_link'}):

View file

@ -134,7 +134,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
logger.debug("set opened url:%s"%self.url)
soup1 = self.make_soup(data1)
#strip comments from soup
[comment.extract() for comment in soup1.findAll(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in soup1.findAll(string=lambda text:isinstance(text, Comment))]
if "This submission is awaiting moderator's approval" in data1:
raise exceptions.StoryDoesNotExist("This submission is awaiting moderator's approval. %s"%self.url)
@ -154,7 +154,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
dataAuth = self.get_request(authorurl)
soupAuth = self.make_soup(dataAuth)
#strip comments from soup
[comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in soupAuth.findAll(string=lambda text:isinstance(text, Comment))]
# logger.debug(soupAuth)
## Find link to url in author's page
@ -336,7 +336,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
raw_page = raw_page.replace('<div class="b-story-body-x x-r15"><div><p>','<div class="b-story-body-x x-r15"><div>')
# logger.debug("\tChapter text: %s" % raw_page)
page_soup = self.make_soup(raw_page)
[comment.extract() for comment in page_soup.findAll(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in page_soup.findAll(string=lambda text:isinstance(text, Comment))]
story2 = page_soup.find('div', 'aa_ht').div
# logger.debug('getPageText - story2: %s' % story2)

View file

@ -678,7 +678,7 @@ class Chapter(object):
def _excludeEditorSignature(self, root):
"""Exclude editor signature from within `root' element."""
for textNode in root.findAll(text=True):
for textNode in root.findAll(string=True):
if re.match(self.SIGNED_PATTERN, textNode.string):
editorLink = textNode.findNext('a')
if editorLink:

View file

@ -83,7 +83,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
data1 = self.get_request(self.url)
soup1 = self.make_soup(data1)
#strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
if 'Page Not Found.' in data1:
raise exceptions.StoryDoesNotExist(self.url)
@ -161,7 +161,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
soup1 = self.make_soup(data1)
#strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
# get story text
story1 = soup1.find('article', id='mcstories')

View file

@ -40,7 +40,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("title", soup.select_one("h3.title").text)
for author in soup.find("h3", text="Author:").fetchNextSiblings(
for author in soup.find("h3", string="Author:").fetchNextSiblings(
"a", href=re.compile("/author/")
):
self.story.addToList("authorId", author.text)
@ -91,7 +91,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter):
content = soup.find(id="chapter-content")
# Remove chapter header if present
chapter_header = content.find(["p", "h3"], text=re.compile(r"Chapter \d+:"))
chapter_header = content.find(["p", "h3"], string=re.compile(r"Chapter \d+:"))
if chapter_header:
chapter_header.decompose()

View file

@ -189,7 +189,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
"Error downloading Chapter: %s! Missing required element!" % url)
# Some comments we will get is invalid. Remove them all.
[comment.extract() for comment in story.find_all(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in story.find_all(string=lambda text:isinstance(text, Comment))]
# We don't need links. They have a bad css and they are not working most of times.
[a.extract() for a in story.find_all('a')]

View file

@ -182,7 +182,7 @@ class ReadOnlyMindComAdapter(BaseSiteAdapter):
soup = self.make_soup(self.get_request(url))
#strip comments from soup
# [comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
# [comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
# story text
story = soup.find('section', id='chapter-content')

View file

@ -228,7 +228,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
self.story.setMetadata('stars',stars)
logger.debug("stars:(%s)"%self.story.getMetadata('stars'))
warning = soup.find('strong',text='Warning')
warning = soup.find('strong',string='Warning')
if warning != None:
for li in warning.find_next('ul').find_all('li'):
self.story.addToList('warnings',stripHTML(li))

View file

@ -67,7 +67,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
self.story.setMetadata('title', stripHTML(title.text))
logger.debug("Title: (%s)"%self.story.getMetadata('title'))
author_title = soup.find('strong', text = re.compile(u"Автор: "))
author_title = soup.find('strong', string = re.compile(u"Автор: "))
if author_title == None:
raise exceptions.FailedToDownload("Error downloading page: %s! Missing required author_title element!" % url)
@ -78,11 +78,11 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
self.story.setMetadata('author', author.text)
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
date_pub = soup.find('em', text = re.compile(r'\d{4}.\d{2}.\d{2}'))
date_pub = soup.find('em', string = re.compile(r'\d{4}.\d{2}.\d{2}'))
if not date_pub == None:
self.story.setMetadata('datePublished', makeDate(date_pub.text, self.dateformat))
rating_label = soup.find('strong', text = re.compile(u"рейтинг:"))
rating_label = soup.find('strong', string = re.compile(u"рейтинг:"))
if not rating_label == None:
rating = rating_label.next_sibling.next_sibling
self.story.setMetadata('rating', stripHTML(rating))
@ -98,12 +98,12 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
character=characters[x]
self.story.addToList('characters', character['title'])
if soup.find('font', color = r"green", text = u"завершен"):
if soup.find('font', color = r"green", string = u"завершен"):
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')
categories_label = soup.find('strong', text = u"категории:")
categories_label = soup.find('strong', string = u"категории:")
if not categories_label == None:
categories_element = categories_label.next_sibling.next_sibling
categories = re.findall(r'"(.+?)"', categories_element.text)
@ -111,7 +111,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
category=categories[x]
self.story.addToList('category', category)
chapters_header = soup.find('h2', text = re.compile(u"Главы:"))
chapters_header = soup.find('h2', string = re.compile(u"Главы:"))
if chapters_header==None:
raise exceptions.FailedToDownload("Error downloading page: %s! Missing required chapters_header element!" % url)

View file

@ -104,7 +104,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
# error.
errortext_div = soup.find('div', {'class': 'errortext'})
if errortext_div:
error_text = ''.join(errortext_div(text=True)).strip()
error_text = ''.join(errortext_div(string=True)).strip()
if error_text == 'Registered Users Only':
if not (self.is_adult or self.getConfig('is_adult')):
raise exceptions.AdultCheckRequired(self.url)
@ -167,7 +167,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
if keep_summary_html:
contents.append(self.utf8FromSoup(self.url, sibling))
else:
contents.append(''.join(sibling(text=True)))
contents.append(''.join(sibling(string=True)))
else:
contents.append(sibling)

View file

@ -85,7 +85,7 @@ class WuxiaWorldXyzSiteAdapter(BaseSiteAdapter):
## site doesn't have authorUrl links.
## getting status
status_label = soup.find('h3',text='Status:')
status_label = soup.find('h3',string='Status:')
status = stripHTML(status_label.nextSibling)
if status == 'Completed':
self.story.setMetadata('status', 'Completed')

View file

@ -87,7 +87,7 @@ class WWWAnEroticStoryComAdapter(BaseSiteAdapter):
data1 = self.get_request(self.url)
soup1 = self.make_soup(data1)
#strip comments and scripts from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in soup1.find_all(string=lambda text:isinstance(text, Comment))]
[script.extract() for script in soup1.find_all('script')]
url = self.url

View file

@ -142,7 +142,7 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
title = title[:-len(" Novel")]
self.story.setMetadata('title', title)
authorspan = soup.find('span',text='Author:')
authorspan = soup.find('span',string='Author:')
authora = authorspan.find_next_sibling('a')
## authors appear to just be comma separated and the only URL
## is a search, so this appears to work.
@ -259,7 +259,7 @@ class WWWNovelAllComAdapter(BaseSiteAdapter):
"Error downloading Chapter: %s! Missing required element!" % url)
# Some comments we will get is invalid. Remove them all.
for comment in story.find_all(text=lambda text:isinstance(text, Comment)):
for comment in story.find_all(string=lambda text:isinstance(text, Comment)):
comment.extract()
extract_tags = ('a', 'ins', 'script')

View file

@ -203,6 +203,6 @@ class WWWUtopiastoriesComAdapter(BaseSiteAdapter):
tag.extract()
#strip comments from story
[comment.extract() for comment in story.findAll(text=lambda text:isinstance(text, Comment))]
[comment.extract() for comment in story.findAll(string=lambda text:isinstance(text, Comment))]
return self.utf8FromSoup(url,story)

View file

@ -160,7 +160,7 @@ class BaseOTWAdapter(BaseSiteAdapter):
## "?view_adult=true" doesn't work on base story
## URL anymore, which means we have to
metasoup = self.make_soup(meta)
a = metasoup.find('a',text='Proceed')
a = metasoup.find('a',string='Proceed')
metaurl = 'https://'+self.host+a['href']
meta = self.get_request(metaurl)
else:

View file

@ -112,7 +112,7 @@ class BaseXenForo2ForumAdapter(BaseXenForoForumAdapter):
if title:
title.a.decompose() # remove RSS link.
self.story.setMetadata("threadmarks_title",stripHTML(title))
statusdt = header.find('dt',text="Index progress")
statusdt = header.find('dt',string="Index progress")
if statusdt:
statusdd = statusdt.find_next_sibling('dd')
if statusdd: