Fix incorrect soup usage in various recipes

Also make SoupStrainer available in calibre.ebooks.BeautifulSoup
This commit is contained in:
Kovid Goyal 2019-03-25 10:17:27 +05:30
parent de9d97d688
commit ba59ac679d
No known key found for this signature in database
GPG key ID: 06BC317B515ACE7C
6 changed files with 10 additions and 12 deletions

View file

@@ -1,5 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import re
+import re
class AdvancedUserRecipe1283848012(BasicNewsRecipe):

View file

@@ -1,5 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import re
+import re
class AdvancedUserRecipe1283848012(BasicNewsRecipe):

View file

@@ -3,7 +3,6 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
''' Version 1.2, updated cover image to match the changed website.
added info date on title
@@ -163,7 +162,7 @@ def safeRemovePart(self, killingSoup, soupIsArray):
return killingSoup
-class MerryProcess(BeautifulSoup):
+class MerryProcess(object):
myKiller = MerryExtract()
myPrepare = MerryPreProcess()

View file

@@ -1,7 +1,6 @@
import re
import urllib2
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
class Ebert(BasicNewsRecipe):
@@ -78,8 +77,8 @@ def parse_index(self):
description = match.group(2)
self.log(thislink)
-for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
+soup = self.index_to_soup(thislink)
+for link in soup.findAll('a', href=True):
thisurl = self.PREFIX + link['href']
thislinktext = self.tag_to_string(link)
@@ -91,7 +90,7 @@ def parse_index(self):
if thistitle == '':
thistitle = 'Ebert Journal Post'
-"""
+r"""
pattern2 = r'AID=\/(.*?)\/'
reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL)
match2 = reg2.search(thisurl)

View file

@@ -2,7 +2,6 @@
import urllib2
import time
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
from calibre import strftime
'''
@@ -94,8 +93,9 @@ def parse_index(self):
description = match.group(2)
self.log(thislink)
+soup = self.index_to_soup(thislink)
-for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
+for link in soup.findAll('a', href=True):
thisurl = self.PREFIX + link['href']
thislinktext = self.tag_to_string(link)

View file

@@ -6,8 +6,8 @@
import bs4
from bs4 import ( # noqa
-CData, Comment, Declaration, NavigableString, ProcessingInstruction, Tag,
-__version__
+CData, Comment, Declaration, NavigableString, ProcessingInstruction,
+SoupStrainer, Tag, __version__
)
from polyglot.builtins import unicode_type