mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-05 23:33:47 +02:00
Fix incorrect soup usage in various recipes
Also make SoupStrainer available in calibre.ebooks.BeautifulSoup
This commit is contained in:
parent
de9d97d688
commit
ba59ac679d
6 changed files with 10 additions and 12 deletions
|
|
@@ -1,5 +1,5 @@
|
|||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import re
|
||||
import re
|
||||
|
||||
|
||||
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||
|
|
|
|||
|
|
@@ -1,5 +1,5 @@
|
|||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import re
|
||||
import re
|
||||
|
||||
|
||||
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||
|
|
|
|||
|
|
@@ -3,7 +3,6 @@
|
|||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.utils.magick import Image
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
''' Version 1.2, updated cover image to match the changed website.
|
||||
added info date on title
|
||||
|
|
@@ -163,7 +162,7 @@ def safeRemovePart(self, killingSoup, soupIsArray):
|
|||
return killingSoup
|
||||
|
||||
|
||||
class MerryProcess(BeautifulSoup):
|
||||
class MerryProcess(object):
|
||||
myKiller = MerryExtract()
|
||||
myPrepare = MerryPreProcess()
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,6 @@
|
|||
import re
|
||||
import urllib2
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
|
||||
|
||||
|
||||
class Ebert(BasicNewsRecipe):
|
||||
|
|
@@ -78,8 +77,8 @@ def parse_index(self):
|
|||
description = match.group(2)
|
||||
|
||||
self.log(thislink)
|
||||
|
||||
for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
|
||||
soup = self.index_to_soup(thislink)
|
||||
for link in soup.findAll('a', href=True):
|
||||
thisurl = self.PREFIX + link['href']
|
||||
thislinktext = self.tag_to_string(link)
|
||||
|
||||
|
|
@@ -91,7 +90,7 @@ def parse_index(self):
|
|||
if thistitle == '':
|
||||
thistitle = 'Ebert Journal Post'
|
||||
|
||||
"""
|
||||
r"""
|
||||
pattern2 = r'AID=\/(.*?)\/'
|
||||
reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL)
|
||||
match2 = reg2.search(thisurl)
|
||||
|
|
|
|||
|
|
@@ -2,7 +2,6 @@
|
|||
import urllib2
|
||||
import time
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer
|
||||
from calibre import strftime
|
||||
|
||||
'''
|
||||
|
|
@@ -94,8 +93,9 @@ def parse_index(self):
|
|||
description = match.group(2)
|
||||
|
||||
self.log(thislink)
|
||||
soup = self.index_to_soup(thislink)
|
||||
|
||||
for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
|
||||
for link in soup.findAll('a', href=True):
|
||||
thisurl = self.PREFIX + link['href']
|
||||
thislinktext = self.tag_to_string(link)
|
||||
|
||||
|
|
|
|||
|
|
@@ -6,8 +6,8 @@
|
|||
|
||||
import bs4
|
||||
from bs4 import ( # noqa
|
||||
CData, Comment, Declaration, NavigableString, ProcessingInstruction, Tag,
|
||||
__version__
|
||||
CData, Comment, Declaration, NavigableString, ProcessingInstruction,
|
||||
SoupStrainer, Tag, __version__
|
||||
)
|
||||
|
||||
from polyglot.builtins import unicode_type
|
||||
|
|
|
|||
Loading…
Reference in a new issue