mirror of
git://github.com/kovidgoyal/calibre.git
synced 2025-12-31 04:35:01 +01:00
Fix images not working for Guardian and Independent
Apparently they serve images in JPEG-XR format if the user agent is IE
This commit is contained in:
parent
bcbac05d04
commit
d700523080
2 changed files with 12 additions and 0 deletions
|
|
@ -52,6 +52,12 @@ class Guardian(BasicNewsRecipe):
|
|||
dict(attrs={'class': lambda x: x and 'content__article-body' in x.split()}),
|
||||
]
|
||||
|
||||
def get_browser(self, *a, **kw):
    """Return the recipe browser configured with a desktop Chrome user agent.

    All positional and keyword arguments are forwarded unchanged to
    ``BasicNewsRecipe.get_browser``. The returned browser's ``addheaders``
    list is then replaced with a single ``User-agent`` entry.

    Why: this site returns images in JPEG-XR format if the user agent is
    IE, so a Chrome user agent string is sent instead.
    """
    chrome_user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36'
    browser = BasicNewsRecipe.get_browser(self, *a, **kw)
    # Assigning (rather than appending) discards any headers already set.
    browser.addheaders = [('User-agent', chrome_user_agent)]
    return browser
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
import html5lib
|
||||
from lxml import html
|
||||
|
|
|
|||
|
|
@ -36,6 +36,12 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||
|
||||
remove_attributes = ['style']
|
||||
|
||||
def get_browser(self, *a, **kw):
    """Build the browser for this recipe, identifying as desktop Chrome.

    Delegates to ``BasicNewsRecipe.get_browser`` with the same arguments,
    then overwrites the browser's ``addheaders`` with one ``User-agent``
    header.

    Why: this site returns images in JPEG-XR format if the user agent is
    IE, so the request must not look like it comes from IE.
    """
    user_agent_header = (
        'User-agent',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36',
    )
    browser = BasicNewsRecipe.get_browser(self, *a, **kw)
    # The header list is replaced wholesale; only the User-agent remains.
    browser.addheaders = [user_agent_header]
    return browser
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for div in soup.findAll(attrs={'class': 'full-gallery'}):
|
||||
imgs = {}
|
||||
|
|
|
|||
Loading…
Reference in a new issue