mirror of
git://github.com/kovidgoyal/calibre.git
synced 2025-12-06 11:15:07 +01:00
Better workaround for the WSJ free index page getting stuck
This commit is contained in:
parent
1b6faaa3bc
commit
93a8e83b93
2 changed files with 24 additions and 4 deletions
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
import json, time, random
|
import json, time, random
|
||||||
from base64 import standard_b64encode
|
from base64 import standard_b64encode
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
from mechanize import Request
|
from mechanize import Request
|
||||||
|
|
||||||
|
|
@ -97,8 +98,7 @@ def get_cover_url(self):
|
||||||
|
|
||||||
def get_browser_for_wsj(self, *a, **kw):
|
def get_browser_for_wsj(self, *a, **kw):
|
||||||
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
||||||
if needs_subscription:
|
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
||||||
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
|
||||||
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
||||||
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
||||||
return br
|
return br
|
||||||
|
|
@ -167,6 +167,16 @@ def get_browser(self, *a, **kw):
|
||||||
def get_browser(self, *a, **kw):
|
def get_browser(self, *a, **kw):
|
||||||
br = self.get_browser_for_wsj(*a, **kw)
|
br = self.get_browser_for_wsj(*a, **kw)
|
||||||
res = br.open(self.WSJ_ITP)
|
res = br.open(self.WSJ_ITP)
|
||||||
|
url = res.geturl()
|
||||||
|
if '/20210913/' in url:
|
||||||
|
today = date.today()
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
try:
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
|
except Exception:
|
||||||
|
today -= timedelta(days=1)
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
self.log('Print edition resolved url:', res.geturl())
|
self.log('Print edition resolved url:', res.geturl())
|
||||||
self.wsj_itp_page = res.read()
|
self.wsj_itp_page = res.read()
|
||||||
return br
|
return br
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
import json, time, random
|
import json, time, random
|
||||||
from base64 import standard_b64encode
|
from base64 import standard_b64encode
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
from mechanize import Request
|
from mechanize import Request
|
||||||
|
|
||||||
|
|
@ -97,8 +98,7 @@ def get_cover_url(self):
|
||||||
|
|
||||||
def get_browser_for_wsj(self, *a, **kw):
|
def get_browser_for_wsj(self, *a, **kw):
|
||||||
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
||||||
if needs_subscription:
|
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
||||||
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
|
||||||
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
||||||
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
||||||
return br
|
return br
|
||||||
|
|
@ -167,6 +167,16 @@ def get_browser(self, *a, **kw):
|
||||||
def get_browser(self, *a, **kw):
|
def get_browser(self, *a, **kw):
|
||||||
br = self.get_browser_for_wsj(*a, **kw)
|
br = self.get_browser_for_wsj(*a, **kw)
|
||||||
res = br.open(self.WSJ_ITP)
|
res = br.open(self.WSJ_ITP)
|
||||||
|
url = res.geturl()
|
||||||
|
if '/20210913/' in url:
|
||||||
|
today = date.today()
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
try:
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
|
except Exception:
|
||||||
|
today -= timedelta(days=1)
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
self.log('Print edition resolved url:', res.geturl())
|
self.log('Print edition resolved url:', res.geturl())
|
||||||
self.wsj_itp_page = res.read()
|
self.wsj_itp_page = res.read()
|
||||||
return br
|
return br
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue