mirror of
git://github.com/kovidgoyal/calibre.git
synced 2025-12-06 10:05:38 +01:00
Workaround for the NYT Today's Paper page using undefined instead of null in its JSON
This commit is contained in:
parent
803da449ba
commit
07b4239634
2 changed files with 4 additions and 2 deletions
|
|
@ -186,7 +186,8 @@ def parse_todays_page(self):
|
||||||
soup = self.read_nyt_metadata()
|
soup = self.read_nyt_metadata()
|
||||||
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
||||||
script = type(u'')(script)
|
script = type(u'')(script)
|
||||||
data = json.loads(script[script.find('{'):script.rfind(';')].strip().rstrip(';'))['initialState']
|
json_data = script[script.find('{'):script.rfind(';')].strip().rstrip(';')
|
||||||
|
data = json.loads(json_data.replace(':undefined', ':null'))['initialState']
|
||||||
containers, sections = {}, {}
|
containers, sections = {}, {}
|
||||||
article_map = {}
|
article_map = {}
|
||||||
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')
|
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')
|
||||||
|
|
|
||||||
|
|
@ -186,7 +186,8 @@ def parse_todays_page(self):
|
||||||
soup = self.read_nyt_metadata()
|
soup = self.read_nyt_metadata()
|
||||||
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
||||||
script = type(u'')(script)
|
script = type(u'')(script)
|
||||||
data = json.loads(script[script.find('{'):script.rfind(';')].strip().rstrip(';'))['initialState']
|
json_data = script[script.find('{'):script.rfind(';')].strip().rstrip(';')
|
||||||
|
data = json.loads(json_data.replace(':undefined', ':null'))['initialState']
|
||||||
containers, sections = {}, {}
|
containers, sections = {}, {}
|
||||||
article_map = {}
|
article_map = {}
|
||||||
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')
|
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue