mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
forum adapters - Take pub/update dates from index post if not from threadmarks.
This commit is contained in:
parent
ce5869313d
commit
ddc3607df6
1 changed file with 34 additions and 13 deletions
|
|
@ -128,18 +128,7 @@ class ForumsSpacebattlesComAdapter(BaseSiteAdapter):
|
||||||
markas = soupmarks.find('ol',{'class':'overlayScroll'}).find_all('a')
|
markas = soupmarks.find('ol',{'class':'overlayScroll'}).find_all('a')
|
||||||
if len(markas) > 1:
|
if len(markas) > 1:
|
||||||
for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]:
|
for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]:
|
||||||
datestr=None
|
date = self.make_date(atag.find_next_sibling('div',{'class':'extra'}))
|
||||||
datetag = atag.find_next_sibling('div',{'class':'extra'}).find('span',{'class':'DateTime'})
|
|
||||||
if datetag:
|
|
||||||
datestr = datetag['title']
|
|
||||||
else:
|
|
||||||
datetag = atag.find_next_sibling('div',{'class':'extra'}).find('abbr',{'class':'DateTime'})
|
|
||||||
if datetag:
|
|
||||||
datestr="%s at %s"%(datetag['data-datestring'],datetag['data-timestring'])
|
|
||||||
# Apr 24, 2015 at 4:39 AM
|
|
||||||
# May 1, 2015 at 5:47 AM
|
|
||||||
datestr = re.sub(r' (\d[^\d])',r' 0\1',datestr) # add leading 0 for single digit day & hours.
|
|
||||||
date = makeDate(datestr, self.dateformat)
|
|
||||||
if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):
|
if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):
|
||||||
self.story.setMetadata('datePublished', date)
|
self.story.setMetadata('datePublished', date)
|
||||||
if not self.story.getMetadataRaw('dateUpdated') or date > self.story.getMetadataRaw('dateUpdated'):
|
if not self.story.getMetadataRaw('dateUpdated') or date > self.story.getMetadataRaw('dateUpdated'):
|
||||||
|
|
@ -149,6 +138,8 @@ class ForumsSpacebattlesComAdapter(BaseSiteAdapter):
|
||||||
name = '%s %s'%(name,date)
|
name = '%s %s'%(name,date)
|
||||||
|
|
||||||
self.chapterUrls.append((name,self.getURLPrefix()+'/'+url))
|
self.chapterUrls.append((name,self.getURLPrefix()+'/'+url))
|
||||||
|
|
||||||
|
soup = soup.find('li') # limit first post for date stuff below. ('#' posts above)
|
||||||
|
|
||||||
# Now go hunting for the 'chapter list'.
|
# Now go hunting for the 'chapter list'.
|
||||||
bq = soup.find('blockquote') # assume first posting contains TOC urls.
|
bq = soup.find('blockquote') # assume first posting contains TOC urls.
|
||||||
|
|
@ -177,9 +168,39 @@ class ForumsSpacebattlesComAdapter(BaseSiteAdapter):
|
||||||
# remove "First Post" if included in list.
|
# remove "First Post" if included in list.
|
||||||
logger.debug("delete dup chapter: %s %s"%self.chapterUrls[0])
|
logger.debug("delete dup chapter: %s %s"%self.chapterUrls[0])
|
||||||
del self.chapterUrls[0]
|
del self.chapterUrls[0]
|
||||||
|
|
||||||
|
# Didn't use threadmarks, so take created/updated dates
|
||||||
|
# from the 'first' posting created and updated.
|
||||||
|
date = self.make_date(soup.find('a',{'class':'datePermalink'}))
|
||||||
|
if date:
|
||||||
|
self.story.setMetadata('datePublished', date)
|
||||||
|
self.story.setMetadata('dateUpdated', date) # updated overwritten below if found.
|
||||||
|
|
||||||
|
date = self.make_date(soup.find('div',{'class':'editDate'}))
|
||||||
|
if date:
|
||||||
|
self.story.setMetadata('dateUpdated', date)
|
||||||
|
|
||||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||||
|
|
||||||
|
def make_date(self,parenttag):
    """Find and parse the post date contained in ``parenttag``.

    The forums can render a date differently when it is recent, so two
    markups are tried in order:

    * ``<span class="DateTime" title="...">`` -- the full date string
      is read from the ``title`` attribute.
    * ``<abbr class="DateTime" data-datestring="..."
      data-timestring="...">`` -- the two attributes are joined as
      ``"<date> at <time>"``.

    parenttag -- tag to search inside.  May be None (callers pass the
        result of a ``find()`` straight in), in which case no date is
        available.

    Returns whatever ``makeDate(datestr, self.dateformat)`` returns, or
    None when no date tag is present or the date cannot be read/parsed.
    """
    # Callers hand over the raw result of soup.find(), which is None
    # when the index post lacks the expected element.
    if parenttag is None:
        logger.debug('No date found in %s'%parenttag)
        return None

    try:
        datestr = None
        datetag = parenttag.find('span',{'class':'DateTime'})
        if datetag:
            datestr = datetag['title']
        else:
            datetag = parenttag.find('abbr',{'class':'DateTime'})
            if datetag:
                datestr = "%s at %s"%(datetag['data-datestring'],datetag['data-timestring'])

        # Neither markup present: report it explicitly rather than
        # relying on re.sub() raising on a None argument.
        if datestr is None:
            logger.debug('No date found in %s'%parenttag)
            return None

        # Apr 24, 2015 at 4:39 AM
        # May 1, 2015 at 5:47 AM
        datestr = re.sub(r' (\d[^\d])',r' 0\1',datestr) # add leading 0 for single digit day & hours.
        return makeDate(datestr, self.dateformat)
    except Exception:
        # Best effort: a missing attribute or an unparsable date string
        # still yields None, but KeyboardInterrupt/SystemExit are no
        # longer swallowed as they were by the bare ``except:``.
        logger.debug('No date found in %s'%parenttag)
        return None
|
||||||
|
|
||||||
# grab the text for an individual chapter.
|
# grab the text for an individual chapter.
|
||||||
def getChapterText(self, url):
|
def getChapterText(self, url):
|
||||||
logger.debug('Getting chapter text from: %s' % url)
|
logger.debug('Getting chapter text from: %s' % url)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue