Fix for %p (am/pm) date parsing on non-en_US locales.

This commit is contained in:
Jim Miller 2016-04-22 21:24:00 -05:00
parent 76faea3b4e
commit c9f86bd784
5 changed files with 35 additions and 13 deletions

View file

@ -869,6 +869,12 @@ extracategories:The Sentinel
## it has +90% confidence. 'auto' is not reliable.
website_encodings:Windows-1252,ISO-8859-1,auto
## dateUpdated doesn't usually have time, but it does on
## bloodshedverse.com. See
## http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
dateUpdated_format:%%Y-%%m-%%d %%H:%%M
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:warnings,reviews

View file

@ -28,7 +28,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
READ_URL_TEMPLATE = BASE_URL + 'stories.php?go=read&no=%s'
STARTED_DATETIME_FORMAT = '%m/%d/%Y'
UPDATED_DATETIME_FORMAT = '%m/%d/%Y %I:%M'
UPDATED_DATETIME_FORMAT = '%m/%d/%Y %I:%M %p'
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
@ -168,14 +168,8 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
self.story.setMetadata('datePublished', makeDate(value, self.STARTED_DATETIME_FORMAT))
elif key == 'Updated':
date_string, period = value.rsplit(' ', 1)
date = makeDate(date_string, self.UPDATED_DATETIME_FORMAT)
# Rather ugly hack to work around Calibre's changing of
# Python's locale setting, causing am/pm to not be properly
# parsed by strptime() when using a non-english locale
if period == 'pm':
date += timedelta(hours=12)
date = makeDate(value, self.UPDATED_DATETIME_FORMAT)
# ugly %p(am/pm) hack moved into makeDate so other sites can use it.
self.story.setMetadata('dateUpdated', date)
if self.story.getMetadata('rating') == 'NC-17' and not (self.is_adult or self.getConfig('is_adult')):

View file

@ -16,7 +16,8 @@
#
import re
import datetime
from datetime import datetime, timedelta
import time
import logging
import urllib
@ -99,7 +100,7 @@ class BaseSiteAdapter(Configurable):
self.metadataDone = False
self.story = Story(configuration)
self.story.setMetadata('site',self.getConfigSection())
self.story.setMetadata('dateCreated',datetime.datetime.now())
self.story.setMetadata('dateCreated',datetime.now())
self.chapterUrls = [] # tuples of (chapter title,chapter url)
self.chapterFirst = None
self.chapterLast = None
@ -688,8 +689,23 @@ def makeDate(string,dateform):
if name in string:
string = string.replace(name,num)
break
# Many locales don't define %p for AM/PM. So if %p, remove from
# dateform, look for 'pm' in string, remove am/pm from string and
# add 12 hours if pm found.
add_hours = False
if u"%p" in dateform:
dateform = dateform.replace(u"%p",u"")
if 'pm' in string or 'PM' in string:
add_hours = True
string = string.replace(u"AM",u"").replace(u"PM",u"").replace(u"am",u"").replace(u"pm",u"")
date = datetime.strptime(string.encode('utf-8'),dateform.encode('utf-8'))
if add_hours:
date += timedelta(hours=12)
return datetime.datetime.strptime(string.encode('utf-8'),dateform.encode('utf-8'))
return date
# .? for AO3's ']' in param names.
safe_url_re = re.compile(r'(?P<attr>(password|name|login).?=)[^&]*(?P<amp>&|$)',flags=re.MULTILINE)

View file

@ -272,7 +272,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
datestr = re.sub(r' (\d[^\d])',r' 0\1',datestr) # add leading 0 for single digit day & hours.
return makeDate(datestr, self.dateformat)
except:
logger.debug('No date found in %s'%parenttag)
logger.debug('No date found in %s'%parenttag,exc_info=True)
return None
# grab the text for an individual chapter.

View file

@ -875,6 +875,12 @@ extracategories:The Sentinel
## it has +90% confidence. 'auto' is not reliable.
website_encodings:Windows-1252,ISO-8859-1,auto
## dateUpdated doesn't usually have time, but it does on
## bloodshedverse.com. See
## http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
dateUpdated_format:%%Y-%%m-%%d %%H:%%M
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:warnings,reviews