Intelligently convert (almost) all filenames to ASCII. This should make for more readable file names as opposed to the previous practice of simply replacing unicode chracters with underscores.

This commit is contained in:
Kovid Goyal 2009-07-26 10:26:51 -06:00
parent 11068e0e09
commit 6cf006db05
8 changed files with 37 additions and 31 deletions

View file

@ -8,7 +8,7 @@
import shutil
from itertools import cycle
from calibre import sanitize_file_name as sanitize
from calibre.utils.filenames import ascii_filename as sanitize
from calibre.devices.usbms.driver import USBMS
import calibre.devices.cybookg3.t2b as t2b
@ -98,7 +98,7 @@ def upload_books(self, files, names, on_card=None, end_session=True,
self.report_progress(i / float(len(files)), _('Transferring books to device...'))
self.report_progress(1.0, _('Transferring books to device...'))
return zip(paths, cycle([on_card]))
def delete_books(self, paths, end_session=True):

View file

@ -8,7 +8,7 @@
from itertools import cycle
from calibre.devices.usbms.driver import USBMS
from calibre import sanitize_file_name as sanitize
from calibre.utils.filenames import ascii_filename as sanitize
from calibre.ebooks.metadata import string_to_authors
class JETBOOK(USBMS):

View file

@ -21,7 +21,8 @@
from lxml import html, etree
from calibre import entity_to_unicode, sanitize_file_name
from calibre import entity_to_unicode
from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS
@ -374,7 +375,7 @@ def extract_content(self, output_dir, parse_cache):
fname = self.name.encode('ascii', 'replace')
fname = re.sub(r'[\x08\x15\0]+', '', fname)
htmlfile = os.path.join(output_dir,
sanitize_file_name(fname) + '.html')
ascii_filename(fname) + '.html')
try:
for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'):

View file

@ -57,6 +57,7 @@
import re
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
from calibre.constants import preferred_encoding
class Unidecoder(object):
@ -70,7 +71,10 @@ def decode(self, text):
try:
text = unicode(text)
except:
text = text.decode('utf-8', 'ignore')
try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivelent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
text)
@ -80,7 +84,7 @@ def replace_point(self, codepoint):
Returns the replacement character or ? if none can be found.
'''
try:
# Splite the unicode character xABCD into parts 0xAB and 0xCD.
# Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(

View file

@ -21,7 +21,7 @@
pixmap_to_data, warning_dialog, \
question_dialog
from calibre.ebooks.metadata import authors_to_string
from calibre import sanitize_file_name, preferred_encoding
from calibre import preferred_encoding
from calibre.utils.filenames import ascii_filename
from calibre.devices.errors import FreeSpaceError
from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \
@ -542,7 +542,7 @@ def send_by_mail(self, to, fmts, delete_from_library, send_ids=None,
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
_('in the %s format.') %
os.path.splitext(f)[1][1:].upper())
prefix = sanitize_file_name(t+' - '+a)
prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
attachment_names.append(prefix + os.path.splitext(f)[1])
@ -693,7 +693,7 @@ def sync_news(self, send_ids=None, do_auto_convert=True):
rows_are_ids=True)
names = []
for mi in metadata:
prefix = sanitize_file_name(mi['title'])
prefix = ascii_filename(mi['title'])
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix)
@ -758,7 +758,7 @@ def sync_to_device(self, on_card, delete_from_library,
a = mi['authors']
if not a:
a = _('Unknown')
prefix = sanitize_file_name(t+' - '+a)
prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix)

View file

@ -14,8 +14,9 @@
QMessageBox, QStackedLayout
from PyQt4.QtSvg import QSvgRenderer
from calibre import __version__, __appname__, sanitize_file_name, \
from calibre import __version__, __appname__, \
iswindows, isosx, prints, patheq
from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server
@ -852,7 +853,7 @@ def _add_books(self, paths, to_device, on_card=None):
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
if paths:
self.upload_books(paths,
list(map(sanitize_file_name, names)),
list(map(ascii_filename, names)),
infos, on_card=on_card)
self.status_bar.showMessage(
_('Uploading books to device.'), 2000)

View file

@ -34,7 +34,7 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.customize.ui import run_plugins_on_import
from calibre import sanitize_file_name
from calibre.utils.filenames import ascii_filename
from calibre.ebooks import BOOK_EXTENSIONS
if iswindows:
@ -652,8 +652,8 @@ def construct_path_name(self, id):
authors = self.authors(id, index_is_id=True)
if not authors:
authors = _('Unknown')
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
path = author + '/' + title + ' (%d)'%id
return path
@ -664,8 +664,8 @@ def construct_file_name(self, id):
authors = self.authors(id, index_is_id=True)
if not authors:
authors = _('Unknown')
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
name = title + ' - ' + author
while name.endswith('.'):
name = name[:-1]
@ -1520,12 +1520,12 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False):
x['cover'] = os.path.join(path, 'cover.jpg')
if not self.has_cover(x['id'], index_is_id=True):
x['cover'] = None
path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s'
formats = self.formats(record[FIELD_MAP['id']], index_is_id=True)
if formats:
for fmt in formats.split(','):
x['formats'].append(path%fmt.lower())
x['fmt_'+fmt.lower()] = path%fmt.lower()
path = self.format_abspath(x['id'], fmt, index_is_id=True)
x['formats'].append(path)
x['fmt_'+fmt.lower()] = path
x['available_formats'] = [i.upper() for i in formats.split(',')]
return data
@ -1602,12 +1602,12 @@ def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
by_author[au] = []
by_author[au].append(index)
for au in by_author.keys():
apath = os.path.join(dir, sanitize_file_name(au))
apath = os.path.join(dir, ascii_filename(au))
if not single_dir and not os.path.exists(apath):
os.mkdir(apath)
for idx in by_author[au]:
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
tpath = os.path.join(apath, sanitize_file_name(title))
tpath = os.path.join(apath, ascii_filename(title))
id = idx if index_is_id else self.id(idx)
id = str(id)
if not single_dir and not os.path.exists(tpath):
@ -1621,10 +1621,10 @@ def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
mi.authors = [_('Unknown')]
cdata = self.cover(int(id), index_is_id=True)
if cdata is not None:
cname = sanitize_file_name(name)+'.jpg'
cname = ascii_filename(name)+'.jpg'
open(os.path.join(base, cname), 'wb').write(cdata)
mi.cover = cname
with open(os.path.join(base, sanitize_file_name(name)+'.opf'),
with open(os.path.join(base, ascii_filename(name)+'.opf'),
'wb') as f:
f.write(metadata_to_opf(mi))
@ -1636,7 +1636,7 @@ def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
if not data:
continue
fname = name +'.'+fmt.lower()
fname = sanitize_file_name(fname)
fname = ascii_filename(fname)
f = open(os.path.join(base, fname), 'w+b')
f.write(data)
f.flush()
@ -1671,7 +1671,7 @@ def export_single_format_to_dir(self, dir, indices, format,
if not au:
au = _('Unknown')
fname = '%s - %s.%s'%(title, au, format.lower())
fname = sanitize_file_name(fname)
fname = ascii_filename(fname)
if not os.path.exists(dir):
os.makedirs(dir)
f = open(os.path.join(dir, fname), 'w+b')

View file

@ -14,8 +14,8 @@
from PIL import Image
from cStringIO import StringIO
from calibre import browser, sanitize_file_name, \
relpath, unicode_path
from calibre import browser, relpath, unicode_path
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
@ -313,7 +313,7 @@ def process_images(self, soup, baseurl):
self.log.exception('Could not fetch image %s'% iurl)
continue
c += 1
fname = sanitize_file_name('img'+str(c)+ext)
fname = ascii_filename('img'+str(c)+ext)
if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace')
imgpath = os.path.join(diskpath, fname+'.jpg')
@ -416,7 +416,7 @@ def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
if not isinstance(_fname, unicode):
_fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
_fname = sanitize_file_name(_fname)
_fname = ascii_filename(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml'
res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res)