mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-09 02:24:02 +02:00
Intelligently convert (almost) all filenames to ASCII. This should make for more readable file names as opposed to the previous practice of simply replacing unicode characters with underscores.
This commit is contained in:
parent
11068e0e09
commit
6cf006db05
8 changed files with 37 additions and 31 deletions
|
|
@ -8,7 +8,7 @@
|
|||
import shutil
|
||||
from itertools import cycle
|
||||
|
||||
from calibre import sanitize_file_name as sanitize
|
||||
from calibre.utils.filenames import ascii_filename as sanitize
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
import calibre.devices.cybookg3.t2b as t2b
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ def upload_books(self, files, names, on_card=None, end_session=True,
|
|||
self.report_progress(i / float(len(files)), _('Transferring books to device...'))
|
||||
|
||||
self.report_progress(1.0, _('Transferring books to device...'))
|
||||
|
||||
|
||||
return zip(paths, cycle([on_card]))
|
||||
|
||||
def delete_books(self, paths, end_session=True):
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
from itertools import cycle
|
||||
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
from calibre import sanitize_file_name as sanitize
|
||||
from calibre.utils.filenames import ascii_filename as sanitize
|
||||
from calibre.ebooks.metadata import string_to_authors
|
||||
|
||||
class JETBOOK(USBMS):
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@
|
|||
|
||||
from lxml import html, etree
|
||||
|
||||
from calibre import entity_to_unicode, sanitize_file_name
|
||||
from calibre import entity_to_unicode
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre.ebooks.chardet import ENCODING_PATS
|
||||
|
|
@ -374,7 +375,7 @@ def extract_content(self, output_dir, parse_cache):
|
|||
fname = self.name.encode('ascii', 'replace')
|
||||
fname = re.sub(r'[\x08\x15\0]+', '', fname)
|
||||
htmlfile = os.path.join(output_dir,
|
||||
sanitize_file_name(fname) + '.html')
|
||||
ascii_filename(fname) + '.html')
|
||||
try:
|
||||
for ref in guide.xpath('descendant::reference'):
|
||||
if ref.attrib.has_key('href'):
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@
|
|||
import re
|
||||
|
||||
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
|
||||
from calibre.constants import preferred_encoding
|
||||
|
||||
class Unidecoder(object):
|
||||
|
||||
|
|
@ -70,7 +71,10 @@ def decode(self, text):
|
|||
try:
|
||||
text = unicode(text)
|
||||
except:
|
||||
text = text.decode('utf-8', 'ignore')
|
||||
try:
|
||||
text = text.decode(preferred_encoding)
|
||||
except:
|
||||
text = text.decode('utf-8', 'replace')
|
||||
# Replace characters larger than 127 with their ASCII equivalent.
|
||||
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
|
||||
text)
|
||||
|
|
@ -80,7 +84,7 @@ def replace_point(self, codepoint):
|
|||
Returns the replacement character or ? if none can be found.
|
||||
'''
|
||||
try:
|
||||
# Splite the unicode character xABCD into parts 0xAB and 0xCD.
|
||||
# Split the unicode character xABCD into parts 0xAB and 0xCD.
|
||||
# 0xAB represents the group within CODEPOINTS to query and 0xCD
|
||||
# represents the position in the list of characters for the group.
|
||||
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@
|
|||
pixmap_to_data, warning_dialog, \
|
||||
question_dialog
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre import sanitize_file_name, preferred_encoding
|
||||
from calibre import preferred_encoding
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.devices.errors import FreeSpaceError
|
||||
from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \
|
||||
|
|
@ -542,7 +542,7 @@ def send_by_mail(self, to, fmts, delete_from_library, send_ids=None,
|
|||
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
|
||||
_('in the %s format.') %
|
||||
os.path.splitext(f)[1][1:].upper())
|
||||
prefix = sanitize_file_name(t+' - '+a)
|
||||
prefix = ascii_filename(t+' - '+a)
|
||||
if not isinstance(prefix, unicode):
|
||||
prefix = prefix.decode(preferred_encoding, 'replace')
|
||||
attachment_names.append(prefix + os.path.splitext(f)[1])
|
||||
|
|
@ -693,7 +693,7 @@ def sync_news(self, send_ids=None, do_auto_convert=True):
|
|||
rows_are_ids=True)
|
||||
names = []
|
||||
for mi in metadata:
|
||||
prefix = sanitize_file_name(mi['title'])
|
||||
prefix = ascii_filename(mi['title'])
|
||||
if not isinstance(prefix, unicode):
|
||||
prefix = prefix.decode(preferred_encoding, 'replace')
|
||||
prefix = ascii_filename(prefix)
|
||||
|
|
@ -758,7 +758,7 @@ def sync_to_device(self, on_card, delete_from_library,
|
|||
a = mi['authors']
|
||||
if not a:
|
||||
a = _('Unknown')
|
||||
prefix = sanitize_file_name(t+' - '+a)
|
||||
prefix = ascii_filename(t+' - '+a)
|
||||
if not isinstance(prefix, unicode):
|
||||
prefix = prefix.decode(preferred_encoding, 'replace')
|
||||
prefix = ascii_filename(prefix)
|
||||
|
|
|
|||
|
|
@ -14,8 +14,9 @@
|
|||
QMessageBox, QStackedLayout
|
||||
from PyQt4.QtSvg import QSvgRenderer
|
||||
|
||||
from calibre import __version__, __appname__, sanitize_file_name, \
|
||||
from calibre import __version__, __appname__, \
|
||||
iswindows, isosx, prints, patheq
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.config import prefs, dynamic
|
||||
from calibre.utils.ipc.server import Server
|
||||
|
|
@ -852,7 +853,7 @@ def _add_books(self, paths, to_device, on_card=None):
|
|||
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
|
||||
if paths:
|
||||
self.upload_books(paths,
|
||||
list(map(sanitize_file_name, names)),
|
||||
list(map(ascii_filename, names)),
|
||||
infos, on_card=on_card)
|
||||
self.status_bar.showMessage(
|
||||
_('Uploading books to device.'), 2000)
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@
|
|||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.customize.ui import run_plugins_on_import
|
||||
|
||||
from calibre import sanitize_file_name
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.ebooks import BOOK_EXTENSIONS
|
||||
|
||||
if iswindows:
|
||||
|
|
@ -652,8 +652,8 @@ def construct_path_name(self, id):
|
|||
authors = self.authors(id, index_is_id=True)
|
||||
if not authors:
|
||||
authors = _('Unknown')
|
||||
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
||||
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
||||
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
||||
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
||||
path = author + '/' + title + ' (%d)'%id
|
||||
return path
|
||||
|
||||
|
|
@ -664,8 +664,8 @@ def construct_file_name(self, id):
|
|||
authors = self.authors(id, index_is_id=True)
|
||||
if not authors:
|
||||
authors = _('Unknown')
|
||||
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||
name = title + ' - ' + author
|
||||
while name.endswith('.'):
|
||||
name = name[:-1]
|
||||
|
|
@ -1520,12 +1520,12 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False):
|
|||
x['cover'] = os.path.join(path, 'cover.jpg')
|
||||
if not self.has_cover(x['id'], index_is_id=True):
|
||||
x['cover'] = None
|
||||
path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s'
|
||||
formats = self.formats(record[FIELD_MAP['id']], index_is_id=True)
|
||||
if formats:
|
||||
for fmt in formats.split(','):
|
||||
x['formats'].append(path%fmt.lower())
|
||||
x['fmt_'+fmt.lower()] = path%fmt.lower()
|
||||
path = self.format_abspath(x['id'], fmt, index_is_id=True)
|
||||
x['formats'].append(path)
|
||||
x['fmt_'+fmt.lower()] = path
|
||||
x['available_formats'] = [i.upper() for i in formats.split(',')]
|
||||
|
||||
return data
|
||||
|
|
@ -1602,12 +1602,12 @@ def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
|
|||
by_author[au] = []
|
||||
by_author[au].append(index)
|
||||
for au in by_author.keys():
|
||||
apath = os.path.join(dir, sanitize_file_name(au))
|
||||
apath = os.path.join(dir, ascii_filename(au))
|
||||
if not single_dir and not os.path.exists(apath):
|
||||
os.mkdir(apath)
|
||||
for idx in by_author[au]:
|
||||
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
|
||||
tpath = os.path.join(apath, sanitize_file_name(title))
|
||||
tpath = os.path.join(apath, ascii_filename(title))
|
||||
id = idx if index_is_id else self.id(idx)
|
||||
id = str(id)
|
||||
if not single_dir and not os.path.exists(tpath):
|
||||
|
|
@ -1621,10 +1621,10 @@ def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
|
|||
mi.authors = [_('Unknown')]
|
||||
cdata = self.cover(int(id), index_is_id=True)
|
||||
if cdata is not None:
|
||||
cname = sanitize_file_name(name)+'.jpg'
|
||||
cname = ascii_filename(name)+'.jpg'
|
||||
open(os.path.join(base, cname), 'wb').write(cdata)
|
||||
mi.cover = cname
|
||||
with open(os.path.join(base, sanitize_file_name(name)+'.opf'),
|
||||
with open(os.path.join(base, ascii_filename(name)+'.opf'),
|
||||
'wb') as f:
|
||||
f.write(metadata_to_opf(mi))
|
||||
|
||||
|
|
@ -1636,7 +1636,7 @@ def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
|
|||
if not data:
|
||||
continue
|
||||
fname = name +'.'+fmt.lower()
|
||||
fname = sanitize_file_name(fname)
|
||||
fname = ascii_filename(fname)
|
||||
f = open(os.path.join(base, fname), 'w+b')
|
||||
f.write(data)
|
||||
f.flush()
|
||||
|
|
@ -1671,7 +1671,7 @@ def export_single_format_to_dir(self, dir, indices, format,
|
|||
if not au:
|
||||
au = _('Unknown')
|
||||
fname = '%s - %s.%s'%(title, au, format.lower())
|
||||
fname = sanitize_file_name(fname)
|
||||
fname = ascii_filename(fname)
|
||||
if not os.path.exists(dir):
|
||||
os.makedirs(dir)
|
||||
f = open(os.path.join(dir, fname), 'w+b')
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@
|
|||
from PIL import Image
|
||||
from cStringIO import StringIO
|
||||
|
||||
from calibre import browser, sanitize_file_name, \
|
||||
relpath, unicode_path
|
||||
from calibre import browser, relpath, unicode_path
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.config import OptionParser
|
||||
|
|
@ -313,7 +313,7 @@ def process_images(self, soup, baseurl):
|
|||
self.log.exception('Could not fetch image %s'% iurl)
|
||||
continue
|
||||
c += 1
|
||||
fname = sanitize_file_name('img'+str(c)+ext)
|
||||
fname = ascii_filename('img'+str(c)+ext)
|
||||
if isinstance(fname, unicode):
|
||||
fname = fname.encode('ascii', 'replace')
|
||||
imgpath = os.path.join(diskpath, fname+'.jpg')
|
||||
|
|
@ -416,7 +416,7 @@ def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
|
|||
if not isinstance(_fname, unicode):
|
||||
_fname.decode('latin1', 'replace')
|
||||
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
|
||||
_fname = sanitize_file_name(_fname)
|
||||
_fname = ascii_filename(_fname)
|
||||
_fname = os.path.splitext(_fname)[0]+'.xhtml'
|
||||
res = os.path.join(linkdiskpath, _fname)
|
||||
self.downloaded_paths.append(res)
|
||||
|
|
|
|||
Loading…
Reference in a new issue