mirror of
https://github.com/beetbox/beets.git
synced 2026-01-02 05:52:50 +01:00
use unidecode to deal with accents and such (#118)
This commit is contained in:
parent
3b23198324
commit
078252d31e
4 changed files with 20 additions and 7 deletions
4
NEWS
4
NEWS
|
|
@ -10,6 +10,10 @@
|
|||
pipeline stages during imports. This should bring additional
|
||||
performance when using album art plugins like embedart or
|
||||
beets-lyrics.
|
||||
* Accents and other Unicode diacritics on characters are now treated
|
||||
more fairly by the autotagger. For example, if you're missing the
|
||||
acute accent on the "e" in "cafe", that difference won't be penalized.
|
||||
This introduces a new dependency on the "unidecode" Python module.
|
||||
* Fixed a problem where duplicate albums or items imported at the same
|
||||
time would fail to be detected.
|
||||
* BPD now uses a persistent "virtual filesystem" in order to fake a
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# This file is part of beets.
|
||||
# Copyright 2010, Adrian Sampson.
|
||||
# Copyright 2011, Adrian Sampson.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
|
|
@ -14,15 +14,16 @@
|
|||
|
||||
"""Facilities for automatically determining files' correct metadata.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from beets.autotag import mb
|
||||
import re
|
||||
from munkres import Munkres
|
||||
from unidecode import unidecode
|
||||
|
||||
from beets.autotag import mb
|
||||
from beets import library, mediafile, plugins
|
||||
from beets.util import levenshtein, sorted_walk
|
||||
import logging
|
||||
|
||||
# Try 5 releases. In the future, this should be more dynamic: let the
|
||||
# probability of continuing to the next release be inversely
|
||||
|
|
@ -114,8 +115,12 @@ def albums_in_dir(path):
|
|||
|
||||
def _string_dist_basic(str1, str2):
|
||||
"""Basic edit distance between two strings, ignoring
|
||||
non-alphanumeric characters and case. Normalized by string length.
|
||||
non-alphanumeric characters and case. Comparisons are based on a
|
||||
transliteration/lowering to ASCII characters. Normalized by string
|
||||
length.
|
||||
"""
|
||||
str1 = unidecode(str1)
|
||||
str2 = unidecode(str2)
|
||||
str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
|
||||
str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
|
||||
if not str1 and not str2:
|
||||
|
|
|
|||
1
setup.py
1
setup.py
|
|
@ -51,6 +51,7 @@ setup(name='beets',
|
|||
'mutagen',
|
||||
'python-musicbrainz2 >= 0.7.2',
|
||||
'munkres',
|
||||
'unidecode',
|
||||
],
|
||||
|
||||
classifiers=[
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# This file is part of beets.
|
||||
# Copyright 2010, Adrian Sampson.
|
||||
# Copyright 2011, Adrian Sampson.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
|
|
@ -14,7 +14,6 @@
|
|||
|
||||
"""Tests for autotagging functionality.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import os
|
||||
import shutil
|
||||
|
|
@ -495,6 +494,10 @@ class StringDistanceTest(unittest.TestCase):
|
|||
dist = autotag.string_dist('And', '&')
|
||||
self.assertEqual(dist, 0.0)
|
||||
|
||||
def test_accented_characters(self):
|
||||
dist = autotag.string_dist(u'\xe9\xe1\xf1', u'ean')
|
||||
self.assertEqual(dist, 0.0)
|
||||
|
||||
def suite():
|
||||
return unittest.TestLoader().loadTestsFromName(__name__)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue