mirror of
https://github.com/beetbox/beets.git
synced 2026-02-25 16:53:17 +01:00
Use Levenshtein distance from Jellyfish library
It's about two orders of magnitude faster than the current hand-rolled implementation. See #646.
This commit is contained in:
parent
882723a0bf
commit
57196e643a
4 changed files with 4 additions and 25 deletions
|
|
@ -23,7 +23,7 @@ from beets import logging
|
|||
from beets import plugins
|
||||
from beets import config
|
||||
from beets.autotag import mb
|
||||
from beets.util import levenshtein
|
||||
from jellyfish import levenshtein_distance
|
||||
from unidecode import unidecode
|
||||
|
||||
log = logging.getLogger('beets')
|
||||
|
|
@ -209,7 +209,7 @@ def _string_dist_basic(str1, str2):
|
|||
str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
|
||||
if not str1 and not str2:
|
||||
return 0.0
|
||||
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
|
||||
return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2)))
|
||||
|
||||
|
||||
def string_dist(str1, str2):
|
||||
|
|
|
|||
|
|
@ -564,29 +564,6 @@ def as_string(value):
|
|||
return unicode(value)
|
||||
|
||||
|
||||
def levenshtein(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    A dynamic-programming implementation adapted from Wikibooks:
    http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/
    Levenshtein_distance#Python

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn `s1` into `s2`. Only
    two rows of the DP table are kept at any time.

    `s1` and `s2` must support `len()` and iteration (e.g. strings).
    Returns a non-negative integer.
    """
    # Make `s1` the longer sequence so each DP row (sized by `s2`) is
    # as short as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    # After the swap an empty `s1` implies an empty `s2` as well.
    if not s1:
        return len(s2)

    # Row 0: turning the empty prefix of `s1` into each prefix of `s2`
    # costs one insertion per element. `list(range(...))` works on both
    # Python 2 and 3 (unlike `xrange`) and keeps every row a list.
    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        # First column: deleting the first i + 1 elements of `s1`.
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # The boolean adds 1 only when the elements differ.
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]
|
||||
|
||||
|
||||
def plurality(objs):
|
||||
"""Given a sequence of comparable objects, returns the object that
|
||||
is most common in the set and the frequency of that object. The
|
||||
|
|
|
|||
1
setup.py
1
setup.py
|
|
@ -82,6 +82,7 @@ setup(
|
|||
'unidecode',
|
||||
'musicbrainzngs>=0.4',
|
||||
'pyyaml',
|
||||
'jellyfish',
|
||||
] + (['colorama'] if (sys.platform == 'win32') else []) +
|
||||
(['ordereddict'] if sys.version_info < (2, 7, 0) else []),
|
||||
|
||||
|
|
|
|||
1
tox.ini
1
tox.ini
|
|
@ -18,6 +18,7 @@ deps =
|
|||
responses
|
||||
pathlib
|
||||
pyxdg
|
||||
jellyfish
|
||||
commands =
|
||||
nosetests {posargs}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue