Fix a bug where string heuristics could penalize (rather than improve) the distance

This commit is contained in:
Adrian Sampson 2010-09-22 20:23:36 -07:00
parent 75cd929cda
commit ea26e6660e
2 changed files with 7 additions and 1 deletion

View file

@@ -198,7 +198,9 @@ def string_dist(str1, str2):
# the current case), recalculate the distances for the
# modified strings.
case_dist = _string_dist_basic(case_str1, case_str2)
case_delta = max(0, base_dist - case_dist)
case_delta = max(0.0, base_dist - case_dist)
if case_delta == 0.0:
continue
# Shift our baseline strings down (to avoid rematching the
# same part of the string) and add a scaled distance

View file

@@ -305,6 +305,10 @@ class StringDistanceTest(unittest.TestCase):
autotag.string_dist('(EP)', '(EP)')
autotag.string_dist(', An', '')
# Regression test: comparing 'Untitled' with its bracketed form
# '[Untitled]' via autotag.string_dist must yield a distance of exactly 0.0.
def test_heuristic_does_not_harm_distance(self):
dist = autotag.string_dist('Untitled', '[Untitled]')
self.assertEqual(dist, 0.0)
# Return the tests of this module, loaded by the module's own name
# (__name__) through a fresh unittest.TestLoader.
def suite():
return unittest.TestLoader().loadTestsFromName(__name__)