Source code for stringcompare.distance.jaro

from .comparator import StringComparator


def jaro(s, t):
    """Return the Jaro similarity between the sequences ``s`` and ``t``.

    The result is a float between 0.0 (no matching characters) and 1.0
    (identical sequences). Two empty sequences are considered identical.

    Parameters
    ----------
    s, t : str (or any sized, indexable sequence of comparable items)
        The two sequences to compare.

    Returns
    -------
    float
        ``(m/len(s) + m/len(t) + (m - transpositions/2)/m) / 3`` where ``m``
        is the number of matching characters, or 0.0 when nothing matches.
    """
    len_s, len_t = len(s), len(t)
    if len_s + len_t == 0:
        return 1.0

    # Adapted from https://rosettacode.org/wiki/Jaro_similarity#Python
    # Equal characters match when their positions are at most
    # ``match_distance`` apart (inclusive). The value is clamped at 0 so that
    # very short sequences can still match characters at identical positions.
    match_distance = max(0, max(len_s, len_t) // 2 - 1)

    found_s = [False] * len_s
    found_t = [False] * len_t
    m = 0
    for i, si in enumerate(s):
        # Only positions of ``t`` inside the match window need to be scanned.
        lo = max(0, i - match_distance)
        hi = min(len_t, i + match_distance + 1)
        for j in range(lo, hi):
            if not found_t[j] and si == t[j]:
                found_s[i] = True
                found_t[j] = True
                m += 1
                break

    if m == 0:
        # Also guards the divisions below when exactly one input is empty.
        return 0.0

    # Walk the matched characters of both sequences in order; each pair that
    # disagrees counts as half a transposition.
    transpositions = 0
    j = 0
    for i, si in enumerate(s):
        if not found_s[i]:
            continue
        while not found_t[j]:
            j += 1
        if si != t[j]:
            transpositions += 1
        j += 1

    return (m / len_s + m / len_t + (m - transpositions / 2.0) / m) / 3.0
class Jaro(StringComparator):
    """String comparator built on the module-level ``jaro`` function.

    Parameters
    ----------
    similarity : bool, optional
        When True, ``compare`` returns the value of ``jaro(s, t)`` directly.
        When False (the default), it returns ``1.0 - jaro(s, t)``.
    """

    def __init__(self, similarity=False):
        self.similarity = similarity

    def compare(self, s, t):
        """Compare ``s`` and ``t`` and return a float.

        Returns ``jaro(s, t)`` if ``self.similarity`` is truthy, otherwise
        its complement ``1.0 - jaro(s, t)``.
        """
        score = jaro(s, t)
        return score if self.similarity else 1.0 - score