Source code for stringcompare.distance.levenshtein

import numpy as np
from .comparator import StringComparator


[docs]def levenshtein(s, t): m = len(s) n = len(t) buffer = np.arange(max(n, m) + 1) p = m for j in range(1, n + 1): temp = j - 1 p = j for i in range(1, m + 1): p = min(p + 1, buffer[i] + 1, temp + (s[i - 1] != t[j - 1])) temp = buffer[i] buffer[i] = p return p
[docs]class Levenshtein(StringComparator): def __init__(self, normalize=True, similarity=False, dmat_size=100): self.normalize = normalize self.similarity = similarity
[docs] def compare(self, s, t): size = len(s) + len(t) if size == 0: return 1 * self.similarity dist = levenshtein(s, t) if self.similarity: sim = (size - dist) / 2.0 if self.normalize: sim = sim / (size - sim) return sim else: if self.normalize: dist = 2 * dist / (size + dist) return dist