rust_dsa/
levenshtein.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
use std::{cmp, iter, mem};

/// Returns the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
/// between two slices.
///
/// # Example
/// ```
/// use rust_dsa::edit_distance;
///
/// let a = [9, 4, 8, 5, 9, 3, 8, 5];
/// let b = [1, 9, 4, 8, 3, 5];
/// assert_eq!(edit_distance(&a, &b), 4);
///
/// let kitten = ['k', 'i', 't', 't', 'e', 'n'];
/// let sitting = ['s', 'i', 't', 't', 'i', 'n', 'g'];
/// assert_eq!(edit_distance(&kitten, &sitting), 3);
///
/// let x = ["foo", "bar", "baz", "baz"];
/// let y = ["baz", "foo", "bar", "baz"];
/// assert_eq!(edit_distance(&x, &y), 2);
/// ```
pub fn edit_distance<T>(a: &[T], b: &[T]) -> usize
where
    T: PartialEq,
{
    if a.is_empty() {
        return b.len();
    } else if b.is_empty() {
        return a.len();
    }

    let mut old_row: Vec<_> = (0..(b.len() + 1)).collect();
    let mut new_row: Vec<_> = iter::repeat(0).take(b.len() + 1).collect();

    #[allow(clippy::needless_range_loop)]
    for i in 0..a.len() {
        new_row[0] = i + 1;
        for j in 0..b.len() {
            let ne = a[i] != b[j];
            new_row[j + 1] = min(new_row[j] + 1, old_row[j + 1] + 1, old_row[j] + ne as usize);
        }
        mem::swap(&mut new_row, &mut old_row);
    }

    old_row[b.len()]
}

/// Returns the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
/// between two [`str`]s.
///
/// # Example
/// ```
/// use rust_dsa::str_distance;
///
/// assert_eq!(str_distance("kitten", "sitting"), 3);
///
/// assert_eq!(str_distance("intention", "execution"), 5);
///
/// assert_eq!(str_distance("sail", "wail"), 1);
///
/// assert_eq!(str_distance("Levenshtein", "Levenshtein"), 0);
///
/// assert_eq!(str_distance("", "foo"), 3);
/// ```
pub fn str_distance(a: &str, b: &str) -> usize {
    let a: Vec<_> = a.chars().collect();
    let b: Vec<_> = b.chars().collect();

    edit_distance(&a, &b)
}

fn min<T>(a: T, b: T, c: T) -> T
where
    T: Ord,
{
    cmp::min(cmp::min(a, b), c)
}