forked from feature23/StringSimilarity.NET
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNormalizedLevenshtein.cs
More file actions
114 lines (103 loc) · 5.13 KB
/
NormalizedLevenshtein.cs
File metadata and controls
114 lines (103 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
* The MIT License
*
* Copyright 2016 feature[23]
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
using System;
using F23.StringSimilarity.Interfaces;
namespace F23.StringSimilarity
{
/// <summary>
/// Normalized Levenshtein distance: the Levenshtein distance divided by the length
/// of the longest input. The resulting value is always in the interval [0.0, 1.0],
/// but it is not a metric anymore! The similarity is computed as 1 - normalized
/// distance.
/// </summary>
public class NormalizedLevenshtein : INormalizedStringDistance, INormalizedStringSimilarity, INormalizedSpanDistance, INormalizedSpanSimilarity
{
    // Computes the raw (un-normalized) edit distance that this class scales.
    private readonly Levenshtein _levenshtein = new Levenshtein();

    /// <summary>
    /// Compute distance as Levenshtein(s1, s2) / max(|s1|, |s2|).
    /// </summary>
    /// <remarks>Delegates to the span overload. A null string is turned into a default span by
    /// <c>AsSpan()</c>, and the span overload rejects default spans, which is how the null check
    /// is enforced.</remarks>
    /// <param name="s1">The first string to compare.</param>
    /// <param name="s2">The second string to compare.</param>
    /// <returns>The computed distance in the range [0, 1]</returns>
    /// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
    public double Distance(string s1, string s2)
        => Distance(s1.AsSpan(), s2.AsSpan());

    /// <summary>
    /// Calculates the normalized distance between two sequences of elements.
    /// </summary>
    /// <remarks>The distance is normalized by the length of the longer sequence. This ensures the
    /// result is always in the range [0.0, 1.0], where 0.0 indicates identical sequences and 1.0 indicates
    /// maximum dissimilarity.</remarks>
    /// <typeparam name="T">The type of elements in the sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
    /// <param name="s1">The first sequence to compare. Must not be a default (uninitialized) span.</param>
    /// <param name="s2">The second sequence to compare. Must not be a default (uninitialized) span.</param>
    /// <returns>A double value representing the normalized distance between the two sequences. Returns 0.0 if the
    /// sequences are equal, which includes two initialized empty sequences.</returns>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is a
    /// default span, which is what a null string or null array converts to.</exception>
    public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
        where T : IEquatable<T>
    {
        // A span is a struct and can never be null. Comparing it with null converts the
        // literal to a default span, so these guards reject default spans (the result of
        // converting a null string or array). An initialized empty input such as "" is
        // not a default span and passes through.
        if (s1 == null)
        {
            throw new ArgumentNullException(nameof(s1));
        }

        if (s2 == null)
        {
            throw new ArgumentNullException(nameof(s2));
        }

        if (s1.SequenceEqual(s2))
        {
            return 0.0;
        }

        // The sequences differ, so at least one of them is non-empty and the divisor
        // is always >= 1; no separate zero-length check is needed.
        int maxLength = Math.Max(s1.Length, s2.Length);

        return _levenshtein.Distance(s1, s2) / maxLength;
    }

    /// <summary>
    /// Return 1 - distance.
    /// </summary>
    /// <param name="s1">The first string to compare.</param>
    /// <param name="s2">The second string to compare.</param>
    /// <returns>1 - distance</returns>
    /// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
    public double Similarity(string s1, string s2)
        => 1.0 - Distance(s1, s2);

    /// <summary>
    /// Calculates the similarity between two sequences based on their distance.
    /// </summary>
    /// <remarks>The similarity is calculated as 1.0 minus the normalized distance between the two
    /// sequences.</remarks>
    /// <typeparam name="T">The type of elements in the sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
    /// <param name="s1">The first sequence to compare. Must not be a default (uninitialized) span.</param>
    /// <param name="s2">The second sequence to compare. Must not be a default (uninitialized) span.</param>
    /// <returns>A value between 0.0 and 1.0 representing the similarity of the two sequences, where 1.0 indicates identical
    /// sequences and 0.0 indicates completely dissimilar sequences.</returns>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is a
    /// default span, which is what a null string or null array converts to.</exception>
    public double Similarity<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
        where T : IEquatable<T>
        => 1.0 - Distance(s1, s2);
}
}