Skip to content

Commit 5eb0254

Browse files
authored
Add Optimal String Alignment (OSA) Distance Algorithm (#464)
1 parent 6b37d04 commit 5eb0254

File tree

3 files changed

+232
-0
lines changed

3 files changed

+232
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
using Algorithms.Strings.Similarity;
2+
using FluentAssertions;
3+
using NUnit.Framework;
4+
using System;
5+
6+
namespace Algorithms.Tests.Strings.Similarity
7+
{
8+
/// <summary>
/// Unit tests for <see cref="OptimalStringAlignment.Calculate"/>, covering identical inputs,
/// empty inputs, each single edit operation, and a combined multi-edit case.
/// </summary>
[TestFixture]
public class OptimalStringAlignmentTests
{
    /// <summary>Two equal strings are at distance zero.</summary>
    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero()
    {
        // Arrange
        const string word = "example";

        // Act
        var distance = OptimalStringAlignment.Calculate(word, "example");

        // Assert
        distance.Should().Be(0.0);
    }

    /// <summary>An empty first string needs one insertion per character of the second string.</summary>
    [Test]
    public void Calculate_FirstStringEmpty_ReturnsLengthOfSecondString()
    {
        // Arrange
        const string target = "example";

        // Act
        var distance = OptimalStringAlignment.Calculate("", target);

        // Assert
        distance.Should().Be(target.Length);
    }

    /// <summary>An empty second string needs one deletion per character of the first string.</summary>
    [Test]
    public void Calculate_SecondStringEmpty_ReturnsLengthOfFirstString()
    {
        // Arrange
        const string source = "example";

        // Act
        var distance = OptimalStringAlignment.Calculate(source, "");

        // Assert
        distance.Should().Be(source.Length);
    }

    /// <summary>Two empty strings are at distance zero.</summary>
    [Test]
    public void Calculate_BothStringsEmpty_ReturnsZero()
    {
        // Act
        var distance = OptimalStringAlignment.Calculate("", "");

        // Assert
        distance.Should().Be(0.0);
    }

    /// <summary>Appending a single character costs exactly one edit.</summary>
    [Test]
    public void Calculate_OneInsertion_ReturnsOne()
    {
        // Act
        var distance = OptimalStringAlignment.Calculate("example", "examples");

        // Assert
        distance.Should().Be(1.0);
    }

    /// <summary>Removing a single character costs exactly one edit.</summary>
    [Test]
    public void Calculate_OneDeletion_ReturnsOne()
    {
        // Act
        var distance = OptimalStringAlignment.Calculate("examples", "example");

        // Assert
        distance.Should().Be(1.0);
    }

    /// <summary>Replacing a single character costs exactly one edit.</summary>
    [Test]
    public void Calculate_OneSubstitution_ReturnsOne()
    {
        // Act
        var distance = OptimalStringAlignment.Calculate("example", "exbmple");

        // Assert
        distance.Should().Be(1.0);
    }

    /// <summary>Swapping two adjacent characters costs exactly one edit.</summary>
    [Test]
    public void Calculate_OneTransposition_ReturnsOne()
    {
        // Act
        var distance = OptimalStringAlignment.Calculate("example", "exmaple");

        // Assert
        distance.Should().Be(1.0);
    }

    /// <summary>"kitten" to "sitting" takes two substitutions and one insertion.</summary>
    [Test]
    public void Calculate_MultipleOperations_ReturnsCorrectDistance()
    {
        // Act
        var distance = OptimalStringAlignment.Calculate("kitten", "sitting");

        // Assert
        distance.Should().Be(3.0);
    }
}
74+
}
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
using System;
2+
3+
namespace Algorithms.Strings.Similarity
4+
{
5+
/// <summary>
/// Calculates the Optimal String Alignment (OSA) distance, also known as the restricted
/// Damerau-Levenshtein distance, between two strings.
/// </summary>
/// <remarks>
/// <para>
/// The OSA distance is the minimum number of edit operations needed to transform one string into
/// the other, where the allowed operations are:
/// </para>
/// <list type="number">
/// <item><description>Insertion: adding a single character.</description></item>
/// <item><description>Deletion: removing a single character.</description></item>
/// <item><description>Substitution: replacing one character with another.</description></item>
/// <item><description>Transposition: swapping two adjacent characters (this is what distinguishes
/// OSA from the plain Levenshtein distance).</description></item>
/// </list>
/// <para>
/// OSA is computed under the restriction that no substring is edited more than once. The
/// unrestricted Damerau-Levenshtein distance has no such restriction, so the two can differ:
/// the OSA distance between "CA" and "ABC" is 3, while the unrestricted distance is 2.
/// </para>
/// <para>
/// Characters are compared as individual <see cref="char"/> values. The algorithm fills a
/// (n + 1) x (m + 1) table, so both time and memory are O(n * m), where n and m are the lengths
/// of the two input strings.
/// </para>
/// </remarks>
/// <example>
/// <code>
/// double distance = OptimalStringAlignment.Calculate("example", "exmaple");
/// Console.WriteLine(distance); // Output: 1
/// </code>
/// The strings "example" and "exmaple" differ by one transposition of adjacent characters
/// ('a' and 'm'), so the OSA distance is 1.
/// <code>
/// double distance = OptimalStringAlignment.Calculate("kitten", "sitting");
/// Console.WriteLine(distance); // Output: 3
/// </code>
/// The strings "kitten" and "sitting" differ by two substitutions ('k' to 's', 'e' to 'i') and
/// one insertion ('g'), so the OSA distance is 3.
/// </example>
public static class OptimalStringAlignment
{
    /// <summary>
    /// Calculates the Optimal String Alignment distance between two strings.
    /// </summary>
    /// <param name="firstString">The first string.</param>
    /// <param name="secondString">The second string.</param>
    /// <returns>
    /// The number of insertions, deletions, substitutions and adjacent transpositions needed to
    /// turn <paramref name="firstString"/> into <paramref name="secondString"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown when either of the input strings is null.</exception>
    public static double Calculate(string firstString, string secondString)
    {
        // Validate the arguments themselves. Passing nameof(...) here would check a string
        // literal that is never null, so the guard would silently never fire.
        ArgumentNullException.ThrowIfNull(firstString);
        ArgumentNullException.ThrowIfNull(secondString);

        if (firstString == secondString)
        {
            return 0.0;
        }

        // Against an empty string the only option is to insert/delete every character.
        if (firstString.Length == 0)
        {
            return secondString.Length;
        }

        if (secondString.Length == 0)
        {
            return firstString.Length;
        }

        var distanceMatrix = GenerateDistanceMatrix(firstString.Length, secondString.Length);
        CalculateDistance(firstString, secondString, distanceMatrix);

        // The bottom-right cell holds the distance between the two complete strings.
        return distanceMatrix[firstString.Length, secondString.Length];
    }

    /// <summary>
    /// Generates the initial distance matrix for the given lengths of the two strings.
    /// </summary>
    /// <param name="firstLength">The length of the first string.</param>
    /// <param name="secondLength">The length of the second string.</param>
    /// <returns>
    /// A (firstLength + 1) x (secondLength + 1) matrix whose first column and first row hold the
    /// distances from each prefix to the empty string; all other cells are zero.
    /// </returns>
    private static int[,] GenerateDistanceMatrix(int firstLength, int secondLength)
    {
        // One extra row/column represents the empty prefix; indices run 0..length inclusive.
        var distanceMatrix = new int[firstLength + 1, secondLength + 1];

        for (var i = 0; i <= firstLength; i++)
        {
            distanceMatrix[i, 0] = i;
        }

        for (var j = 0; j <= secondLength; j++)
        {
            distanceMatrix[0, j] = j;
        }

        return distanceMatrix;
    }

    /// <summary>
    /// Fills the distance matrix for the given strings using the Optimal String Alignment recurrence.
    /// </summary>
    /// <param name="firstString">The first string.</param>
    /// <param name="secondString">The second string.</param>
    /// <param name="distanceMatrix">The initialized distance matrix; it is filled in place.</param>
    /// <returns>The same matrix instance, with every cell [i, j] set to the distance between the
    /// first i characters of <paramref name="firstString"/> and the first j characters of
    /// <paramref name="secondString"/>.</returns>
    private static int[,] CalculateDistance(string firstString, string secondString, int[,] distanceMatrix)
    {
        for (var i = 1; i <= firstString.Length; i++)
        {
            for (var j = 1; j <= secondString.Length; j++)
            {
                var cost = firstString[i - 1] == secondString[j - 1] ? 0 : 1;

                var best = Minimum(
                    distanceMatrix[i - 1, j - 1] + cost, // substitution (or match when cost is 0)
                    distanceMatrix[i, j - 1] + 1,        // insertion
                    distanceMatrix[i - 1, j] + 1);       // deletion

                // The last two characters of both prefixes are the same pair in swapped order.
                var isAdjacentSwap = i > 1 && j > 1
                    && firstString[i - 1] == secondString[j - 2]
                    && firstString[i - 2] == secondString[j - 1];

                if (isAdjacentSwap)
                {
                    // A transposition is one edit. When all four characters are equal the
                    // diagonal path above is already at least as cheap, so this never overcounts.
                    best = Math.Min(best, distanceMatrix[i - 2, j - 2] + 1);
                }

                distanceMatrix[i, j] = best;
            }
        }

        return distanceMatrix;
    }

    /// <summary>
    /// Returns the minimum of three integers.
    /// </summary>
    /// <param name="a">The first integer.</param>
    /// <param name="b">The second integer.</param>
    /// <param name="c">The third integer.</param>
    /// <returns>The minimum of the three integers.</returns>
    private static int Minimum(int a, int b, int c) => Math.Min(a, Math.Min(b, c));
}
157+
}

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ find more than one implementation for the same objective but using different alg
185185
* [Hamming Distance](./Algorithms/Strings/Similarity/HammingDistance.cs)
186186
* [Jaro Similarity](./Algorithms/Strings/Similarity/JaroSimilarity.cs)
187187
* [Jaro-Winkler Distance](./Algorithms/Strings/Similarity/JaroWinklerDistance.cs)
188+
* [Optimal String Alignment](./Algorithms/Strings/Similarity/OptimalStringAlignment.cs)
188189
* [Pattern Matching](./Algorithms/Strings/PatternMatching/)
189190
* [Bitap Pattern Matching](./Algorithms/Strings/PatternMatching/Bitap.cs)
190191
* [Naive String Search](./Algorithms/Strings/PatternMatching/NaiveStringSearch.cs)

0 commit comments

Comments
 (0)