using System;

namespace Algorithms.Strings.Similarity
{
    /// <summary>
    /// Provides a method to calculate the Optimal String Alignment (OSA) distance between two strings.
    /// </summary>
    /// <remarks>
    /// <para>
    /// The Optimal String Alignment distance, also known as the restricted Damerau-Levenshtein distance,
    /// is the minimum number of edit operations required to transform one string into the other, where
    /// the allowed operations are:
    /// </para>
    /// <list type="number">
    /// <item><description>Insertion: adding a single character.</description></item>
    /// <item><description>Deletion: removing a single character.</description></item>
    /// <item><description>Substitution: replacing one character with another.</description></item>
    /// <item><description>Transposition: swapping two adjacent characters (this is what distinguishes
    /// OSA from the plain Levenshtein distance).</description></item>
    /// </list>
    /// <para>
    /// OSA is computed under the restriction that no substring is edited more than once: after two
    /// adjacent characters have been transposed, nothing may be inserted between them or otherwise
    /// change them again. The unrestricted Damerau-Levenshtein distance has no such restriction, so the
    /// two metrics can differ; for example the OSA distance between "CA" and "ABC" is 3, whereas the
    /// unrestricted Damerau-Levenshtein distance is 2.
    /// </para>
    /// <para>
    /// The algorithm fills a (n + 1) x (m + 1) table, so it needs O(n * m) time and memory, where n and m
    /// are the lengths of the two input strings. It is efficient for moderate-sized strings but may
    /// become expensive for very long ones. Characters are compared as UTF-16 code units.
    /// </para>
    /// </remarks>
    /// <example>
    /// "example" and "exmaple" differ by one transposition of adjacent characters ('a' and 'm'):
    /// <code>
    /// double distance = OptimalStringAlignment.Calculate("example", "exmaple");
    /// Console.WriteLine(distance); // Output: 1
    /// </code>
    /// "kitten" and "sitting" need three edits (substitute 'k' with 's', substitute 'e' with 'i',
    /// insert 'g'):
    /// <code>
    /// double distance = OptimalStringAlignment.Calculate("kitten", "sitting");
    /// Console.WriteLine(distance); // Output: 3
    /// </code>
    /// </example>
    public static class OptimalStringAlignment
    {
        /// <summary>
        /// Calculates the Optimal String Alignment distance between two strings.
        /// </summary>
        /// <param name="firstString">The first string.</param>
        /// <param name="secondString">The second string.</param>
        /// <returns>
        /// The Optimal String Alignment distance between the two strings. The value is always a
        /// non-negative whole number; it is returned as a <see cref="double"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when either of the input strings is null.</exception>
        public static double Calculate(string firstString, string secondString)
        {
            // Guard the arguments themselves. nameof(...) evaluates to a non-null string literal,
            // so passing it to ThrowIfNull would never throw.
            ArgumentNullException.ThrowIfNull(firstString);
            ArgumentNullException.ThrowIfNull(secondString);

            // Cheap exits that avoid allocating the table.
            if (firstString == secondString)
            {
                return 0.0;
            }

            if (firstString.Length == 0)
            {
                return secondString.Length;
            }

            if (secondString.Length == 0)
            {
                return firstString.Length;
            }

            var distanceMatrix = GenerateDistanceMatrix(firstString.Length, secondString.Length);
            distanceMatrix = CalculateDistance(firstString, secondString, distanceMatrix);

            // The bottom-right cell holds the distance between the two complete strings.
            return distanceMatrix[firstString.Length, secondString.Length];
        }

        /// <summary>
        /// Generates the initial distance matrix for the given lengths of the two strings.
        /// </summary>
        /// <param name="firstLength">The length of the first string.</param>
        /// <param name="secondLength">The length of the second string.</param>
        /// <returns>
        /// A (firstLength + 1) x (secondLength + 1) matrix whose first column and first row hold the
        /// distances from each prefix to the empty string; all other cells are zero.
        /// </returns>
        private static int[,] GenerateDistanceMatrix(int firstLength, int secondLength)
        {
            // Indices run from 0 (empty prefix) to length inclusive, hence one extra row and column.
            var distanceMatrix = new int[firstLength + 1, secondLength + 1];

            // Turning a prefix of length i into the empty string takes i deletions.
            for (var i = 0; i <= firstLength; i++)
            {
                distanceMatrix[i, 0] = i;
            }

            // Turning the empty string into a prefix of length j takes j insertions.
            for (var j = 0; j <= secondLength; j++)
            {
                distanceMatrix[0, j] = j;
            }

            return distanceMatrix;
        }

        /// <summary>
        /// Fills the distance matrix for the given strings using the Optimal String Alignment recurrence.
        /// </summary>
        /// <param name="firstString">The first string.</param>
        /// <param name="secondString">The second string.</param>
        /// <param name="distanceMatrix">The initial distance matrix; it is updated in place.</param>
        /// <returns>The same matrix instance, with every cell [i, j] holding the distance between the
        /// first i characters of <paramref name="firstString"/> and the first j characters of
        /// <paramref name="secondString"/>.</returns>
        private static int[,] CalculateDistance(string firstString, string secondString, int[,] distanceMatrix)
        {
            for (var i = 1; i <= firstString.Length; i++)
            {
                for (var j = 1; j <= secondString.Length; j++)
                {
                    // Matching characters cost nothing on the diagonal step.
                    var cost = firstString[i - 1] == secondString[j - 1] ? 0 : 1;

                    distanceMatrix[i, j] = Minimum(
                        distanceMatrix[i - 1, j - 1] + cost, // substitution (or match)
                        distanceMatrix[i, j - 1] + 1,        // insertion
                        distanceMatrix[i - 1, j] + 1);       // deletion

                    // The last two characters of both prefixes are each other's mirror image:
                    // they can be reconciled with a single adjacent swap.
                    if (i > 1 && j > 1
                        && firstString[i - 1] == secondString[j - 2]
                        && firstString[i - 2] == secondString[j - 1])
                    {
                        // A transposition is always exactly one edit.
                        distanceMatrix[i, j] = Math.Min(
                            distanceMatrix[i, j],
                            distanceMatrix[i - 2, j - 2] + 1); // transposition
                    }
                }
            }

            return distanceMatrix;
        }

        /// <summary>
        /// Returns the minimum of three integers.
        /// </summary>
        /// <param name="a">The first integer.</param>
        /// <param name="b">The second integer.</param>
        /// <param name="c">The third integer.</param>
        /// <returns>The minimum of the three integers.</returns>
        private static int Minimum(int a, int b, int c)
        {
            return Math.Min(a, Math.Min(b, c));
        }
    }
}