|
7 | 7 |
|
8 | 8 | #its also called levenshtein distance
|
9 | 9 | #it can be done via recursion too
|
| 10 | +#the recursion version is much more elegant yet less efficient |
10 | 11 | # https://github.com/je-suis-tm/recursion/blob/master/edit%20distance%20recursion.py
|
11 | 12 |
|
12 |
| -#edit distance is to minimumize the steps transfering one string to another |
13 |
| -#the way to solve this problem, is very similar is to knapsack |
| 13 | +#edit distance is to minimize the steps transforming one string to another |
| 14 | +#the way to solve this problem is very similar to knapsack |
14 | 15 | #assume we have two strings a and b
|
15 | 16 | #we build a matrix len(a)*len(b)
|
16 |
| -#however, given lists start at index zero |
| 17 | +#however,given lists start at index zero |
17 | 18 | #our matrix should be (len(a)+1)*(len(b)+1)
|
18 | 19 |
|
19 | 20 | #there are three different ways to transform a string
|
20 |
| -#insert, delete and replace |
| 21 | +#insert,delete and replace |
21 | 22 | #we can use any of them or combined
|
22 | 23 | #lets take a look at the best case first
|
23 | 24 | #assume string a is string b
|
|
27 | 28 | #when string a has nothing in common with string b
|
28 | 29 | #we would have to replace the whole string a
|
29 | 30 | #the steps become the maximum step which is max(len(a),len(b))
|
30 |
| -#for general case, the steps would fall between the worst and the best case |
| 31 | +#for general case,the number of steps would fall between the worst and the best case |
31 | 32 | #assume we are at the mth letter of string a and the nth letter of string b
|
32 |
| -#if we wanna get the optimized steps of transforming string a to string b |
| 33 | +#if we wanna get the optimal steps of transforming string a to string b |
33 | 34 | #we have to make sure at each letter transformation
|
34 |
| -#a[:m] and b[:n] have reached their optimization |
35 |
| -#otherwise, we could always find another combinations of insert, delete and replace |
36 |
| -#to get a more optimized a[:m] and b[:n] |
37 |
| -#it would make our string transformation not so optimized any more |
| 35 | +#a[:m] and b[:n] have reached their optimal status |
| 36 | +#otherwise,we could always find another combination of insert,delete and replace |
| 37 | +#to get a "real" optimal a[:m] and b[:n] |
| 38 | +#it would make our string transformation not so optimal any more |
38 | 39 | #it is the same logic as the optimization of knapsack problem
|
39 | 40 | #after we set our logic straight
|
40 | 41 | #we would take a look at three different approaches
|
41 | 42 | #lets take a look at insertion
|
42 | 43 | #basically we transform a[:m] into b[:n-1] first and then insert the nth letter of string b at the end
|
43 | 44 | #the cumulated steps we have taken should be matrix[m][n-1]+1
|
44 | 45 | #matrix[m][n-1] is the steps for a[:m] to b[:n-1]
|
45 |
| -#for delete, it is vice versa |
| 46 | +#for delete,it is vice versa |
46 | 47 | #the cumulated steps we have taken should be matrix[m-1][n]+1
|
47 |
| -#for replacement, it is a lil bit tricky |
| 48 | +#for replacement,it is a lil bit tricky |
48 | 49 | #there are two scenarios
|
49 | 50 | #if a[m]==b[n]
|
50 | 51 | #it should be matrix[m-1][n-1]
|
51 | 52 | #we dont need any replacement at all
|
52 |
| -#else, it should be matrix[m-1][n-1]+1 |
| 53 | +#else,it should be matrix[m-1][n-1]+1 |
53 | 54 | #we replace mth letter of string a with nth letter of string b
|
54 | 55 | #after we managed to understand three different approaches
|
55 |
| -#we want to take the minimum steps among these three approaches |
| 56 | +#we want to take the minimum number of steps among these three approaches |
56 | 57 | #throughout the iteration of different positions of both strings
|
57 |
| -#in the end, we would get the optimized steps to transform one string to another, YAY |
58 |
| -def edit(a,b): |
| 58 | +#in the end,we would get the optimal steps to transform one string to another,YAY |
def edit_distance(a,b):
    """Return the edit (Levenshtein) distance between a and b.

    The distance is the minimum number of single-element insertions,
    deletions and replacements needed to transform a into b.

    Args:
        a: source sequence (typically a str); must support len() and indexing.
        b: target sequence of the same kind.

    Returns:
        The minimum number of steps as an int. It is 0 when a equals b
        and len(b) or len(a) when the other one is empty.
    """

    len_a=len(a)
    len_b=len(b)

    #c[i][j] holds the minimum number of steps to transform a[:i] into b[:j]
    #row 0 and column 0 stand for the empty prefix
    #hence the matrix is (len(a)+1)*(len(b)+1) instead of len(a)*len(b)
    c=[[0]*(len_b+1) for _ in range(len_a+1)]

    #turning a[:i] into an empty string takes i deletions
    for i in range(len_a+1):
        c[i][0]=i

    #turning an empty string into b[:j] takes j insertions
    for j in range(len_b+1):
        c[0][j]=j

    #both loops start at 1
    #row 0 and column 0 are already final and must not be overwritten
    for i in range(1,len_a+1):
        for j in range(1,len_b+1):

            #no replacement is needed when the current letters match
            replace_cost=0 if a[i-1]==b[j-1] else 1

            #take the cheapest among delete,insert and replace
            c[i][j]=min(c[i-1][j]+1,
                        c[i][j-1]+1,
                        c[i-1][j-1]+replace_cost)

    return c[len_a][len_b]
|
87 | 88 |
|
88 |
| -#lets get some random strings to test |
89 |
| -import random as rd |
90 |
| - |
91 |
| -temp=rd.randint(1,20) |
92 |
| -temp1=rd.randint(1,20) |
93 |
| -alphabet='abcdefghijklmnopqrstuvwxyz' |
94 |
| -temp2='' |
95 |
| -temp3='' |
96 |
| - |
97 |
| -for n in range(temp): |
98 |
| - temp2+=alphabet[rd.randint(0,25)] |
99 |
| - |
100 |
| -for o in range(temp1): |
101 |
| - temp3+=alphabet[rd.randint(0,25)] |
102 |
| - |
103 |
| - |
104 |
| - |
105 |
| - |
106 |
| -print(temp2,temp3) |
107 |
| -print(edit(temp2,temp3)) |
#quick demo: deleting 'i','e' and 'é' from 'baiseé' gives 'bas',so 3 is printed
print(edit_distance('baiseé','bas'))
0 commit comments