Skip to content

Commit 1e4769b

Browse files
committed
Add csv_diff.py and command-line arguments
1 parent ee6a3ab commit 1e4769b

File tree

2 files changed

+92
-2
lines changed

2 files changed

+92
-2
lines changed

csv_diff.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
"""Report the rows of one CSV table that have no matching row in another.

Usage: python csv_diff.py file1.csv file2.csv

A row of file 2 "matches" a row of file 1 when the values in columns
MZ_COLUMN and RT_COLUMN agree within ``mzdiff`` / ``rtdiff`` and the
trailing value vectors (columns FIRST_VECTOR_COLUMN onward) are either
identical or have a Pearson correlation of at least ``corrthresh``.
The script prints the number of file-2 rows without any match.
"""
import csv
import sys

import numpy as np
# ``scipy.stats.stats`` is a deprecated private namespace; the public
# location of ``pearsonr`` is ``scipy.stats``.
from scipy.stats import pearsonr

# Matching tolerances (printed at start-up).
mzdiff = 0
rtdiff = 0
corrthresh = 0.99

# Layout of a data row *after* the blank fields have been dropped.
# NOTE(review): the tolerance names suggest column 3 is m/z and column 4 is
# retention time -- confirm against the CSV producer.
BLANK_FIELDS_PER_ROW = 5
MZ_COLUMN = 3
RT_COLUMN = 4
FIRST_VECTOR_COLUMN = 9


def drop_blank_fields(fields, count):
    """Return *fields* without its first *count* empty-string entries.

    Raises:
        ValueError: if *fields* holds fewer than *count* empty strings.
            Continuing would silently shift every later column index.
    """
    kept = []
    remaining = count
    for field in fields:
        if remaining and field == '':
            remaining -= 1
        else:
            kept.append(field)
    if remaining:
        raise ValueError(
            "expected {} blank fields, found {}".format(count, count - remaining)
        )
    return kept


def load_table(path):
    """Read the CSV at *path* and return ``(rows, vectors)`` as NumPy arrays.

    The first line is treated as a header and skipped.  Every other
    non-empty line has its first BLANK_FIELDS_PER_ROW empty fields removed
    and the remaining fields converted to ``float``.  ``rows`` holds the
    full converted rows; ``vectors`` holds columns FIRST_VECTOR_COLUMN
    onward of each row.

    Raises:
        ValueError: if a line has too few blank fields or a non-numeric
            field; the message includes the file name and line number.
    """
    rows = []
    vectors = []
    with open(path, newline='') as handle:
        reader = csv.reader(handle)
        if next(reader, None) is not None:
            print("Removing Blanks")
        for fields in reader:
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            try:
                # Build a real list: on Python 3 ``map`` is a lazy iterator,
                # which NumPy would store as an opaque object.
                values = [
                    float(field)
                    for field in drop_blank_fields(fields, BLANK_FIELDS_PER_ROW)
                ]
            except ValueError as err:
                raise ValueError(
                    "{}, line {}: {}".format(path, reader.line_num, err)
                ) from err
            rows.append(values)
            vectors.append(values[FIRST_VECTOR_COLUMN:])
    return np.array(rows), np.array(vectors)


def rows_match(row_a, vec_a, row_b, vec_b, mz_tol, rt_tol, corr_min):
    """Return True when the two rows agree within the given tolerances."""
    within_tolerance = (
        abs(row_a[MZ_COLUMN] - row_b[MZ_COLUMN]) <= mz_tol
        and abs(row_a[RT_COLUMN] - row_b[RT_COLUMN]) <= rt_tol
    )
    if not within_tolerance:
        return False
    # Identical vectors always match.  This also covers constant vectors,
    # for which the Pearson correlation is undefined (NaN).
    if np.array_equal(vec_a, vec_b):
        return True
    # Compare against the threshold instead of testing ``== 1``: floating
    # point rounding can leave a perfect correlation just below 1.0.
    return bool(pearsonr(vec_b, vec_a)[0] >= corr_min)


def find_differences(rows1, vecs1, rows2, vecs2, mz_tol, rt_tol, corr_min):
    """Return the rows of table 2 that match no row of table 1.

    ``rowsN`` and ``vecsN`` are parallel sequences as returned by
    ``load_table``.  The result preserves the order of ``rows2``.
    """
    return [
        row_b
        for row_b, vec_b in zip(rows2, vecs2)
        if not any(
            rows_match(row_a, vec_a, row_b, vec_b, mz_tol, rt_tol, corr_min)
            for row_a, vec_a in zip(rows1, vecs1)
        )
    ]


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv``."""
    argv = sys.argv if argv is None else argv

    print("Difference Parameters")
    print("Median MZ Diff: "+ str(mzdiff))
    print("Median RT Diff: "+ str(rtdiff))
    print("Correlation Threshold: "+ str(corrthresh))
    if len(argv) < 3:
        print("python csv_diff.py file1.csv file2.csv")
        # Non-zero status: missing arguments is an error, as in metric_euc.py.
        sys.exit(1)

    print("Programs gives the differences in file 2 with respect to file 1")
    line_array1, vec_array1 = load_table(argv[1])
    line_array2, vec_array2 = load_table(argv[2])
    print(vec_array1.shape)
    print(vec_array2.shape)

    reported_difference = find_differences(
        line_array1, vec_array1, line_array2, vec_array2,
        mzdiff, rtdiff, corrthresh,
    )
    print(len(reported_difference))


if __name__ == "__main__":
    main()

metric_euc.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import scipy.spatial.distance as eucd
33
from scipy.stats.stats import pearsonr
4+
import sys
45

56
line_array=[]
67
vec_array=[]
@@ -12,7 +13,12 @@
1213
print("Median RT Diff: "+ str(rtdiff))
1314
print("Correlation Threshold: "+ str(corrthresh))
1415
table_labels=0
15-
with open('../csvFiles/test1.csv') as f:
16+
17+
if(len(sys.argv)<3):
18+
print("python metric_euc.py csv_file.csv duplicate_file.csv")
19+
sys.exit(1)
20+
21+
with open(sys.argv[1]) as f:
1622
first=0
1723
for line in f:
1824
line=line.rstrip('\n')
@@ -67,7 +73,7 @@
6773

6874

6975
write_flag=0
70-
with open('../csvFiles/duplicates1.csv', 'w') as f:
76+
with open(sys.argv[2], 'w') as f:
7177
f.write('duplicate_id,'+dell.join(table_labels)+'\n')
7278
ind=1
7379
for t in tup_duplicates:

0 commit comments

Comments
 (0)