Skip to content

Commit 4e4f048

Browse files
authored
Add files via upload
1 parent e462f43 commit 4e4f048

4 files changed

+69
-10
lines changed

directpath-comparison-experiment.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
import re
33
from czno import getMinBarrierPathDijkstra, basePairList, energyOfStr
4+
from previous_direct_methods import MorganHiggs1998GreedyDirect, Voss2004GreedyDirect
45

56
def HammingDistance(structure1, structure2):
67
bp1 = set(basePairList(structure1))
@@ -33,14 +34,24 @@ def PathwayToBarrier(sequence, pathway):
3334
return max([energyOfStr(sequence, s) for s in pathway])
3435

3536

36-
def SingleExperiment(sequence, structure1, structure2):
37+
def DirectPathSingleExperiment(sequence, structure1, structure2):
    """Compare energy barriers of several direct-path methods on one pair.

    Returns a list of four barrier values (each obtained through
    PathwayToBarrier), in this order:

    1. the pathway from getMinBarrierPathDijkstra,
    2. the pathway from MorganHiggs1998GreedyDirect,
    3. the pathway from Voss2004GreedyDirect with its default arguments,
    4. the lowest barrier over 1000 runs of Voss2004GreedyDirect called
       with k=10 and seeds 0..999.
    """
    barriers = [
        PathwayToBarrier(
            sequence,
            getMinBarrierPathDijkstra(sequence, structure1, structure2)),
        PathwayToBarrier(
            sequence,
            MorganHiggs1998GreedyDirect(sequence, structure1, structure2)),
        PathwayToBarrier(
            sequence,
            Voss2004GreedyDirect(sequence, structure1, structure2)),
    ]

    # Randomized variant: keep the best (lowest) barrier across all seeds.
    # The leading infinity mirrors a running minimum started at +inf.
    randomized = [float("inf")]
    for trial_seed in range(1000):
        path = Voss2004GreedyDirect(sequence, structure1, structure2, 10, trial_seed)
        randomized.append(PathwayToBarrier(sequence, path))
    barriers.append(min(randomized))

    return barriers
4657

@@ -52,6 +63,6 @@ def SingleExperiment(sequence, structure1, structure2):
5263
for j in range(i+1, len(data[1])):
5364
hamdist = HammingDistance(data[1][i], data[1][j])
5465
if 5 <= hamdist and hamdist <= 20:
55-
result = SingleExperiment(data[0], data[1][i], data[1][j])
66+
result = DirectPathSingleExperiment(data[0], data[1][i], data[1][j])
5667
print(str(len(data[0]))+" "+str(hamdist)+" "+" ".join([str(x) for x in result]))
5768

previous_direct_methods.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
2+
import random
3+
from czno import energyOfStr, basePairList, isExclusive
4+
5+
def basePairSetToDotNotation(sequence, bp):
    """Render a collection of base pairs as a dot-bracket string.

    The result has one character per element of ``sequence``: "(" at the
    first index of every pair in ``bp``, ")" at the second index, and "."
    everywhere else. Each pair must satisfy ``pair[0] < pair[1]``; this is
    checked with an assertion.
    """
    symbols = ["."] * len(sequence)
    for pair in bp:
        left, right = pair[0], pair[1]
        assert left < right
        symbols[left] = "("
        symbols[right] = ")"
    return "".join(symbols)
12+
13+
14+
15+
def MorganHiggs1998GreedyDirect(sequence, structure1, structure2):
    """Build a direct pathway from structure1 to structure2 greedily.

    Both structures are converted to base-pair sets with basePairList.
    While pairs of the target are still missing, the missing pair for which
    isExclusive is true against the fewest still-to-open pairs is selected
    (ties resolved by tuple ordering); the pairs it is exclusive with are
    removed one at a time, then the selected pair is added. Once nothing is
    left to add, the remaining surplus pairs are removed one at a time.

    Returns the list of dot-bracket strings visited, starting with
    ``structure1`` itself; one entry is appended per single-pair change.
    """
    current = set(basePairList(structure1))
    target = set(basePairList(structure2))
    pathway = [structure1]

    while current != target:
        to_close = sorted(target - current)
        to_open = sorted(current - target)

        if not to_close:
            # Only surplus pairs remain: open them in sorted order.
            for pair in to_open:
                current.remove(pair)
                pathway.append(basePairSetToDotNotation(sequence, current))
            continue

        # Rank each missing pair by how many to-open pairs it excludes.
        scored = [
            (sum([isExclusive(cand, opened) for opened in to_open]), cand)
            for cand in to_close
        ]
        chosen = min(scored)[1]

        # Clear everything that blocks the chosen pair, then close it.
        blockers = [opened for opened in to_open if isExclusive(chosen, opened)]
        for pair in blockers:
            current.remove(pair)
            pathway.append(basePairSetToDotNotation(sequence, current))
        current.add(chosen)
        pathway.append(basePairSetToDotNotation(sequence, current))

    return pathway
35+
36+
def Voss2004GreedyDirect(sequence, structure1, structure2, k = 1, seed = 12345):
    """Build a direct pathway from structure1 to structure2, energy-greedily.

    At every step the candidate moves are:

    * removing any pair present in the current structure but not in the
      target, and
    * adding any pair of the target that is still missing and for which
      isExclusive is false against every pair still to be removed.

    Each candidate's resulting structure is scored with energyOfStr, the
    candidates are ordered by (energy, structure string), and one of the
    first ``k`` is picked with random.choice. With ``k = 1`` the choice is
    therefore the lowest-ranked candidate.

    Parameters:
        sequence:   sequence passed through to energyOfStr and used for the
                    length of the dot-bracket strings.
        structure1: starting structure (dot-bracket string).
        structure2: target structure (dot-bracket string).
        k:          how many of the best-ranked candidates are eligible at
                    each step. Must be >= 1; otherwise random.choice is
                    given an empty list and raises IndexError.
        seed:       value passed to random.seed at the start of the call.

    Returns:
        The list of dot-bracket strings visited, starting with
        ``structure1``.
    """
    # NOTE: this reseeds the module-level generator, as the function always
    # did; callers sharing the global `random` state are affected.
    random.seed(seed)
    A = set(basePairList(structure1))
    B = set(basePairList(structure2))
    answer = [structure1]
    while A != B:
        NeedToClose = sorted(B - A)
        NeedToOpen = sorted(A - B)
        PossibleClose = [x for x in NeedToClose if sum([isExclusive(x, y) for y in NeedToOpen]) == 0]

        # Keep the move next to its resulting state so the chosen move can
        # be applied to A afterwards.
        Candidates = []
        for x in NeedToOpen + PossibleClose:
            state = basePairSetToDotNotation(sequence, A ^ {x})
            Candidates.append((energyOfStr(sequence, state), state, x))
        # Rank on (energy, state) only, i.e. the same ordering as sorting
        # plain (energy, state) tuples.
        Candidates.sort(key=lambda c: (c[0], c[1]))

        _, NextState, change = random.choice(Candidates[0:k])
        # Fix: advance the current structure. Previously A was never
        # updated, so the loop could not terminate whenever A != B.
        A ^= {change}
        answer.append(NextState)
    return answer

random_dataset_generator.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,7 @@ def basePairList(secStr):
4141

4242
def randomSeqGenerator(length, seed):
    """Return a pseudo-random string of ``length`` letters from A, C, G, U.

    The module-level random generator is seeded with ``seed`` first, so the
    same (length, seed) pair always yields the same string.
    """
    random.seed(seed)
    alphabet = ["A", "C", "G", "U"]
    letters = []
    for _ in range(length):
        letters.append(random.choice(alphabet))
    return "".join(letters)
4945

5046
def isoLenGenerator(length, dist_lower, dist_upper, n):
5147
result = [set([]) for i in range(dist_lower, dist_upper+1)]

research.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ python3 iso_len_time_experiment.py dataset-iso-len.txt > result-isolen-time.txt
55
python3 random_dataset_generator.py isodist > dataset-iso-dist.txt
66
python3 iso_dist_time_experiment.py dataset-iso-dist.txt > result-isodist-time.txt
77
python3 s151Rfam-localminima-generator.py > s151-localminima-dataset.txt
8+
tar cvJf s151-localminima-dataset.txt.tar.xz s151-localminima-dataset.txt
9+
tar xvJf s151-localminima-dataset.txt.tar.xz

0 commit comments

Comments
 (0)