|
@@ -21,11 +21,23 @@ class Conformations:
|
21
|
21
|
|
22
|
22
|
def __init__(self, filename):
|
23
|
23
|
"""
|
24
|
|
- df : pd.DataFrame object
|
|
24
|
+ filename : a .pdb file, or several .pdb files, containing the conformations
|
|
25
|
+
|
|
26
|
+ Attribute :
|
|
27
|
+ - df : pd.DataFrame object
|
|
28
|
+ - filename : a list of str, paths to the structure files
|
|
29
|
+ - identity_dist : distance matrix computed with the identity method
|
|
30
|
+ Initially empty, must be computed
|
|
31
|
+ - dissimilarity_dist : matrix distance computed with the
|
|
32
|
+ dissimilarity method, initially empty because
|
|
33
|
+ the computation time is long
|
25
|
34
|
Each row of the dataframe is a conformation and each column a position
|
26
|
35
|
of the sequence.
|
27
|
36
|
"""
|
28
|
37
|
self.df = pd.DataFrame()
|
|
38
|
+ self.filename = filename
|
|
39
|
+ self.identity_dist = None
|
|
40
|
+ self.dissimilarity_dist = None
|
29
|
41
|
for chain_name, chain in pbx.chains_from_files([filename]):
|
30
|
42
|
dihedrals = chain.get_phi_psi_angles()
|
31
|
43
|
pb_seq = pbx.assign(dihedrals)
|
|
@@ -34,7 +46,7 @@ class Conformations:
|
34
|
46
|
|
35
|
47
|
def identity(self):
|
36
|
48
|
"""
|
37
|
|
- Returns a matrix of the distance between all conformations based on
|
|
49
|
+ Computes a distance matrix between all conformations based on
|
38
|
50
|
whether the PB at each position is identical or not.
|
39
|
51
|
Returns the matrix as a numpy.ndarray object.
|
40
|
52
|
"""
|
|
@@ -43,8 +55,10 @@ class Conformations:
|
43
|
55
|
'o':15,'p':16} # dict to transform PB to int,
|
44
|
56
|
# necessary for the pdist function
|
45
|
57
|
dfnum = self.df.replace(dict_str_to_float)
|
|
58
|
+ dist = squareform(pdist(dfnum, metric ='jaccard'))
|
|
59
|
+ self.identity_dist = dist
|
46
|
60
|
|
47
|
|
- return squareform(pdist(dfnum, metric ='jaccard'))
|
|
61
|
+ return dist
|
48
|
62
|
|
49
|
63
|
def dissimilarity(self, matrix=None):
|
50
|
64
|
"""
|
|
@@ -69,9 +83,9 @@ class Conformations:
|
69
|
83
|
dissimilarity[i][j] += matrix[confs.df.loc[i,k]][confs.df.loc[j,k]]
|
70
|
84
|
dissimilarity = dissimilarity + dissimilarity.T # fills the whole matrix
|
71
|
85
|
|
72
|
|
- return dissimilarity
|
|
86
|
+ return self.dist_from_dissimilarity(dissimilarity)
|
73
|
87
|
|
74
|
|
- def dist_from_dissimilarity(diss_matrix):
|
|
88
|
+ def dist_from_dissimilarity(self, diss_matrix):
|
75
|
89
|
"""
|
76
|
90
|
Using the substitution matrix from the PBxplore package, the obtained
|
77
|
91
|
dissimilarity matrix has both positive and negative values. Low value
|
|
@@ -85,14 +99,20 @@ class Conformations:
|
85
|
99
|
diss_matrix = -diss_matrix
|
86
|
100
|
diss_matrix = (diss_matrix - np.min(diss_matrix))/np.ptp(diss_matrix)
|
87
|
101
|
dist = diss_matrix * abs((np.identity(confs.df.shape[0])-1))
|
88
|
|
-
|
|
102
|
+
|
|
103
|
+ self.dissimilarity_dist = dist
|
|
104
|
+
|
89
|
105
|
return dist
|
|
106
|
+
|
|
107
|
+ #def small_kmedoid(self, ncluster):
|
|
108
|
+ # pass
|
|
109
|
+
|
90
|
110
|
|
91
|
111
|
if __name__ == "__main__":
|
92
|
|
- if len(sys.argv) != 2:
|
|
112
|
+ if len(sys.argv) < 2:
|
93
|
113
|
sys.exit("Error : usage '$ python3 projet8 md.pdb'")
|
94
|
|
-
|
|
114
|
+
|
95
|
115
|
confs = Conformations(sys.argv[1])
|
96
|
|
-
|
|
116
|
+
|
97
|
117
|
|
98
|
118
|
print(confs.df)
|