Browse Source

modified code (__init__) to save distance matrixes in Conformations instances

nicolas-zimmermann 4 years ago
parent
commit
dfe4789af2
1 changed files with 29 additions and 9 deletions
  1. 29 9
      src/projet8.py

+ 29 - 9
src/projet8.py View File

21
 
21
 
22
     def __init__(self, filename):
22
     def __init__(self, filename):
23
         """
23
         """
24
-        df : pd.DataFrame object
24
+        filename : a .pdb file or several .pdb files containing the conformations
25
+
26
+        Attribute :
27
+            - df : pd.DataFrame object
28
+            - filename : a list of str, paths to the structure files
29
+            - identity_dist : distance matrix computed with the identity method
30
+                            Initially empty, must be computed
31
+            - dissimilarity_dist : matrix distance computed with the
32
+                                   dissimilarity method, initially empty because
33
+                                   the computation time is long
25
         Each row of the dataframe is a conformation and each column a position
34
         Each row of the dataframe is a conformation and each column a position
26
         of the sequence.
35
         of the sequence.
27
         """
36
         """
28
         self.df = pd.DataFrame()
37
         self.df = pd.DataFrame()
38
+        self.filename = filename
39
+        self.identity_dist = None
40
+        self.dissimilarity_dist = None
29
         for chain_name, chain in pbx.chains_from_files([filename]): 
41
         for chain_name, chain in pbx.chains_from_files([filename]): 
30
                 dihedrals = chain.get_phi_psi_angles() 
42
                 dihedrals = chain.get_phi_psi_angles() 
31
                 pb_seq = pbx.assign(dihedrals) 
43
                 pb_seq = pbx.assign(dihedrals) 
34
  
46
  
35
     def identity(self):
47
     def identity(self):
36
         """
48
         """
37
-        Returns a matrix of the distance between all conformations based on
49
+        Computes a distance matrix between all conformations based on
38
         whether the PB at each position is identical or not.
50
         whether the PB at each position is identical or not.
39
         Returns the matrix as a numpy.ndarray object.
51
         Returns the matrix as a numpy.ndarray object.
40
         """
52
         """
43
                              'o':15,'p':16} # dict to transform PB to int,
55
                              'o':15,'p':16} # dict to transform PB to int,
44
                                             # necessary for the pdist function
56
                                             # necessary for the pdist function
45
         dfnum = self.df.replace(dict_str_to_float)
57
         dfnum = self.df.replace(dict_str_to_float)
58
+        dist = squareform(pdist(dfnum, metric ='jaccard'))
59
+        self.identity_dist = dist
46
 
60
 
47
-        return squareform(pdist(dfnum, metric ='jaccard'))
61
+        return dist
48
 
62
 
49
     def dissimilarity(self, matrix=None):
63
     def dissimilarity(self, matrix=None):
50
         """
64
         """
69
                     dissimilarity[i][j] += matrix[confs.df.loc[i,k]][confs.df.loc[j,k]]
83
                     dissimilarity[i][j] += matrix[confs.df.loc[i,k]][confs.df.loc[j,k]]
70
         dissimilarity = dissimilarity + dissimilarity.T # fills the whole matrix
84
         dissimilarity = dissimilarity + dissimilarity.T # fills the whole matrix
71
         
85
         
72
-        return dissimilarity
86
+        return self.dist_from_dissimilarity(dissimilarity)
73
 
87
 
74
-    def dist_from_dissimilarity(diss_matrix):
88
+    def dist_from_dissimilarity(self, diss_matrix):
75
         """
89
         """
76
         Using the substitution matrix from the PBxplore package, the obtained
90
         Using the substitution matrix from the PBxplore package, the obtained
77
         dissimilarity matrix has both positive and negative values. Low value
91
         dissimilarity matrix has both positive and negative values. Low value
85
         diss_matrix = -diss_matrix
99
         diss_matrix = -diss_matrix
86
         diss_matrix = (diss_matrix - np.min(diss_matrix))/np.ptp(diss_matrix)
100
         diss_matrix = (diss_matrix - np.min(diss_matrix))/np.ptp(diss_matrix)
87
         dist = diss_matrix * abs((np.identity(confs.df.shape[0])-1))
101
         dist = diss_matrix * abs((np.identity(confs.df.shape[0])-1))
88
-        
102
+
103
+        self.dissimilarity_dist = dist
104
+
89
         return dist
105
         return dist
106
+    
107
+    #def small_kmedoid(self, ncluster):
108
+    #    pass
109
+    
90
 
110
 
91
 if __name__ == "__main__":
111
 if __name__ == "__main__":
92
-    if len(sys.argv) != 2:
112
+    if len(sys.argv) < 2:
93
         sys.exit("Error : usage '$ python3 projet8 md.pdb'")
113
         sys.exit("Error : usage '$ python3 projet8 md.pdb'")
94
-
114
+    
95
     confs = Conformations(sys.argv[1])
115
     confs = Conformations(sys.argv[1])
96
-
116
+    
97
 
117
 
98
     print(confs.df)
118
     print(confs.df)