Browse Source

modified code (__init__) to save distance matrixes in Conformations instances

nicolas-zimmermann 4 years ago
parent
commit
dfe4789af2
1 changed files with 29 additions and 9 deletions
  1. 29 9
      src/projet8.py

+ 29 - 9
src/projet8.py View File

@@ -21,11 +21,23 @@ class Conformations:
21 21
 
22 22
     def __init__(self, filename):
23 23
         """
24
-        df : pd.DataFrame object
24
+        filename : a .pdb file or several .pdb files containing the conformations
25
+
26
+        Attributes :
27
+            - df : pd.DataFrame object
28
+            - filename : a list of str, paths to the structure files
29
+            - identity_dist : distance matrix computed with the identity method
30
+                              Initially None, must be computed
31
+            - dissimilarity_dist : distance matrix computed with the
32
+                                   dissimilarity method, initially None because
33
+                                   the computation time is long
25 34
         Each row of the dataframe is a conformation and each column a position
26 35
         of the sequence.
27 36
         """
28 37
         self.df = pd.DataFrame()
38
+        self.filename = filename
39
+        self.identity_dist = None
40
+        self.dissimilarity_dist = None
29 41
         for chain_name, chain in pbx.chains_from_files([filename]): 
30 42
                 dihedrals = chain.get_phi_psi_angles() 
31 43
                 pb_seq = pbx.assign(dihedrals) 
@@ -34,7 +46,7 @@ class Conformations:
34 46
  
35 47
     def identity(self):
36 48
         """
37
-        Returns a matrix of the distance between all conformations based on
49
+        Computes a distance matrix between all conformations based on
38 50
         whether the PB at each position is identical or not.
39 51
         Returns the matrix as a numpy.ndarray object.
40 52
         """
@@ -43,8 +55,10 @@ class Conformations:
43 55
                              'o':15,'p':16} # dict to transform PB to int,
44 56
                                             # necessary for the pdist function
45 57
         dfnum = self.df.replace(dict_str_to_float)
58
+        dist = squareform(pdist(dfnum, metric ='jaccard'))
59
+        self.identity_dist = dist
46 60
 
47
-        return squareform(pdist(dfnum, metric ='jaccard'))
61
+        return dist
48 62
 
49 63
     def dissimilarity(self, matrix=None):
50 64
         """
@@ -69,9 +83,9 @@ class Conformations:
69 83
                     dissimilarity[i][j] += matrix[confs.df.loc[i,k]][confs.df.loc[j,k]]
70 84
         dissimilarity = dissimilarity + dissimilarity.T # fills the whole matrix
71 85
         
72
-        return dissimilarity
86
+        return self.dist_from_dissimilarity(dissimilarity)
73 87
 
74
-    def dist_from_dissimilarity(diss_matrix):
88
+    def dist_from_dissimilarity(self, diss_matrix):
75 89
         """
76 90
         Using the substitution matrix from the PBxplore package, the obtained
77 91
         dissimilarity matrix has both positive and negative values. Low value
@@ -85,14 +99,20 @@ class Conformations:
85 99
         diss_matrix = -diss_matrix
86 100
         diss_matrix = (diss_matrix - np.min(diss_matrix))/np.ptp(diss_matrix)
87 101
         dist = diss_matrix * abs((np.identity(confs.df.shape[0])-1))
88
-        
102
+
103
+        self.dissimilarity_dist = dist
104
+
89 105
         return dist
106
+    
107
+    #def small_kmedoid(self, ncluster):
108
+    #    pass
109
+    
90 110
 
91 111
 if __name__ == "__main__":
92
-    if len(sys.argv) != 2:
112
+    if len(sys.argv) < 2:
93 113
         sys.exit("Error : usage '$ python3 projet8 md.pdb'")
94
-
114
+    
95 115
     confs = Conformations(sys.argv[1])
96
-
116
+    
97 117
 
98 118
     print(confs.df)