Browse Source

added Conformations.small_kmedoids method to compute kmedoids

nicolas-zimmermann 4 years ago
parent
commit
1f2f9fcfef
1 changed file with 37 additions and 5 deletions
  1. 37 5
      src/projet8.py

+ 37 - 5
src/projet8.py View File

@@ -36,8 +36,8 @@ class Conformations:
36 36
         """
37 37
         self.df = pd.DataFrame()
38 38
         self.filename = filename
39
-        self.identity_dist = None
40
-        self.dissimilarity_dist = None
39
+        self.identity_dist = np.array(None)
40
+        self.dissimilarity_dist = np.array(None)
41 41
         for chain_name, chain in pbx.chains_from_files([filename]): 
42 42
                 dihedrals = chain.get_phi_psi_angles() 
43 43
                 pb_seq = pbx.assign(dihedrals) 
@@ -104,9 +104,41 @@ class Conformations:
104 104
 
105 105
         return dist
106 106
     
107
-    #def small_kmedoid(self, ncluster):
108
-    #    pass
109
-    
107
+    def small_kmedoids(self, matrix, ncluster):
108
+        """
109
+        Returns clusters and medoids computed with kmedoids on a distance matrix
110
+        Arguments :
111
+            - matrix : str, ('identity' or 'dissimilarity')
112
+                       corresponding to the desired distance matrix to
113
+                       computed
114
+            - ncluster number of clusters to be computed
115
+        """
116
+        if matrix == 'identity':
117
+
118
+            matrix = self.identity_dist
119
+            if matrix.all() == None:
120
+                print("Error : distance matrix from identity hasn't been " \
121
+                      "computed yet")
122
+                return
123
+        elif matrix == 'dissimilarity':
124
+            matrix = self.dissimilarity_dist
125
+            if matrix.all() == None:
126
+                print("Error : distance matrix from dissimilarity hasn't " \
127
+                      "been computed yet")
128
+                return
129
+        
130
+        if ncluster > matrix.shape[0]:
131
+            print("Error : number of desired clusters > number of objects")
132
+            return
133
+
134
+        initial_medoids = np.random.randint(matrix.shape[0], size=ncluster)
135
+        kmed1 = kmedoids(matrix, initial_medoids, data_type='distance_matrix')
136
+        kmed1.process()
137
+
138
+        clusters = kmed1.get_clusters()
139
+        medoids = kmed1.get_medoids()
140
+
141
+        return (clusters, medoids)
110 142
 
111 143
 if __name__ == "__main__":
112 144
     if len(sys.argv) < 2: