Browse Source

tested steroids

Nicolasticot 4 years ago
parent
commit
ed4fe52cfe
1 changed file with 70 additions and 29 deletions
  1. 70 29
      DeepDrug.py

+ 70 - 29
DeepDrug.py View File

11
 import numpy as np
11
 import numpy as np
12
 import tensorflow as tf
12
 import tensorflow as tf
13
 from sklearn.preprocessing import LabelEncoder
13
 from sklearn.preprocessing import LabelEncoder
14
-from keras.models import Sequential
14
+from keras.models import Sequential, load_model
15
 from keras import optimizers, callbacks
15
 from keras import optimizers, callbacks
16
 from keras.layers import Dense, Flatten, TimeDistributed, Dropout
16
 from keras.layers import Dense, Flatten, TimeDistributed, Dropout
17
 from keras import Input, Model
17
 from keras import Input, Model
23
 
23
 
24
 # ### used to store model prediction in order to plot roc curve
24
 # ### used to store model prediction in order to plot roc curve
25
 
25
 
26
-# In[ ]:
27
-
28
-
29
-class prediction_history(callbacks.Callback):
30
-    def __init__(self):
31
-        self.predhis = []
32
-    def on_epoch_end(self, epoch, logs={}):
33
-        self.predhis.append(model.predict(predictor_train))
34
-
35
-
36
 # ### Creating inputs and outputs
26
 # ### Creating inputs and outputs
37
 
27
 
38
 # In[ ]:
28
 # In[ ]:
130
 # In[8]:
120
 # In[8]:
131
 
121
 
132
 
122
 
133
-def model_new(): # créer un objet modèle
123
+def model_fast_k32(): # créer un objet modèle
124
+    """
125
+    Return a simple sequential model
126
+    
127
+    Returns :
128
+        - model : keras.Model
129
+    """
130
+    inputs = Input(shape=(14,32,32,32))
131
+    conv_1 = Convolution3D(filters=64, kernel_size=32, padding="valid", data_format='channels_first')(inputs)
132
+    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
133
+    drop_1 = Dropout(0.2)(activation_1)
134
+    conv_2 = Convolution3D(filters=128, kernel_size=32, padding="valid", data_format='channels_first')(drop_1)
135
+    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
136
+    maxpool = MaxPooling3D(pool_size=(2,2,2),
137
+                            strides=None,
138
+                            padding='valid',
139
+                            data_format='channels_first')(activation_2)
140
+    drop_2 = Dropout(0.4)(maxpool)
141
+    flatters = Flatten()(drop_2)
142
+    dense = Dense(256)(flatters)
143
+    activation_3 = LeakyReLU(alpha = 0.1)(dense)
144
+    drop_3 = Dropout(0.4)(activation_3)
145
+    output = Dense(3, activation='softmax')(drop_3)
146
+    model = Model(inputs=inputs, outputs=output)
147
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
148
+    print(model.summary)
149
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
150
+                  metrics=["accuracy"])
151
+    return model
152
+
153
+def model_fast_k16(): # créer un objet modèle
134
     """
154
     """
135
     Return a simple sequential model
155
     Return a simple sequential model
136
     
156
     
138
         - model : keras.Model
158
         - model : keras.Model
139
     """
159
     """
140
     inputs = Input(shape=(14,32,32,32))
160
     inputs = Input(shape=(14,32,32,32))
141
-    conv_1 = Convolution3D(filters=64, kernel_size=5, padding="valid", data_format='channels_first')(inputs)
161
+    conv_1 = Convolution3D(filters=64, kernel_size=16, padding="valid", data_format='channels_first')(inputs)
142
     activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
162
     activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
143
     drop_1 = Dropout(0.2)(activation_1)
163
     drop_1 = Dropout(0.2)(activation_1)
144
-    conv_2 = Convolution3D(filters=64, kernel_size=3, padding="valid", data_format='channels_first')(drop_1)
164
+    conv_2 = Convolution3D(filters=128, kernel_size=16, padding="valid", data_format='channels_first')(drop_1)
145
     activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
165
     activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
146
     maxpool = MaxPooling3D(pool_size=(2,2,2),
166
     maxpool = MaxPooling3D(pool_size=(2,2,2),
147
                             strides=None,
167
                             strides=None,
149
                             data_format='channels_first')(activation_2)
169
                             data_format='channels_first')(activation_2)
150
     drop_2 = Dropout(0.4)(maxpool)
170
     drop_2 = Dropout(0.4)(maxpool)
151
     flatters = Flatten()(drop_2)
171
     flatters = Flatten()(drop_2)
152
-    dense = Dense(128)(flatters)
172
+    dense = Dense(256)(flatters)
153
     activation_3 = LeakyReLU(alpha = 0.1)(dense)
173
     activation_3 = LeakyReLU(alpha = 0.1)(dense)
154
     drop_3 = Dropout(0.4)(activation_3)
174
     drop_3 = Dropout(0.4)(activation_3)
155
     output = Dense(3, activation='softmax')(drop_3)
175
     output = Dense(3, activation='softmax')(drop_3)
200
 
220
 
201
 # In[ ]:
221
 # In[ ]:
202
 
222
 
203
-
204
-data = in_out_lists(1400)
223
+sample = 1000
224
+data = in_out_lists(sample)
205
 pockets = np.cumsum(data[1], axis=0)[-1]
225
 pockets = np.cumsum(data[1], axis=0)[-1]
206
 
226
 
207
 
227
 
208
 # In[ ]:
228
 # In[ ]:
209
 
229
 
210
 
230
 
211
-print("with random seed=9001 and a 1400 pockets dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      Total avaible dataset are composed of the following proportions:\n      {} heme, {} nucleotide, {} control".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,
212
-                                                0.145, 0.380, 0.475))
231
+print("with random seed=9001 and a {} pockets dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      Total avaible dataset are composed of the following proportions:\n      {} heme, {} nucleotide, {} control".format(sample, pockets[0]/sample,
232
+                                                                                       pockets[1]/sample,pockets[2]/sample,
233
+                                                                                       0.145, 0.380, 0.475))
213
 
234
 
214
 
235
 
215
 # In[ ]:
236
 # In[ ]:
216
 
237
 
238
+train = int(sample*0.6)
217
 
239
 
218
 data_onehot = data[0]
240
 data_onehot = data[0]
219
 output = data[1]
241
 output = data[1]
220
-X_train = data_onehot[0:1000,]
221
-Y_train = output[0:1000,]
222
-X_test = data_onehot[1000:,]
223
-Y_test = output[1000:,]
242
+
243
+X_train = data_onehot[0:train,]
244
+Y_train = output[0:train,]
245
+X_test = data_onehot[train:,]
246
+Y_test = output[train:,]
224
 
247
 
225
 
248
 
226
 # In[ ]:
249
 # In[ ]:
227
 
250
 
228
 
251
 
229
-my_model = model_new()
252
+my_model = model_fast_k16()
230
 
253
 
231
 
254
 
232
 # In[ ]:
255
 # In[ ]:
233
 
256
 
234
 
257
 
235
 tf.test.is_gpu_available()
258
 tf.test.is_gpu_available()
236
-#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)
237
 
259
 
238
 
260
 
239
 # In[ ]:
261
 # In[ ]:
240
 
262
 
241
 
263
 
242
-history_mild_2mp = my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
243
-my_model.save('new_model_e30_b32_t1000.h5')
264
+my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=50, batch_size=32)
265
+#my_model.save('new_model_e50_b32_t1600.h5')
266
+#my_model = load_model('new_model_e50_b32_t1600.h5')
244
 
267
 
268
+# ## Testing steroids
245
 
269
 
246
-# In[ ]:
270
+with open("steroid.list", "r") as filin: 
271
+    steroid = filin.read() 
272
+    steroid = steroid.split("\n") 
273
+    steroid.pop()
274
+
275
+X_steroid = np.ndarray(shape=(69, 14, 32, 32, 32))
276
+
277
+i = -1
278
+for pocket in steroid:
279
+    i += 1
280
+    X_steroid[i,] = np.load("deepdrug3d_voxel_data/"+pocket+".npy")
247
 
281
 
282
+Y_pred_steroid = my_model.predict(X_steroid)
283
+Y_pred_steroid = Y_pred_steroid.round()
248
 
284
 
249
-#predictions=prediction_history()
285
+steroid_predict = Y_pred_steroid.cumsum(axis=0)
286
+print("On 69 steroid-binded pockets, prediction are the following:\n\
287
+      predicted as heme:\t{}\npredicted as nucleotide:\t{}\n\
288
+      predicted as control:\t{}\n".format(steroid_predict[0],
289
+                                          steroid_predict[1],
290
+                                          steroid_predict[2]))
250
 
291