tested steroids

Nicolasticot 4 years ago
commit ed4fe52cfe
1 changed file with 70 additions and 29 deletions
  1. DeepDrug.py (+70, −29)

DeepDrug.py

@@ -11,7 +11,7 @@
 import numpy as np
 import tensorflow as tf
 from sklearn.preprocessing import LabelEncoder
-from keras.models import Sequential
+from keras.models import Sequential, load_model
 from keras import optimizers, callbacks
 from keras.layers import Dense, Flatten, TimeDistributed, Dropout
 from keras import Input, Model
@@ -23,16 +23,6 @@ from keras.layers import Convolution3D, MaxPooling3D
 
 # ### used to store model prediction in order to plot roc curve
 
-# In[ ]:
-
-
-class prediction_history(callbacks.Callback):
-    def __init__(self):
-        self.predhis = []
-    def on_epoch_end(self, epoch, logs={}):
-        self.predhis.append(model.predict(predictor_train))
-
-
 # ### Creating inputs and outputs
 
 # In[ ]:
@@ -130,7 +120,37 @@ def model_heavy(): # create a model object
 # In[8]:
 
 
-def model_new(): # create a model object
+def model_fast_k32(): # create a model object
+    """
+    Return a simple sequential model
+    
+    Returns:
+        - model : keras.Model
+    """
+    inputs = Input(shape=(14,32,32,32))
+    conv_1 = Convolution3D(filters=64, kernel_size=32, padding="valid", data_format='channels_first')(inputs)
+    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
+    drop_1 = Dropout(0.2)(activation_1)
+    conv_2 = Convolution3D(filters=128, kernel_size=32, padding="valid", data_format='channels_first')(drop_1)
+    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
+    maxpool = MaxPooling3D(pool_size=(2,2,2),
+                            strides=None,
+                            padding='valid',
+                            data_format='channels_first')(activation_2)
+    drop_2 = Dropout(0.4)(maxpool)
+    flatters = Flatten()(drop_2)
+    dense = Dense(256)(flatters)
+    activation_3 = LeakyReLU(alpha = 0.1)(dense)
+    drop_3 = Dropout(0.4)(activation_3)
+    output = Dense(3, activation='softmax')(drop_3)
+    model = Model(inputs=inputs, outputs=output)
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
+    model.summary()
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
+                  metrics=["accuracy"])
+    return model
+
+def model_fast_k16(): # create a model object
     """
     Return a simple sequential model
     
@@ -138,10 +158,10 @@ def model_new(): # create a model object
         - model : keras.Model
     """
     inputs = Input(shape=(14,32,32,32))
-    conv_1 = Convolution3D(filters=64, kernel_size=5, padding="valid", data_format='channels_first')(inputs)
+    conv_1 = Convolution3D(filters=64, kernel_size=16, padding="valid", data_format='channels_first')(inputs)
     activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
     drop_1 = Dropout(0.2)(activation_1)
-    conv_2 = Convolution3D(filters=64, kernel_size=3, padding="valid", data_format='channels_first')(drop_1)
+    conv_2 = Convolution3D(filters=128, kernel_size=16, padding="valid", data_format='channels_first')(drop_1)
     activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
     maxpool = MaxPooling3D(pool_size=(2,2,2),
                             strides=None,
@@ -149,7 +169,7 @@ def model_new(): # create a model object
                             data_format='channels_first')(activation_2)
     drop_2 = Dropout(0.4)(maxpool)
     flatters = Flatten()(drop_2)
-    dense = Dense(128)(flatters)
+    dense = Dense(256)(flatters)
     activation_3 = LeakyReLU(alpha = 0.1)(dense)
     drop_3 = Dropout(0.4)(activation_3)
     output = Dense(3, activation='softmax')(drop_3)
@@ -200,51 +220,72 @@ def model_light(): # create a model object
 
 # In[ ]:
 
-
-data = in_out_lists(1400)
+sample = 1000
+data = in_out_lists(sample)
 pockets = np.cumsum(data[1], axis=0)[-1]
 
 
 # In[ ]:
 
 
-print("with random seed=9001 and a 1400 pocket dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      The total available dataset is composed of the following proportions:\n      {} heme, {} nucleotide, {} control".format(pockets[0]/1400, pockets[1]/1400, pockets[2]/1400,
-                                                0.145, 0.380, 0.475))
+print("with random seed=9001 and a {} pocket dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      The total available dataset is composed of the following proportions:\n      {} heme, {} nucleotide, {} control".format(sample, pockets[0]/sample,
+                                                                                       pockets[1]/sample, pockets[2]/sample,
+                                                                                       0.145, 0.380, 0.475))
 
 
 # In[ ]:
 
+train = int(sample*0.6)
 
 data_onehot = data[0]
 output = data[1]
-X_train = data_onehot[0:1000,]
-Y_train = output[0:1000,]
-X_test = data_onehot[1000:,]
-Y_test = output[1000:,]
+
+X_train = data_onehot[0:train,]
+Y_train = output[0:train,]
+X_test = data_onehot[train:,]
+Y_test = output[train:,]
 
 
 # In[ ]:
 
 
-my_model = model_new()
+my_model = model_fast_k16()
 
 
 # In[ ]:
 
 
 tf.test.is_gpu_available()
-#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)
 
 
 # In[ ]:
 
 
-history_mild_2mp = my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
-my_model.save('new_model_e30_b32_t1000.h5')
+my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=50, batch_size=32)
+#my_model.save('new_model_e50_b32_t1600.h5')
+#my_model = load_model('new_model_e50_b32_t1600.h5')
 
+# ## Testing steroids
 
-# In[ ]:
+with open("steroid.list", "r") as filin:
+    steroid = filin.read()
+    steroid = steroid.split("\n")
+    steroid.pop()
+
+X_steroid = np.ndarray(shape=(69, 14, 32, 32, 32))
+
+i = -1
+for pocket in steroid:
+    i += 1
+    X_steroid[i,] = np.load("deepdrug3d_voxel_data/"+pocket+".npy")
 
+Y_pred_steroid = my_model.predict(X_steroid)
+Y_pred_steroid = Y_pred_steroid.round()
 
-#predictions=prediction_history()
+steroid_predict = Y_pred_steroid.sum(axis=0)  # total predictions per class
+print("On 69 steroid-binding pockets, the predictions are the following:\n\
+      predicted as heme:\t{}\npredicted as nucleotide:\t{}\n\
+      predicted as control:\t{}\n".format(steroid_predict[0],
+                                          steroid_predict[1],
+                                          steroid_predict[2]))
 
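Note: the tally above counts rounded softmax outputs. Below is a minimal sketch, not part of this commit, of an argmax-based count, which also handles pockets where no probability rounds to 1. The helper name summarize_predictions is hypothetical; the class order (heme, nucleotide, control) is taken from the print statement above and is otherwise an assumption.

import numpy as np

def summarize_predictions(y_pred, class_names=("heme", "nucleotide", "control")):
    """Count how many pockets fall into each predicted class (assumed class order)."""
    labels = np.argmax(y_pred, axis=1)                       # highest-scoring class per pocket
    counts = np.bincount(labels, minlength=len(class_names)) # per-class totals
    return dict(zip(class_names, counts))

# Example: summarize_predictions(my_model.predict(X_steroid))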