@@ -11,7 +11,7 @@
 import numpy as np
 import tensorflow as tf
 from sklearn.preprocessing import LabelEncoder
-from keras.models import Sequential
+from keras.models import Sequential, load_model
 from keras import optimizers, callbacks
 from keras.layers import Dense, Flatten, TimeDistributed, Dropout
 from keras import Input, Model
@@ -23,16 +23,6 @@ from keras.layers import Convolution3D, MaxPooling3D
 
 # ### used to store model predictions in order to plot the ROC curve
 
-# In[ ]:
-
-
-class prediction_history(callbacks.Callback):
-    def __init__(self):
-        self.predhis = []
-    def on_epoch_end(self, epoch, logs={}):
-        self.predhis.append(model.predict(predictor_train))
-
-
 # ### Creating inputs and outputs
 
 # In[ ]:
@@ -130,7 +120,37 @@ def model_heavy(): # create a model object
 # In[8]:
 
 
-def model_new(): # create a model object
+def model_fast_k32(): # create a model object
+    """
+    Return a simple sequential model
+
+    Returns :
+        - model : keras.Model
+    """
+    inputs = Input(shape=(14,32,32,32))
+    conv_1 = Convolution3D(filters=64, kernel_size=32, padding="valid", data_format='channels_first')(inputs)
+    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
+    drop_1 = Dropout(0.2)(activation_1)
+    conv_2 = Convolution3D(filters=128, kernel_size=32, padding="valid", data_format='channels_first')(drop_1)
+    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
+    maxpool = MaxPooling3D(pool_size=(2,2,2),
+                           strides=None,
+                           padding='valid',
+                           data_format='channels_first')(activation_2)
+    drop_2 = Dropout(0.4)(maxpool)
+    flatters = Flatten()(drop_2)
+    dense = Dense(256)(flatters)
+    activation_3 = LeakyReLU(alpha = 0.1)(dense)
+    drop_3 = Dropout(0.4)(activation_3)
+    output = Dense(3, activation='softmax')(drop_3)
+    model = Model(inputs=inputs, outputs=output)
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
+    model.summary()
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
+                  metrics=["accuracy"])
+    return model
+
+def model_fast_k16(): # create a model object
     """
     Return a simple sequential model
 
@@ -138,10 +158,10 @@ def model_new(): # create a model object
         - model : keras.Model
     """
     inputs = Input(shape=(14,32,32,32))
-    conv_1 = Convolution3D(filters=64, kernel_size=5, padding="valid", data_format='channels_first')(inputs)
+    conv_1 = Convolution3D(filters=64, kernel_size=16, padding="valid", data_format='channels_first')(inputs)
     activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
     drop_1 = Dropout(0.2)(activation_1)
-    conv_2 = Convolution3D(filters=64, kernel_size=3, padding="valid", data_format='channels_first')(drop_1)
+    conv_2 = Convolution3D(filters=128, kernel_size=16, padding="valid", data_format='channels_first')(drop_1)
     activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
     maxpool = MaxPooling3D(pool_size=(2,2,2),
                            strides=None,
@@ -149,7 +169,7 @@ def model_new(): # create a model object
                            data_format='channels_first')(activation_2)
     drop_2 = Dropout(0.4)(maxpool)
     flatters = Flatten()(drop_2)
-    dense = Dense(128)(flatters)
+    dense = Dense(256)(flatters)
     activation_3 = LeakyReLU(alpha = 0.1)(dense)
     drop_3 = Dropout(0.4)(activation_3)
     output = Dense(3, activation='softmax')(drop_3)
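The two `model_fast` variants above differ only in their convolution kernel sizes, so it can help to check what the stride-1 "valid" convolution arithmetic (out = in - kernel + 1) does to the 32³ voxel grids these models expect. The sketch below is illustrative only; `valid_out` is a throwaway helper, not part of the project code.

```python
# Illustrative only: spatial size left by stride-1 "valid" Conv3D layers (out = in - kernel + 1),
# assuming the 14 x 32 x 32 x 32 channels-first voxel input used by the models above.
def valid_out(size, kernel):
    return size - kernel + 1

grid = 32
after_k16 = valid_out(valid_out(grid, 16), 16)  # 32 -> 17 -> 2, so the 2x2x2 max-pool still fits
after_k32 = valid_out(grid, 32)                 # 32 -> 1 after the first kernel-32 convolution
print(after_k16, after_k32)
```

By the same arithmetic, a second kernel-32 convolution has no valid input left to slide over a 1³ volume, so the layer shapes of `model_fast_k32` are worth confirming with `model.summary()` before training.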
@@ -200,51 +220,72 @@ def model_light(): # create a model object
 
 # In[ ]:
 
-
-data = in_out_lists(1400)
+sample = 1000
+data = in_out_lists(sample)
 pockets = np.cumsum(data[1], axis=0)[-1]
 
 
 # In[ ]:
 
 
-print("with random seed=9001 and a 1400 pockets dataset the rates are:\n {} heme, {} nucleotide, {} control\n Total avaible dataset are composed of the following proportions:\n {} heme, {} nucleotide, {} control".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,
-      0.145, 0.380, 0.475))
+print("with random seed=9001 and a {}-pocket dataset the rates are:\n {} heme, {} nucleotide, {} control\n The total available dataset is composed of the following proportions:\n {} heme, {} nucleotide, {} control".format(sample, pockets[0]/sample,
+      pockets[1]/sample, pockets[2]/sample,
+      0.145, 0.380, 0.475))
 
 
 # In[ ]:
 
+train = int(sample*0.6)
 
 data_onehot = data[0]
 output = data[1]
-X_train = data_onehot[0:1000,]
-Y_train = output[0:1000,]
-X_test = data_onehot[1000:,]
-Y_test = output[1000:,]
+
+X_train = data_onehot[0:train,]
+Y_train = output[0:train,]
+X_test = data_onehot[train:,]
+Y_test = output[train:,]
 
 
 # In[ ]:
 
 
-my_model = model_new()
+my_model = model_fast_k16()
 
 
 # In[ ]:
 
 
 tf.test.is_gpu_available()
-#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)
 
 
 # In[ ]:
 
 
-history_mild_2mp = my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
-my_model.save('new_model_e30_b32_t1000.h5')
+my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=50, batch_size=32)
+#my_model.save('new_model_e50_b32_t1600.h5')
+#my_model = load_model('new_model_e50_b32_t1600.h5')
 
+# ## Testing steroids
 
-# In[ ]:
+with open("steroid.list", "r") as filin:
+    steroid = filin.read()
+    steroid = steroid.split("\n")
+    steroid.pop()
+
+X_steroid = np.ndarray(shape=(69, 14, 32, 32, 32))
+
+i = -1
+for pocket in steroid:
+    i += 1
+    X_steroid[i,] = np.load("deepdrug3d_voxel_data/"+pocket+".npy")
 
+Y_pred_steroid = my_model.predict(X_steroid)
+Y_pred_steroid = Y_pred_steroid.round()
 
-#predictions=prediction_history()
+steroid_predict = Y_pred_steroid.cumsum(axis=0)[-1]
+print("On 69 steroid-binding pockets, the predictions are the following:\n\
+      predicted as heme:\t{}\npredicted as nucleotide:\t{}\n\
+      predicted as control:\t{}\n".format(steroid_predict[0],
+                                          steroid_predict[1],
+                                          steroid_predict[2]))
 
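Rounding the softmax outputs and summing them can count a pocket in zero classes, or in more than one, whenever its probabilities do not round to a single 1. If a strict one-class-per-pocket tally is wanted, an argmax-based count is a common alternative. A minimal sketch, assuming `Y_pred_steroid` holds the raw probabilities returned by `my_model.predict(X_steroid)` with columns ordered (heme, nucleotide, control):

```python
import numpy as np

# Minimal sketch: tally predicted classes by argmax instead of rounding.
# Assumes Y_pred_steroid has shape (69, 3), columns ordered (heme, nucleotide, control).
predicted_class = np.argmax(Y_pred_steroid, axis=1)   # one class index per pocket
counts = np.bincount(predicted_class, minlength=3)    # [n_heme, n_nucleotide, n_control]
print("heme: {}, nucleotide: {}, control: {}".format(*counts))
```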