2 Commits cc992f7f2d ... bb9a7f549e

Author SHA1 Message Date
  Nicolasticot bb9a7f549e added new model, with different hyperparameter from dd3d git 5 years ago
  Nicolasticot 860e797732 changed mild model, added maxpooling 5 years ago
2 changed files with 490 additions and 106 deletions
  1. 240 106
      DeepDrug.ipynb
  2. 250 0
      DeepDrug.py

+ 240 - 106
DeepDrug.ipynb View File

7
     "# DeepDrug3D"
7
     "# DeepDrug3D"
8
    ]
8
    ]
9
   },
9
   },
10
+  {
11
+   "cell_type": "markdown",
12
+   "metadata": {},
13
+   "source": [
14
+    "## Importing library"
15
+   ]
16
+  },
10
   {
17
   {
11
    "cell_type": "code",
18
    "cell_type": "code",
12
-   "execution_count": 1,
13
-   "metadata": {},
14
-   "outputs": [
15
-    {
16
-     "name": "stderr",
17
-     "output_type": "stream",
18
-     "text": [
19
-      "Using TensorFlow backend.\n"
20
-     ]
21
-    }
22
-   ],
19
+   "execution_count": 6,
20
+   "metadata": {},
21
+   "outputs": [],
23
    "source": [
22
    "source": [
24
     "import numpy as np\n",
23
     "import numpy as np\n",
25
-    "\n",
24
+    "import tensorflow as tf\n",
26
     "from sklearn.preprocessing import LabelEncoder\n",
25
     "from sklearn.preprocessing import LabelEncoder\n",
27
     "from keras.models import Sequential\n",
26
     "from keras.models import Sequential\n",
28
-    "from keras import optimizers\n",
29
-    "from keras.layers import Dense, Flatten, TimeDistributedn, Dropout\n",
27
+    "from keras import optimizers, callbacks\n",
28
+    "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
30
     "from keras import Input, Model\n",
29
     "from keras import Input, Model\n",
31
     "from keras.layers import add, Activation\n",
30
     "from keras.layers import add, Activation\n",
31
+    "from keras.layers.advanced_activations import LeakyReLU\n",
32
     "#from keras.utils import plot_model  # Needs pydot.\n",
32
     "#from keras.utils import plot_model  # Needs pydot.\n",
33
-    "from keras.layers import Conv3D, MaxPooling3D"
33
+    "from keras.layers import Convolution3D, MaxPooling3D"
34
    ]
34
    ]
35
   },
35
   },
36
   {
36
   {
37
    "cell_type": "markdown",
37
    "cell_type": "markdown",
38
    "metadata": {},
38
    "metadata": {},
39
    "source": [
39
    "source": [
40
-    "## Create pocket lists\n",
41
-    "4 pockets are created :\n",
42
-    "  + control\n",
43
-    "  + steroid\n",
44
-    "  + heme\n",
45
-    "  + nucleotide"
40
+    "### Callback used to store model predictions in order to plot a ROC curve"
46
    ]
41
    ]
47
   },
42
   },
48
   {
43
   {
49
    "cell_type": "code",
44
    "cell_type": "code",
50
-   "execution_count": 2,
51
-   "metadata": {},
52
-   "outputs": [
53
-    {
54
-     "data": {
55
-      "text/plain": [
56
-       "''"
57
-      ]
58
-     },
59
-     "execution_count": 2,
60
-     "metadata": {},
61
-     "output_type": "execute_result"
62
-    }
63
-   ],
64
-   "source": [
65
-    "with open(\"control.list\", \"r\") as filin:\n",
66
-    "    control = filin.read()\n",
67
-    "control = control.split(\"\\n\")\n",
68
-    "control.pop()\n",
69
-    "\n",
70
-    "with open(\"steroid.list\", \"r\") as filin:\n",
71
-    "    steroid = filin.read()\n",
72
-    "steroid = steroid.split(\"\\n\")\n",
73
-    "steroid.pop()\n",
74
-    "\n",
75
-    "with open(\"heme.list\", \"r\") as filin:\n",
76
-    "    heme = filin.read()\n",
77
-    "heme = heme.split(\"\\n\")\n",
78
-    "heme.pop()\n",
79
-    "\n",
80
-    "with open(\"nucleotide.list\", \"r\") as filin:\n",
81
-    "    nucleotide = filin.read()\n",
82
-    "nucleotide = nucleotide.split(\"\\n\")\n",
83
-    "nucleotide.pop()"
45
+   "execution_count": null,
46
+   "metadata": {},
47
+   "outputs": [],
48
+   "source": [
49
+    "class prediction_history(callbacks.Callback):\n",
50
+    "    def __init__(self):\n",
51
+    "        self.predhis = []\n",
52
+    "    def on_epoch_end(self, epoch, logs={}):\n",
53
+    "        self.predhis.append(model.predict(predictor_train))"
84
    ]
54
    ]
85
   },
55
   },
86
   {
56
   {
92
   },
62
   },
93
   {
63
   {
94
    "cell_type": "code",
64
    "cell_type": "code",
95
-   "execution_count": 3,
65
+   "execution_count": null,
96
    "metadata": {},
66
    "metadata": {},
97
    "outputs": [],
67
    "outputs": [],
98
    "source": [
68
    "source": [
99
-    "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
100
-    "indices = np.random.permutation(2219)\n",
101
-    "output = np.ndarray(shape=(2219, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
102
-    "lmin = len(steroid)\n",
103
-    "lmid = len(heme)\n",
104
-    "lmax = len(nucleotide)"
69
+    "def in_out_lists(size=1000):\n",
70
+    "    \"\"\"\n",
71
+    "    returns a tuple of array used as input and output for the model\n",
72
+    "    Arguments:\n",
73
+    "        - size, int: default 1000, size of the lists to be created\n",
74
+    "        \n",
75
+    "    Returns:\n",
76
+    "        - tuple (data_onehot, output):\n",
77
+    "            -data_onehot, ndarray: containing one-hot encoded pockets\n",
78
+    "            -output, ndarray: containing size-3 vectors for classification\n",
79
+    "    \"\"\"\n",
80
+    "    with open(\"control.list\", \"r\") as filin:\n",
81
+    "        control = filin.read()\n",
82
+    "        control = control.split(\"\\n\")\n",
83
+    "        control.pop()\n",
84
+    "\n",
85
+    "    with open(\"steroid.list\", \"r\") as filin:\n",
86
+    "        steroid = filin.read()\n",
87
+    "        steroid = steroid.split(\"\\n\")\n",
88
+    "        steroid.pop()\n",
89
+    "\n",
90
+    "    with open(\"heme.list\", \"r\") as filin:\n",
91
+    "        heme = filin.read()\n",
92
+    "        heme = heme.split(\"\\n\")\n",
93
+    "        heme.pop()\n",
94
+    "\n",
95
+    "    with open(\"nucleotide.list\", \"r\") as filin:\n",
96
+    "        nucleotide = filin.read()\n",
97
+    "        nucleotide = nucleotide.split(\"\\n\")\n",
98
+    "        nucleotide.pop()\n",
99
+    "    \n",
100
+    "    lmin = len(heme)\n",
101
+    "    lmid = len(nucleotide)\n",
102
+    "    lmax = len(control)\n",
103
+    "    tot_size = lmin + lmid + lmax\n",
104
+    "    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32)) # initializing empty array\n",
105
+    "\n",
106
+    "    np.random.seed(9001)\n",
107
+    "    indices = np.random.permutation(tot_size)\n",
108
+    "    indices = indices[:size]\n",
109
+    "    output = np.ndarray(shape=(size, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
110
+    "\n",
111
+    "    n = -1\n",
112
+    "    for i in indices:\n",
113
+    "        n += 1\n",
114
+    "        if i < lmin:\n",
115
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i]+\".npy\")\n",
116
+    "            output[n,] = [1,0,0]\n",
117
+    "        elif i > lmin and i < (lmin + lmid):\n",
118
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - lmin]+\".npy\")\n",
119
+    "            output[n,] = [0,1,0]\n",
120
+    "        else:\n",
121
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+control[i - (lmin+lmid) - 1]+\".npy\")\n",
122
+    "            output[n,] = [0,0,1]\n",
123
+    "    \n",
124
+    "    return (data_onehot, output)"
125
+   ]
126
+  },
127
+  {
128
+   "cell_type": "markdown",
129
+   "metadata": {},
130
+   "source": [
131
+    "### Defining different models to test and compare"
105
    ]
132
    ]
106
   },
133
   },
107
   {
134
   {
108
    "cell_type": "code",
135
    "cell_type": "code",
109
-   "execution_count": 4,
136
+   "execution_count": null,
110
    "metadata": {},
137
    "metadata": {},
111
    "outputs": [],
138
    "outputs": [],
112
    "source": [
139
    "source": [
113
-    "n = -1\n",
114
-    "for i in indices:\n",
115
-    "    n += 1\n",
116
-    "    if i < lmin:\n",
117
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
118
-    "        output[n,] = [1,0,0]\n",
119
-    "    elif i > lmin and i < (lmin + lmid):\n",
120
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
121
-    "        output[n,] = [0,1,0]\n",
122
-    "    else:\n",
123
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid) - 1]+\".npy\")\n",
124
-    "        output[n,] = [0,0,1]"
140
+    "def model_heavy(): # créer un objet modèle\n",
141
+    "    \"\"\"\n",
142
+    "    Return a simple sequentiel model\n",
143
+    "    \n",
144
+    "    Returns :\n",
145
+    "        - model : keras.Model\n",
146
+    "    \"\"\"\n",
147
+    "    inputs = Input(shape=(14,32,32,32))\n",
148
+    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
149
+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
150
+    "    drop_1 = Dropout(0.2)(conv_2)\n",
151
+    "    maxpool = MaxPooling3D()(drop_1)\n",
152
+    "    drop_2 = Dropout(0.4)(maxpool)\n",
153
+    "    dense = Dense(512)(drop_2)\n",
154
+    "    drop_3 = Dropout(0.4)(dense)\n",
155
+    "    flatters = Flatten()(drop_3)\n",
156
+    "    #output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
157
+    "    output = Dense(3, activation='softmax')(flatters)\n",
158
+    "    model = Model(inputs=inputs, outputs=output)\n",
159
+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
160
+    "    print(model.summary)\n",
161
+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
162
+    "                  metrics=[\"accuracy\"])\n",
163
+    "    return model"
125
    ]
164
    ]
126
   },
165
   },
127
   {
166
   {
128
    "cell_type": "code",
167
    "cell_type": "code",
129
-   "execution_count": 5,
168
+   "execution_count": 8,
130
    "metadata": {},
169
    "metadata": {},
131
    "outputs": [],
170
    "outputs": [],
132
    "source": [
171
    "source": [
133
-    "X_train = data_onehot[0:1664,]\n",
134
-    "Y_train = output[0:1664,]\n",
135
-    "X_test = data_onehot[1664:,]\n",
136
-    "Y_test = output[1664:,]"
172
+    "def model_new(): # créer un objet modèle\n",
173
+    "    \"\"\"\n",
174
+    "    Return a simple sequentiel model\n",
175
+    "    \n",
176
+    "    Returns :\n",
177
+    "        - model : keras.Model\n",
178
+    "    \"\"\"\n",
179
+    "    inputs = Input(shape=(14,32,32,32))\n",
180
+    "    conv_1 = Convolution3D(filters=64, kernel_size=5, padding=\"valid\", data_format='channels_first')(inputs)\n",
181
+    "    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)\n",
182
+    "    drop_1 = Dropout(0.2)(activation_1)\n",
183
+    "    conv_2 = Convolution3D(filters=64, kernel_size=3, padding=\"valid\", data_format='channels_first')(drop_1)\n",
184
+    "    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)\n",
185
+    "    maxpool = MaxPooling3D(pool_size=(2,2,2),\n",
186
+    "                            strides=None,\n",
187
+    "                            padding='valid',\n",
188
+    "                            data_format='channels_first')(activation_2)\n",
189
+    "    drop_2 = Dropout(0.4)(maxpool)\n",
190
+    "    flatters = Flatten()(drop_2)\n",
191
+    "    dense = Dense(128)(flatters)\n",
192
+    "    activation_3 = LeakyReLU(alpha = 0.1)(dense)\n",
193
+    "    drop_3 = Dropout(0.4)(activation_3)\n",
194
+    "    output = Dense(3, activation='softmax')(drop_3)\n",
195
+    "    model = Model(inputs=inputs, outputs=output)\n",
196
+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
197
+    "    print(model.summary)\n",
198
+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
199
+    "                  metrics=[\"accuracy\"])\n",
200
+    "    return model"
137
    ]
201
    ]
138
   },
202
   },
139
   {
203
   {
140
    "cell_type": "code",
204
    "cell_type": "code",
141
-   "execution_count": 14,
142
-   "metadata": {},
143
-   "outputs": [
144
-    {
145
-     "data": {
146
-      "text/plain": [
147
-       "(1, 14, 32, 32, 32)"
148
-      ]
149
-     },
150
-     "execution_count": 14,
151
-     "metadata": {},
152
-     "output_type": "execute_result"
153
-    }
154
-   ],
155
-   "source": [
156
-    "def model_sequential(): # créer un objet modèle\n",
205
+   "execution_count": null,
206
+   "metadata": {},
207
+   "outputs": [],
208
+   "source": [
209
+    "def model_light(): # créer un objet modèle\n",
157
     "    \"\"\"\n",
210
     "    \"\"\"\n",
158
     "    Return a simple sequentiel model\n",
211
     "    Return a simple sequentiel model\n",
159
     "    \n",
212
     "    \n",
160
     "    Returns :\n",
213
     "    Returns :\n",
161
     "        - model : keras.Model\n",
214
     "        - model : keras.Model\n",
162
     "    \"\"\"\n",
215
     "    \"\"\"\n",
163
-    "    inputs = Input(shape=(32,32,32,14)) # 759 aa, 21 car onehot\n",
164
-    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"LeakyReLU\",\n",
165
-    "                        kernel_initializer=\"he_normal\")(inputs)\n",
166
-    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"LeakyReLU\",\n",
167
-    "                        kernel_initializer=\"he_normal\")(conv_1)\n",
216
+    "    inputs = Input(shape=(14,32,32,32))\n",
217
+    "    conv_1 = Conv3D(32, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
218
+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
168
     "    drop_1 = Dropout(0.2)(conv_2)\n",
219
     "    drop_1 = Dropout(0.2)(conv_2)\n",
169
     "    maxpool = MaxPooling3D()(drop_1)\n",
220
     "    maxpool = MaxPooling3D()(drop_1)\n",
170
-    "    drop_2 = Dropout(0.4)(maxpool)\n",
171
-    "    dense = Dense(512)(drop_2)\n",
172
-    "    drop_3 = Dropout(0.4)(dense)\n",
173
-    "    output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
221
+    "    drop_2 = Dropout(0.3)(maxpool)\n",
222
+    "    maxpool_2 = MaxPooling3D()(drop_2)\n",
223
+    "    drop_3 = Dropout(0.3)(maxpool_2)\n",
224
+    "    dense = Dense(256)(drop_3)\n",
225
+    "    drop_4 = Dropout(0.4)(dense)\n",
226
+    "    flatters = Flatten()(drop_4)\n",
227
+    "    output = Dense(3, activation='softmax')(flatters)\n",
174
     "    model = Model(inputs=inputs, outputs=output)\n",
228
     "    model = Model(inputs=inputs, outputs=output)\n",
175
     "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
229
     "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
176
     "    print(model.summary)\n",
230
     "    print(model.summary)\n",
179
     "    return model"
233
     "    return model"
180
    ]
234
    ]
181
   },
235
   },
236
+  {
237
+   "cell_type": "markdown",
238
+   "metadata": {},
239
+   "source": [
240
+    "## Create pocket lists\n",
241
+    "4 lists are created :\n",
242
+    "  + control\n",
243
+    "  + steroid\n",
244
+    "  + heme\n",
245
+    "  + nucleotide"
246
+   ]
247
+  },
248
+  {
249
+   "cell_type": "code",
250
+   "execution_count": null,
251
+   "metadata": {},
252
+   "outputs": [],
253
+   "source": [
254
+    "data = in_out_lists(1400)\n",
255
+    "pockets = np.cumsum(data[1], axis=0)[-1]"
256
+   ]
257
+  },
258
+  {
259
+   "cell_type": "code",
260
+   "execution_count": null,
261
+   "metadata": {},
262
+   "outputs": [],
263
+   "source": [
264
+    "print(\"with random seed=9001 and a 1400 pockets dataset the rates are:\\n\\\n",
265
+    "      {} heme, {} nucleotide, {} control\\n\\\n",
266
+    "      Total avaible dataset are composed of the following proportions:\\n\\\n",
267
+    "      {} heme, {} nucleotide, {} control\".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,\n",
268
+    "                                                0.145, 0.380, 0.475))"
269
+   ]
270
+  },
182
   {
271
   {
183
    "cell_type": "code",
272
    "cell_type": "code",
184
    "execution_count": null,
273
    "execution_count": null,
185
    "metadata": {},
274
    "metadata": {},
186
    "outputs": [],
275
    "outputs": [],
187
-   "source": []
276
+   "source": [
277
+    "data_onehot = data[0]\n",
278
+    "output = data[1]\n",
279
+    "X_train = data_onehot[0:1000,]\n",
280
+    "Y_train = output[0:1000,]\n",
281
+    "X_test = data_onehot[1000:,]\n",
282
+    "Y_test = output[1000:,]"
283
+   ]
284
+  },
285
+  {
286
+   "cell_type": "code",
287
+   "execution_count": null,
288
+   "metadata": {},
289
+   "outputs": [],
290
+   "source": [
291
+    "my_model = model_new()"
292
+   ]
293
+  },
294
+  {
295
+   "cell_type": "code",
296
+   "execution_count": null,
297
+   "metadata": {},
298
+   "outputs": [],
299
+   "source": [
300
+    "tf.test.is_gpu_available()\n",
301
+    "#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)"
302
+   ]
303
+  },
304
+  {
305
+   "cell_type": "code",
306
+   "execution_count": null,
307
+   "metadata": {},
308
+   "outputs": [],
309
+   "source": [
310
+    "history_mild_2mp = mild_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)\n",
311
+    "my_model.save('new_model_e30_b32_t1000.h5')"
312
+   ]
313
+  },
314
+  {
315
+   "cell_type": "code",
316
+   "execution_count": null,
317
+   "metadata": {},
318
+   "outputs": [],
319
+   "source": [
320
+    "#predictions=prediction_history()"
321
+   ]
188
   }
322
   }
189
  ],
323
  ],
190
  "metadata": {
324
  "metadata": {

+ 250 - 0
DeepDrug.py View File

1
+#!/usr/bin/env python
2
+# coding: utf-8
3
+
4
+# # DeepDrug3D
5
+
6
+# ## Importing library
7
+
8
+# In[6]:
9
+
10
+
11
+import numpy as np
12
+import tensorflow as tf
13
+from sklearn.preprocessing import LabelEncoder
14
+from keras.models import Sequential
15
+from keras import optimizers, callbacks
16
+from keras.layers import Dense, Flatten, TimeDistributed, Dropout
17
+from keras import Input, Model
18
+from keras.layers import add, Activation
19
+from keras.layers.advanced_activations import LeakyReLU
20
+#from keras.utils import plot_model  # Needs pydot.
21
+from keras.layers import Convolution3D, MaxPooling3D
22
+
23
+
24
+# ### Callback used to store model predictions in order to plot a ROC curve
25
+
26
+# In[ ]:
27
+
28
+
29
class prediction_history(callbacks.Callback):
    """
    Keras callback that stores the model predictions after every epoch.

    The predictions are appended, one entry per epoch, to ``predhis`` so
    that they can be inspected once training is over.

    Arguments:
        - predictor_train, array-like or None: inputs passed to
          ``predict`` at the end of each epoch. When None (the default,
          which keeps the zero-argument constructor working), the
          module-level name ``predictor_train`` is looked up when the
          epoch ends.
    """

    def __init__(self, predictor_train=None):
        # Let the Keras base class initialise its own state first.
        super(prediction_history, self).__init__()
        self.predictor_train = predictor_train
        self.predhis = []

    def on_epoch_end(self, epoch, logs=None):
        """
        Append the predictions for the stored inputs to ``predhis``.

        Arguments:
            - epoch, int: index of the epoch that just ended (unused)
            - logs, dict or None: metrics supplied by Keras (unused)
        """
        inputs = self.predictor_train
        if inputs is None:
            # Legacy behaviour: fall back to the global of the same name.
            inputs = predictor_train
        # self.model is the model being trained; Keras attaches it to the
        # callback, so no global model variable is needed.
        self.predhis.append(self.model.predict(inputs))
34
+
35
+
36
+# ### Creating inputs and outputs
37
+
38
+# In[ ]:
39
+
40
+
41
def _read_pocket_list(path):
    """Return the non-empty lines of the pocket list file at `path`."""
    with open(path, "r") as filin:
        return [line for line in filin.read().splitlines() if line]


def in_out_lists(size=1000):
    """
    returns a tuple of array used as input and output for the model

    The heme, nucleotide and control pocket names are read from
    "heme.list", "nucleotide.list" and "control.list". `size` pockets are
    then drawn without replacement (fixed seed 9001) and their voxel files
    are loaded from "deepdrug3d_voxel_data/<name>.npy".

    Arguments:
        - size, int: default 1000, size of the lists to be created

    Returns:
        - tuple (data_onehot, output):
            -data_onehot, ndarray: shape (size, 14, 32, 32, 32), one entry
             per loaded pocket
            -output, ndarray: shape (size, 3), one-hot class vectors in
             the order heme, nucleotide, control

    Raises:
        - ValueError: if `size` is larger than the number of listed pockets
    """
    heme = _read_pocket_list("heme.list")
    nucleotide = _read_pocket_list("nucleotide.list")
    control = _read_pocket_list("control.list")

    lmin = len(heme)
    lmid = len(nucleotide)
    tot_size = lmin + lmid + len(control)
    if size > tot_size:
        # Otherwise the trailing rows of both arrays would stay uninitialised.
        raise ValueError(
            "size={} exceeds the {} available pockets".format(size, tot_size)
        )

    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32))  # filled below
    output = np.ndarray(shape=(size, 3))  # one-hot: heme, nucleotide, control

    # Fixed seed so that the same pockets are drawn on every run.
    np.random.seed(9001)
    indices = np.random.permutation(tot_size)[:size]

    # Indices address the concatenation heme + nucleotide + control:
    # [0, lmin) -> heme, [lmin, lmin + lmid) -> nucleotide, rest -> control.
    for n, i in enumerate(indices):
        if i < lmin:
            name, label = heme[i], [1, 0, 0]
        elif i < lmin + lmid:
            name, label = nucleotide[i - lmin], [0, 1, 0]
        else:
            name, label = control[i - lmin - lmid], [0, 0, 1]
        data_onehot[n] = np.load("deepdrug3d_voxel_data/" + name + ".npy")
        output[n] = label

    return (data_onehot, output)
97
+
98
+
99
+# ### Defining different models to test and compare
100
+
101
+# In[ ]:
102
+
103
+
104
def model_heavy():  # create a model object
    """
    Build and compile the "heavy" 3D convolutional classifier.

    Layers, in order: two 64-filter 3D convolutions (kernels 28 and 26,
    "same" padding, ReLU), dropout 0.2, max pooling, dropout 0.4, a
    512-unit dense layer, dropout 0.4, flatten, and a 3-way softmax output.
    The model is compiled with Adam (learning rate 1e-6) and categorical
    cross-entropy, tracking accuracy.

    Returns :
        - model : keras.Model
    """
    inputs = Input(shape=(14, 32, 32, 32))
    # NOTE(review): no data_format is passed here, whereas model_new uses
    # data_format='channels_first' for the same input shape - confirm which
    # axis is meant to hold the 14 channels.
    conv_1 = Convolution3D(64, (28, 28, 28), padding="same", activation="relu",
                           kernel_initializer="he_normal")(inputs)
    conv_2 = Convolution3D(64, (26, 26, 26), padding="same", activation="relu",
                           kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.4)(maxpool)
    # The dense layer is applied before flattening, as in the original design.
    dense = Dense(512)(drop_2)
    drop_3 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_3)
    output = Dense(3, activation='softmax')(flatters)
    model = Model(inputs=inputs, outputs=output)
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    # summary() prints the layer table itself.
    model.summary()
    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
128
+
129
+
130
+# In[8]:
131
+
132
+
133
def model_new():  # create a model object
    """
    Build and compile the "new" 3D convolutional classifier.

    Layers, in order: a 64-filter 3D convolution (kernel 5, "valid"
    padding, channels first) with LeakyReLU(0.1), dropout 0.2, a second
    64-filter convolution (kernel 3) with LeakyReLU(0.1), 2x2x2 max
    pooling, dropout 0.4, flatten, a 128-unit dense layer with
    LeakyReLU(0.1), dropout 0.4, and a 3-way softmax output. The model is
    compiled with Adam (learning rate 1e-6) and categorical cross-entropy,
    tracking accuracy.

    Returns :
        - model : keras.Model
    """
    inputs = Input(shape=(14, 32, 32, 32))
    conv_1 = Convolution3D(filters=64, kernel_size=5, padding="valid",
                           data_format='channels_first')(inputs)
    activation_1 = LeakyReLU(alpha=0.1)(conv_1)
    drop_1 = Dropout(0.2)(activation_1)
    conv_2 = Convolution3D(filters=64, kernel_size=3, padding="valid",
                           data_format='channels_first')(drop_1)
    activation_2 = LeakyReLU(alpha=0.1)(conv_2)
    maxpool = MaxPooling3D(pool_size=(2, 2, 2),
                           strides=None,
                           padding='valid',
                           data_format='channels_first')(activation_2)
    drop_2 = Dropout(0.4)(maxpool)
    flatters = Flatten()(drop_2)
    dense = Dense(128)(flatters)
    activation_3 = LeakyReLU(alpha=0.1)(dense)
    drop_3 = Dropout(0.4)(activation_3)
    output = Dense(3, activation='softmax')(drop_3)
    model = Model(inputs=inputs, outputs=output)
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    # summary() prints the layer table itself.
    model.summary()
    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
162
+
163
+
164
+# In[ ]:
165
+
166
+
167
def model_light():  # create a model object
    """
    Build and compile the "light" 3D convolutional classifier.

    Layers, in order: a 32-filter then a 64-filter 3D convolution (kernels
    28 and 26, "same" padding, ReLU), dropout 0.2, max pooling, dropout
    0.3, a second max pooling, dropout 0.3, a 256-unit dense layer, dropout
    0.4, flatten, and a 3-way softmax output. The model is compiled with
    Adam (learning rate 1e-6) and categorical cross-entropy, tracking
    accuracy.

    Returns :
        - model : keras.Model
    """
    inputs = Input(shape=(14, 32, 32, 32))
    # NOTE(review): no data_format is passed here, whereas model_new uses
    # data_format='channels_first' for the same input shape - confirm which
    # axis is meant to hold the 14 channels.
    conv_1 = Convolution3D(32, (28, 28, 28), padding="same", activation="relu",
                           kernel_initializer="he_normal")(inputs)
    conv_2 = Convolution3D(64, (26, 26, 26), padding="same", activation="relu",
                           kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.3)(maxpool)
    maxpool_2 = MaxPooling3D()(drop_2)
    drop_3 = Dropout(0.3)(maxpool_2)
    # The dense layer is applied before flattening, as in the original design.
    dense = Dense(256)(drop_3)
    drop_4 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_4)
    output = Dense(3, activation='softmax')(flatters)
    model = Model(inputs=inputs, outputs=output)
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    # summary() prints the layer table itself.
    model.summary()
    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
192
+
193
+
194
+# ## Create pocket lists
195
+# 4 lists are created :
196
+#   + control
197
+#   + steroid
198
+#   + heme
199
+#   + nucleotide
200
+
201
+# In[ ]:
202
+
203
+
204
# Draw 1400 pockets and count how many of each class were selected
# (column-wise sum of the one-hot labels).
data = in_out_lists(1400)
pockets = np.cumsum(data[1], axis=0)[-1]


# In[ ]:


# Compare the class rates of the drawn sample with the hard-coded
# proportions reported for the full dataset.
message = "with random seed=9001 and a 1400 pockets dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      Total avaible dataset are composed of the following proportions:\n      {} heme, {} nucleotide, {} control"
print(message.format(pockets[0]/1400, pockets[1]/1400, pockets[2]/1400,
                     0.145, 0.380, 0.475))


# In[ ]:


# First 1000 pockets for training, the remaining 400 for validation.
data_onehot = data[0]
output = data[1]
X_train = data_onehot[0:1000,]
Y_train = output[0:1000,]
X_test = data_onehot[1000:,]
Y_test = output[1000:,]


# In[ ]:


my_model = model_new()


# In[ ]:


tf.test.is_gpu_available()
#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)


# In[ ]:


# Train the model built above; the previous code called fit() on the
# undefined name mild_model while saving my_model.
history_mild_2mp = my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
my_model.save('new_model_e30_b32_t1000.h5')


# In[ ]:


#predictions=prediction_history()
250
+