2 Commits cc992f7f2d ... bb9a7f549e

Autor SHA1 Mensagem Data
  Nicolasticot bb9a7f549e added new model, with different hyperparameter from dd3d git 5 anos atrás
  Nicolasticot 860e797732 changed mild model, added maxpooling 5 anos atrás
2 arquivos alterados com 490 adições e 106 exclusões
  1. 240 106
      DeepDrug.ipynb
  2. 250 0
      DeepDrug.py

+ 240 - 106
DeepDrug.ipynb Ver arquivo

@@ -7,80 +7,50 @@
7 7
     "# DeepDrug3D"
8 8
    ]
9 9
   },
10
+  {
11
+   "cell_type": "markdown",
12
+   "metadata": {},
13
+   "source": [
14
+    "## Importing libraries"
15
+   ]
16
+  },
10 17
   {
11 18
    "cell_type": "code",
12
-   "execution_count": 1,
13
-   "metadata": {},
14
-   "outputs": [
15
-    {
16
-     "name": "stderr",
17
-     "output_type": "stream",
18
-     "text": [
19
-      "Using TensorFlow backend.\n"
20
-     ]
21
-    }
22
-   ],
19
+   "execution_count": 6,
20
+   "metadata": {},
21
+   "outputs": [],
23 22
    "source": [
24 23
     "import numpy as np\n",
25
-    "\n",
24
+    "import tensorflow as tf\n",
26 25
     "from sklearn.preprocessing import LabelEncoder\n",
27 26
     "from keras.models import Sequential\n",
28
-    "from keras import optimizers\n",
29
-    "from keras.layers import Dense, Flatten, TimeDistributedn, Dropout\n",
27
+    "from keras import optimizers, callbacks\n",
28
+    "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
30 29
     "from keras import Input, Model\n",
31 30
     "from keras.layers import add, Activation\n",
31
+    "from keras.layers.advanced_activations import LeakyReLU\n",
32 32
     "#from keras.utils import plot_model  # Needs pydot.\n",
33
-    "from keras.layers import Conv3D, MaxPooling3D"
33
+    "from keras.layers import Convolution3D, MaxPooling3D"
34 34
    ]
35 35
   },
36 36
   {
37 37
    "cell_type": "markdown",
38 38
    "metadata": {},
39 39
    "source": [
40
-    "## Create pocket lists\n",
41
-    "4 pockets are created :\n",
42
-    "  + control\n",
43
-    "  + steroid\n",
44
-    "  + heme\n",
45
-    "  + nucleotide"
40
+    "### Callback used to store model predictions in order to plot the ROC curve"
46 41
    ]
47 42
   },
48 43
   {
49 44
    "cell_type": "code",
50
-   "execution_count": 2,
51
-   "metadata": {},
52
-   "outputs": [
53
-    {
54
-     "data": {
55
-      "text/plain": [
56
-       "''"
57
-      ]
58
-     },
59
-     "execution_count": 2,
60
-     "metadata": {},
61
-     "output_type": "execute_result"
62
-    }
63
-   ],
64
-   "source": [
65
-    "with open(\"control.list\", \"r\") as filin:\n",
66
-    "    control = filin.read()\n",
67
-    "control = control.split(\"\\n\")\n",
68
-    "control.pop()\n",
69
-    "\n",
70
-    "with open(\"steroid.list\", \"r\") as filin:\n",
71
-    "    steroid = filin.read()\n",
72
-    "steroid = steroid.split(\"\\n\")\n",
73
-    "steroid.pop()\n",
74
-    "\n",
75
-    "with open(\"heme.list\", \"r\") as filin:\n",
76
-    "    heme = filin.read()\n",
77
-    "heme = heme.split(\"\\n\")\n",
78
-    "heme.pop()\n",
79
-    "\n",
80
-    "with open(\"nucleotide.list\", \"r\") as filin:\n",
81
-    "    nucleotide = filin.read()\n",
82
-    "nucleotide = nucleotide.split(\"\\n\")\n",
83
-    "nucleotide.pop()"
45
+   "execution_count": null,
46
+   "metadata": {},
47
+   "outputs": [],
48
+   "source": [
49
+    "class prediction_history(callbacks.Callback):\n",
50
+    "    def __init__(self):\n",
51
+    "        self.predhis = []\n",
52
+    "    def on_epoch_end(self, epoch, logs={}):\n",
53
+    "        self.predhis.append(model.predict(predictor_train))"
84 54
    ]
85 55
   },
86 56
   {
@@ -92,85 +62,169 @@
92 62
   },
93 63
   {
94 64
    "cell_type": "code",
95
-   "execution_count": 3,
65
+   "execution_count": null,
96 66
    "metadata": {},
97 67
    "outputs": [],
98 68
    "source": [
99
-    "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
100
-    "indices = np.random.permutation(2219)\n",
101
-    "output = np.ndarray(shape=(2219, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
102
-    "lmin = len(steroid)\n",
103
-    "lmid = len(heme)\n",
104
-    "lmax = len(nucleotide)"
69
+    "def in_out_lists(size=1000):\n",
70
+    "    \"\"\"\n",
71
+    "    returns a tuple of array used as input and output for the model\n",
72
+    "    Arguments:\n",
73
+    "        - size, int: default 1000, size of the lists to be created\n",
74
+    "        \n",
75
+    "    Returns:\n",
76
+    "        - tuple (data_onehot, output):\n",
77
+    "            -data_onehot, ndarray: containing one-hot encoded pockets\n",
78
+    "            -output, ndarray: containing size-3 vectors for classification\n",
79
+    "    \"\"\"\n",
80
+    "    with open(\"control.list\", \"r\") as filin:\n",
81
+    "        control = filin.read()\n",
82
+    "        control = control.split(\"\\n\")\n",
83
+    "        control.pop()\n",
84
+    "\n",
85
+    "    with open(\"steroid.list\", \"r\") as filin:\n",
86
+    "        steroid = filin.read()\n",
87
+    "        steroid = steroid.split(\"\\n\")\n",
88
+    "        steroid.pop()\n",
89
+    "\n",
90
+    "    with open(\"heme.list\", \"r\") as filin:\n",
91
+    "        heme = filin.read()\n",
92
+    "        heme = heme.split(\"\\n\")\n",
93
+    "        heme.pop()\n",
94
+    "\n",
95
+    "    with open(\"nucleotide.list\", \"r\") as filin:\n",
96
+    "        nucleotide = filin.read()\n",
97
+    "        nucleotide = nucleotide.split(\"\\n\")\n",
98
+    "        nucleotide.pop()\n",
99
+    "    \n",
100
+    "    lmin = len(heme)\n",
101
+    "    lmid = len(nucleotide)\n",
102
+    "    lmax = len(control)\n",
103
+    "    tot_size = lmin + lmid + lmax\n",
104
+    "    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32)) # initializing empty array\n",
105
+    "\n",
106
+    "    np.random.seed(9001)\n",
107
+    "    indices = np.random.permutation(tot_size)\n",
108
+    "    indices = indices[:size]\n",
109
+    "    output = np.ndarray(shape=(size, 3)) # one-hot over 3 classes: heme=[1,0,0], nucleotide=[0,1,0], control=[0,0,1]\n",
110
+    "\n",
111
+    "    n = -1\n",
112
+    "    for i in indices:\n",
113
+    "        n += 1\n",
114
+    "        if i < lmin:\n",
115
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i]+\".npy\")\n",
116
+    "            output[n,] = [1,0,0]\n",
117
+    "        elif i > lmin and i < (lmin + lmid):\n",
118
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - lmin]+\".npy\")\n",
119
+    "            output[n,] = [0,1,0]\n",
120
+    "        else:\n",
121
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+control[i - (lmin+lmid) - 1]+\".npy\")\n",
122
+    "            output[n,] = [0,0,1]\n",
123
+    "    \n",
124
+    "    return (data_onehot, output)"
125
+   ]
126
+  },
127
+  {
128
+   "cell_type": "markdown",
129
+   "metadata": {},
130
+   "source": [
131
+    "### Defining different model to test and compare"
105 132
    ]
106 133
   },
107 134
   {
108 135
    "cell_type": "code",
109
-   "execution_count": 4,
136
+   "execution_count": null,
110 137
    "metadata": {},
111 138
    "outputs": [],
112 139
    "source": [
113
-    "n = -1\n",
114
-    "for i in indices:\n",
115
-    "    n += 1\n",
116
-    "    if i < lmin:\n",
117
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
118
-    "        output[n,] = [1,0,0]\n",
119
-    "    elif i > lmin and i < (lmin + lmid):\n",
120
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
121
-    "        output[n,] = [0,1,0]\n",
122
-    "    else:\n",
123
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid) - 1]+\".npy\")\n",
124
-    "        output[n,] = [0,0,1]"
140
+    "def model_heavy(): # créer un objet modèle\n",
141
+    "    \"\"\"\n",
142
+    "    Return a simple sequential model\n",
143
+    "    \n",
144
+    "    Returns :\n",
145
+    "        - model : keras.Model\n",
146
+    "    \"\"\"\n",
147
+    "    inputs = Input(shape=(14,32,32,32))\n",
148
+    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
149
+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
150
+    "    drop_1 = Dropout(0.2)(conv_2)\n",
151
+    "    maxpool = MaxPooling3D()(drop_1)\n",
152
+    "    drop_2 = Dropout(0.4)(maxpool)\n",
153
+    "    dense = Dense(512)(drop_2)\n",
154
+    "    drop_3 = Dropout(0.4)(dense)\n",
155
+    "    flatters = Flatten()(drop_3)\n",
156
+    "    #output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
157
+    "    output = Dense(3, activation='softmax')(flatters)\n",
158
+    "    model = Model(inputs=inputs, outputs=output)\n",
159
+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
160
+    "    print(model.summary)\n",
161
+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
162
+    "                  metrics=[\"accuracy\"])\n",
163
+    "    return model"
125 164
    ]
126 165
   },
127 166
   {
128 167
    "cell_type": "code",
129
-   "execution_count": 5,
168
+   "execution_count": 8,
130 169
    "metadata": {},
131 170
    "outputs": [],
132 171
    "source": [
133
-    "X_train = data_onehot[0:1664,]\n",
134
-    "Y_train = output[0:1664,]\n",
135
-    "X_test = data_onehot[1664:,]\n",
136
-    "Y_test = output[1664:,]"
172
+    "def model_new(): # créer un objet modèle\n",
173
+    "    \"\"\"\n",
174
+    "    Return a simple sequential model\n",
175
+    "    \n",
176
+    "    Returns :\n",
177
+    "        - model : keras.Model\n",
178
+    "    \"\"\"\n",
179
+    "    inputs = Input(shape=(14,32,32,32))\n",
180
+    "    conv_1 = Convolution3D(filters=64, kernel_size=5, padding=\"valid\", data_format='channels_first')(inputs)\n",
181
+    "    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)\n",
182
+    "    drop_1 = Dropout(0.2)(activation_1)\n",
183
+    "    conv_2 = Convolution3D(filters=64, kernel_size=3, padding=\"valid\", data_format='channels_first')(drop_1)\n",
184
+    "    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)\n",
185
+    "    maxpool = MaxPooling3D(pool_size=(2,2,2),\n",
186
+    "                            strides=None,\n",
187
+    "                            padding='valid',\n",
188
+    "                            data_format='channels_first')(activation_2)\n",
189
+    "    drop_2 = Dropout(0.4)(maxpool)\n",
190
+    "    flatters = Flatten()(drop_2)\n",
191
+    "    dense = Dense(128)(flatters)\n",
192
+    "    activation_3 = LeakyReLU(alpha = 0.1)(dense)\n",
193
+    "    drop_3 = Dropout(0.4)(activation_3)\n",
194
+    "    output = Dense(3, activation='softmax')(drop_3)\n",
195
+    "    model = Model(inputs=inputs, outputs=output)\n",
196
+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
197
+    "    print(model.summary)\n",
198
+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
199
+    "                  metrics=[\"accuracy\"])\n",
200
+    "    return model"
137 201
    ]
138 202
   },
139 203
   {
140 204
    "cell_type": "code",
141
-   "execution_count": 14,
142
-   "metadata": {},
143
-   "outputs": [
144
-    {
145
-     "data": {
146
-      "text/plain": [
147
-       "(1, 14, 32, 32, 32)"
148
-      ]
149
-     },
150
-     "execution_count": 14,
151
-     "metadata": {},
152
-     "output_type": "execute_result"
153
-    }
154
-   ],
155
-   "source": [
156
-    "def model_sequential(): # créer un objet modèle\n",
205
+   "execution_count": null,
206
+   "metadata": {},
207
+   "outputs": [],
208
+   "source": [
209
+    "def model_light(): # créer un objet modèle\n",
157 210
     "    \"\"\"\n",
158 211
     "    Return a simple sequential model\n",
159 212
     "    \n",
160 213
     "    Returns :\n",
161 214
     "        - model : keras.Model\n",
162 215
     "    \"\"\"\n",
163
-    "    inputs = Input(shape=(32,32,32,14)) # 759 aa, 21 car onehot\n",
164
-    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"LeakyReLU\",\n",
165
-    "                        kernel_initializer=\"he_normal\")(inputs)\n",
166
-    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"LeakyReLU\",\n",
167
-    "                        kernel_initializer=\"he_normal\")(conv_1)\n",
216
+    "    inputs = Input(shape=(14,32,32,32))\n",
217
+    "    conv_1 = Conv3D(32, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
218
+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
168 219
     "    drop_1 = Dropout(0.2)(conv_2)\n",
169 220
     "    maxpool = MaxPooling3D()(drop_1)\n",
170
-    "    drop_2 = Dropout(0.4)(maxpool)\n",
171
-    "    dense = Dense(512)(drop_2)\n",
172
-    "    drop_3 = Dropout(0.4)(dense)\n",
173
-    "    output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
221
+    "    drop_2 = Dropout(0.3)(maxpool)\n",
222
+    "    maxpool_2 = MaxPooling3D()(drop_2)\n",
223
+    "    drop_3 = Dropout(0.3)(maxpool_2)\n",
224
+    "    dense = Dense(256)(drop_3)\n",
225
+    "    drop_4 = Dropout(0.4)(dense)\n",
226
+    "    flatters = Flatten()(drop_4)\n",
227
+    "    output = Dense(3, activation='softmax')(flatters)\n",
174 228
     "    model = Model(inputs=inputs, outputs=output)\n",
175 229
     "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
176 230
     "    print(model.summary)\n",
@@ -179,12 +233,92 @@
179 233
     "    return model"
180 234
    ]
181 235
   },
236
+  {
237
+   "cell_type": "markdown",
238
+   "metadata": {},
239
+   "source": [
240
+    "## Create pocket lists\n",
241
+    "4 lists are created :\n",
242
+    "  + control\n",
243
+    "  + steroid\n",
244
+    "  + heme\n",
245
+    "  + nucleotide"
246
+   ]
247
+  },
248
+  {
249
+   "cell_type": "code",
250
+   "execution_count": null,
251
+   "metadata": {},
252
+   "outputs": [],
253
+   "source": [
254
+    "data = in_out_lists(1400)\n",
255
+    "pockets = np.cumsum(data[1], axis=0)[-1]"
256
+   ]
257
+  },
258
+  {
259
+   "cell_type": "code",
260
+   "execution_count": null,
261
+   "metadata": {},
262
+   "outputs": [],
263
+   "source": [
264
+    "print(\"with random seed=9001 and a 1400 pockets dataset the rates are:\\n\\\n",
265
+    "      {} heme, {} nucleotide, {} control\\n\\\n",
266
+    "      Total avaible dataset are composed of the following proportions:\\n\\\n",
267
+    "      {} heme, {} nucleotide, {} control\".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,\n",
268
+    "                                                0.145, 0.380, 0.475))"
269
+   ]
270
+  },
182 271
   {
183 272
    "cell_type": "code",
184 273
    "execution_count": null,
185 274
    "metadata": {},
186 275
    "outputs": [],
187
-   "source": []
276
+   "source": [
277
+    "data_onehot = data[0]\n",
278
+    "output = data[1]\n",
279
+    "X_train = data_onehot[0:1000,]\n",
280
+    "Y_train = output[0:1000,]\n",
281
+    "X_test = data_onehot[1000:,]\n",
282
+    "Y_test = output[1000:,]"
283
+   ]
284
+  },
285
+  {
286
+   "cell_type": "code",
287
+   "execution_count": null,
288
+   "metadata": {},
289
+   "outputs": [],
290
+   "source": [
291
+    "my_model = model_new()"
292
+   ]
293
+  },
294
+  {
295
+   "cell_type": "code",
296
+   "execution_count": null,
297
+   "metadata": {},
298
+   "outputs": [],
299
+   "source": [
300
+    "tf.test.is_gpu_available()\n",
301
+    "#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)"
302
+   ]
303
+  },
304
+  {
305
+   "cell_type": "code",
306
+   "execution_count": null,
307
+   "metadata": {},
308
+   "outputs": [],
309
+   "source": [
310
+    "history_mild_2mp = mild_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)\n",
311
+    "my_model.save('new_model_e30_b32_t1000.h5')"
312
+   ]
313
+  },
314
+  {
315
+   "cell_type": "code",
316
+   "execution_count": null,
317
+   "metadata": {},
318
+   "outputs": [],
319
+   "source": [
320
+    "#predictions=prediction_history()"
321
+   ]
188 322
   }
189 323
  ],
190 324
  "metadata": {

+ 250 - 0
DeepDrug.py Ver arquivo

@@ -0,0 +1,250 @@
1
+#!/usr/bin/env python
2
+# coding: utf-8
3
+
4
+# # DeepDrug3D
5
+
6
+# ## Importing libraries
7
+
8
+# In[6]:
9
+
10
+
11
+import numpy as np
12
+import tensorflow as tf
13
+from sklearn.preprocessing import LabelEncoder
14
+from keras.models import Sequential
15
+from keras import optimizers, callbacks
16
+from keras.layers import Dense, Flatten, TimeDistributed, Dropout
17
+from keras import Input, Model
18
+from keras.layers import add, Activation
19
+from keras.layers.advanced_activations import LeakyReLU
20
+#from keras.utils import plot_model  # Needs pydot.
21
+from keras.layers import Convolution3D, MaxPooling3D
22
+
23
+
24
+# ### Callback used to store model predictions in order to plot the ROC curve
25
+
26
+# In[ ]:
27
+
28
+
29
+class prediction_history(callbacks.Callback):
30
+    def __init__(self):
31
+        self.predhis = []
32
+    def on_epoch_end(self, epoch, logs={}):
33
+        self.predhis.append(model.predict(predictor_train))
34
+
35
+
36
+# ### Creating inputs and outputs
37
+
38
+# In[ ]:
39
+
40
+
41
+def in_out_lists(size=1000):
42
+    """
43
+    returns a tuple of array used as input and output for the model
44
+    Arguments:
45
+        - size, int: default 1000, size of the lists to be created
46
+        
47
+    Returns:
48
+        - tuple (data_onehot, output):
49
+            -data_onehot, ndarray: containing one-hot encoded pockets
50
+            -output, ndarray: containing size-3 vectors for classification
51
+    """
52
+    with open("control.list", "r") as filin:
53
+        control = filin.read()
54
+        control = control.split("\n")
55
+        control.pop()
56
+
57
+    with open("steroid.list", "r") as filin:
58
+        steroid = filin.read()
59
+        steroid = steroid.split("\n")
60
+        steroid.pop()
61
+
62
+    with open("heme.list", "r") as filin:
63
+        heme = filin.read()
64
+        heme = heme.split("\n")
65
+        heme.pop()
66
+
67
+    with open("nucleotide.list", "r") as filin:
68
+        nucleotide = filin.read()
69
+        nucleotide = nucleotide.split("\n")
70
+        nucleotide.pop()
71
+    
72
+    lmin = len(heme)
73
+    lmid = len(nucleotide)
74
+    lmax = len(control)
75
+    tot_size = lmin + lmid + lmax
76
+    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32)) # initializing empty array
77
+
78
+    np.random.seed(9001)
79
+    indices = np.random.permutation(tot_size)
80
+    indices = indices[:size]
81
+    output = np.ndarray(shape=(size, 3)) # one-hot over 3 classes: heme=[1,0,0], nucleotide=[0,1,0], control=[0,0,1]
82
+
83
+    n = -1
84
+    for i in indices:
85
+        n += 1
86
+        if i < lmin:
87
+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+heme[i]+".npy")
88
+            output[n,] = [1,0,0]
89
+        elif i > lmin and i < (lmin + lmid):
90
+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+nucleotide[i - lmin]+".npy")
91
+            output[n,] = [0,1,0]
92
+        else:
93
+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+control[i - (lmin+lmid) - 1]+".npy")
94
+            output[n,] = [0,0,1]
95
+    
96
+    return (data_onehot, output)
97
+
98
+
99
+# ### Defining different models to test and compare
100
+
101
+# In[ ]:
102
+
103
+
104
+def model_heavy(): # créer un objet modèle
105
+    """
106
+    Return a simple sequential model
107
+    
108
+    Returns :
109
+        - model : keras.Model
110
+    """
111
+    inputs = Input(shape=(14,32,32,32))
112
+    conv_1 = Conv3D(64, (28, 28, 28), padding="same", activation="relu", kernel_initializer="he_normal")(inputs)
113
+    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu", kernel_initializer="he_normal")(conv_1)
114
+    drop_1 = Dropout(0.2)(conv_2)
115
+    maxpool = MaxPooling3D()(drop_1)
116
+    drop_2 = Dropout(0.4)(maxpool)
117
+    dense = Dense(512)(drop_2)
118
+    drop_3 = Dropout(0.4)(dense)
119
+    flatters = Flatten()(drop_3)
120
+    #output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)
121
+    output = Dense(3, activation='softmax')(flatters)
122
+    model = Model(inputs=inputs, outputs=output)
123
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
124
+    print(model.summary)
125
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
126
+                  metrics=["accuracy"])
127
+    return model
128
+
129
+
130
+# In[8]:
131
+
132
+
133
+def model_new(): # créer un objet modèle
134
+    """
135
+    Return a simple sequential model
136
+    
137
+    Returns :
138
+        - model : keras.Model
139
+    """
140
+    inputs = Input(shape=(14,32,32,32))
141
+    conv_1 = Convolution3D(filters=64, kernel_size=5, padding="valid", data_format='channels_first')(inputs)
142
+    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
143
+    drop_1 = Dropout(0.2)(activation_1)
144
+    conv_2 = Convolution3D(filters=64, kernel_size=3, padding="valid", data_format='channels_first')(drop_1)
145
+    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
146
+    maxpool = MaxPooling3D(pool_size=(2,2,2),
147
+                            strides=None,
148
+                            padding='valid',
149
+                            data_format='channels_first')(activation_2)
150
+    drop_2 = Dropout(0.4)(maxpool)
151
+    flatters = Flatten()(drop_2)
152
+    dense = Dense(128)(flatters)
153
+    activation_3 = LeakyReLU(alpha = 0.1)(dense)
154
+    drop_3 = Dropout(0.4)(activation_3)
155
+    output = Dense(3, activation='softmax')(drop_3)
156
+    model = Model(inputs=inputs, outputs=output)
157
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
158
+    print(model.summary)
159
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
160
+                  metrics=["accuracy"])
161
+    return model
162
+
163
+
164
+# In[ ]:
165
+
166
+
167
+def model_light(): # créer un objet modèle
168
+    """
169
+    Return a simple sequential model
170
+    
171
+    Returns :
172
+        - model : keras.Model
173
+    """
174
+    inputs = Input(shape=(14,32,32,32))
175
+    conv_1 = Conv3D(32, (28, 28, 28), padding="same", activation="relu", kernel_initializer="he_normal")(inputs)
176
+    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu", kernel_initializer="he_normal")(conv_1)
177
+    drop_1 = Dropout(0.2)(conv_2)
178
+    maxpool = MaxPooling3D()(drop_1)
179
+    drop_2 = Dropout(0.3)(maxpool)
180
+    maxpool_2 = MaxPooling3D()(drop_2)
181
+    drop_3 = Dropout(0.3)(maxpool_2)
182
+    dense = Dense(256)(drop_3)
183
+    drop_4 = Dropout(0.4)(dense)
184
+    flatters = Flatten()(drop_4)
185
+    output = Dense(3, activation='softmax')(flatters)
186
+    model = Model(inputs=inputs, outputs=output)
187
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
188
+    print(model.summary)
189
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
190
+                  metrics=["accuracy"])
191
+    return model
192
+
193
+
194
+# ## Create pocket lists
195
+# 4 lists are created :
196
+#   + control
197
+#   + steroid
198
+#   + heme
199
+#   + nucleotide
200
+
201
+# In[ ]:
202
+
203
+
204
+data = in_out_lists(1400)
205
+pockets = np.cumsum(data[1], axis=0)[-1]
206
+
207
+
208
+# In[ ]:
209
+
210
+
211
+print("with random seed=9001 and a 1400 pockets dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      Total avaible dataset are composed of the following proportions:\n      {} heme, {} nucleotide, {} control".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,
212
+                                                0.145, 0.380, 0.475))
213
+
214
+
215
+# In[ ]:
216
+
217
+
218
+data_onehot = data[0]
219
+output = data[1]
220
+X_train = data_onehot[0:1000,]
221
+Y_train = output[0:1000,]
222
+X_test = data_onehot[1000:,]
223
+Y_test = output[1000:,]
224
+
225
+
226
+# In[ ]:
227
+
228
+
229
+my_model = model_new()
230
+
231
+
232
+# In[ ]:
233
+
234
+
235
+tf.test.is_gpu_available()
236
+#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)
237
+
238
+
239
+# In[ ]:
240
+
241
+
242
+history_mild_2mp = mild_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
243
+my_model.save('new_model_e30_b32_t1000.h5')
244
+
245
+
246
+# In[ ]:
247
+
248
+
249
+#predictions=prediction_history()
250
+