Browse Source

changed mild model, added maxpooling

Nicolasticot 5 years ago
parent
commit
860e797732
2 changed files with 418 additions and 107 deletions
  1. 203 107
      DeepDrug.ipynb
  2. 215 0
      DeepDrug.py

+ 203 - 107
DeepDrug.ipynb View File

7
     "# DeepDrug3D"
7
     "# DeepDrug3D"
8
    ]
8
    ]
9
   },
9
   },
10
+  {
11
+   "cell_type": "markdown",
12
+   "metadata": {},
13
+   "source": [
14
+    "## Importing library"
15
+   ]
16
+  },
10
   {
17
   {
11
    "cell_type": "code",
18
    "cell_type": "code",
12
-   "execution_count": 1,
13
-   "metadata": {},
14
-   "outputs": [
15
-    {
16
-     "name": "stderr",
17
-     "output_type": "stream",
18
-     "text": [
19
-      "Using TensorFlow backend.\n"
20
-     ]
21
-    }
22
-   ],
19
+   "execution_count": null,
20
+   "metadata": {},
21
+   "outputs": [],
23
    "source": [
22
    "source": [
24
     "import numpy as np\n",
23
     "import numpy as np\n",
25
-    "\n",
24
+    "import tensorflow as tf\n",
26
     "from sklearn.preprocessing import LabelEncoder\n",
25
     "from sklearn.preprocessing import LabelEncoder\n",
27
     "from keras.models import Sequential\n",
26
     "from keras.models import Sequential\n",
28
-    "from keras import optimizers\n",
29
-    "from keras.layers import Dense, Flatten, TimeDistributedn, Dropout\n",
27
+    "from keras import optimizers, callbacks\n",
28
+    "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
30
     "from keras import Input, Model\n",
29
     "from keras import Input, Model\n",
31
     "from keras.layers import add, Activation\n",
30
     "from keras.layers import add, Activation\n",
32
     "#from keras.utils import plot_model  # Needs pydot.\n",
31
     "#from keras.utils import plot_model  # Needs pydot.\n",
37
    "cell_type": "markdown",
36
    "cell_type": "markdown",
38
    "metadata": {},
37
    "metadata": {},
39
    "source": [
38
    "source": [
40
-    "## Create pocket lists\n",
41
-    "4 pockets are created :\n",
42
-    "  + control\n",
43
-    "  + steroid\n",
44
-    "  + heme\n",
45
-    "  + nucleotide"
39
+    "### Callback used to store the model predictions after each epoch, in order to plot a ROC curve"
46
    ]
40
    ]
47
   },
41
   },
48
   {
42
   {
49
    "cell_type": "code",
43
    "cell_type": "code",
50
-   "execution_count": 2,
51
-   "metadata": {},
52
-   "outputs": [
53
-    {
54
-     "data": {
55
-      "text/plain": [
56
-       "''"
57
-      ]
58
-     },
59
-     "execution_count": 2,
60
-     "metadata": {},
61
-     "output_type": "execute_result"
62
-    }
63
-   ],
64
-   "source": [
65
-    "with open(\"control.list\", \"r\") as filin:\n",
66
-    "    control = filin.read()\n",
67
-    "control = control.split(\"\\n\")\n",
68
-    "control.pop()\n",
69
-    "\n",
70
-    "with open(\"steroid.list\", \"r\") as filin:\n",
71
-    "    steroid = filin.read()\n",
72
-    "steroid = steroid.split(\"\\n\")\n",
73
-    "steroid.pop()\n",
74
-    "\n",
75
-    "with open(\"heme.list\", \"r\") as filin:\n",
76
-    "    heme = filin.read()\n",
77
-    "heme = heme.split(\"\\n\")\n",
78
-    "heme.pop()\n",
79
-    "\n",
80
-    "with open(\"nucleotide.list\", \"r\") as filin:\n",
81
-    "    nucleotide = filin.read()\n",
82
-    "nucleotide = nucleotide.split(\"\\n\")\n",
83
-    "nucleotide.pop()"
44
+   "execution_count": null,
45
+   "metadata": {},
46
+   "outputs": [],
47
+   "source": [
48
+    "class prediction_history(callbacks.Callback):\n",
49
+    "    def __init__(self):\n",
50
+    "        self.predhis = []\n",
51
+    "    def on_epoch_end(self, epoch, logs={}):\n",
52
+    "        self.predhis.append(model.predict(predictor_train))"
84
    ]
53
    ]
85
   },
54
   },
86
   {
55
   {
92
   },
61
   },
93
   {
62
   {
94
    "cell_type": "code",
63
    "cell_type": "code",
95
-   "execution_count": 3,
64
+   "execution_count": null,
96
    "metadata": {},
65
    "metadata": {},
97
    "outputs": [],
66
    "outputs": [],
98
    "source": [
67
    "source": [
99
-    "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
100
-    "indices = np.random.permutation(2219)\n",
101
-    "output = np.ndarray(shape=(2219, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
102
-    "lmin = len(steroid)\n",
103
-    "lmid = len(heme)\n",
104
-    "lmax = len(nucleotide)"
68
+    "def in_out_lists(size=1000):\n",
69
+    "    \"\"\"\n",
70
+    "    returns a tuple of array used as input and output for the model\n",
71
+    "    Arguments:\n",
72
+    "        - size, int: default 1000, size of the lists to be created\n",
73
+    "        \n",
74
+    "    Returns:\n",
75
+    "        - tuple (data_onehot, output):\n",
76
+    "            -data_onehot, ndarray: containing one-hot encoded pockets\n",
77
+    "            -output, ndarray: containing size-3 vectors for classification\n",
78
+    "    \"\"\"\n",
79
+    "    with open(\"control.list\", \"r\") as filin:\n",
80
+    "        control = filin.read()\n",
81
+    "        control = control.split(\"\\n\")\n",
82
+    "        control.pop()\n",
83
+    "\n",
84
+    "    with open(\"steroid.list\", \"r\") as filin:\n",
85
+    "        steroid = filin.read()\n",
86
+    "        steroid = steroid.split(\"\\n\")\n",
87
+    "        steroid.pop()\n",
88
+    "\n",
89
+    "    with open(\"heme.list\", \"r\") as filin:\n",
90
+    "        heme = filin.read()\n",
91
+    "        heme = heme.split(\"\\n\")\n",
92
+    "        heme.pop()\n",
93
+    "\n",
94
+    "    with open(\"nucleotide.list\", \"r\") as filin:\n",
95
+    "        nucleotide = filin.read()\n",
96
+    "        nucleotide = nucleotide.split(\"\\n\")\n",
97
+    "        nucleotide.pop()\n",
98
+    "    \n",
99
+    "    lmin = len(heme)\n",
100
+    "    lmid = len(nucleotide)\n",
101
+    "    lmax = len(control)\n",
102
+    "    tot_size = lmin + lmid + lmax\n",
103
+    "    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32)) # initializing empty array\n",
104
+    "\n",
105
+    "    np.random.seed(9001)\n",
106
+    "    indices = np.random.permutation(tot_size)\n",
107
+    "    indices = indices[:size]\n",
108
+    "    output = np.ndarray(shape=(size, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
109
+    "\n",
110
+    "    n = -1\n",
111
+    "    for i in indices:\n",
112
+    "        n += 1\n",
113
+    "        if i < lmin:\n",
114
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i]+\".npy\")\n",
115
+    "            output[n,] = [1,0,0]\n",
116
+    "        elif i > lmin and i < (lmin + lmid):\n",
117
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - lmin]+\".npy\")\n",
118
+    "            output[n,] = [0,1,0]\n",
119
+    "        else:\n",
120
+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+control[i - (lmin+lmid) - 1]+\".npy\")\n",
121
+    "            output[n,] = [0,0,1]\n",
122
+    "    \n",
123
+    "    return (data_onehot, output)"
105
    ]
124
    ]
106
   },
125
   },
107
   {
126
   {
108
-   "cell_type": "code",
109
-   "execution_count": 4,
127
+   "cell_type": "markdown",
110
    "metadata": {},
128
    "metadata": {},
111
-   "outputs": [],
112
    "source": [
129
    "source": [
113
-    "n = -1\n",
114
-    "for i in indices:\n",
115
-    "    n += 1\n",
116
-    "    if i < lmin:\n",
117
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
118
-    "        output[n,] = [1,0,0]\n",
119
-    "    elif i > lmin and i < (lmin + lmid):\n",
120
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
121
-    "        output[n,] = [0,1,0]\n",
122
-    "    else:\n",
123
-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid) - 1]+\".npy\")\n",
124
-    "        output[n,] = [0,0,1]"
130
+    "### Defining different models to test and compare"
125
    ]
131
    ]
126
   },
132
   },
127
   {
133
   {
128
    "cell_type": "code",
134
    "cell_type": "code",
129
-   "execution_count": 5,
135
+   "execution_count": null,
130
    "metadata": {},
136
    "metadata": {},
131
    "outputs": [],
137
    "outputs": [],
132
    "source": [
138
    "source": [
133
-    "X_train = data_onehot[0:1664,]\n",
134
-    "Y_train = output[0:1664,]\n",
135
-    "X_test = data_onehot[1664:,]\n",
136
-    "Y_test = output[1664:,]"
139
+    "def model_heavy(): # créer un objet modèle\n",
140
+    "    \"\"\"\n",
141
+    "    Return a simple sequentiel model\n",
142
+    "    \n",
143
+    "    Returns :\n",
144
+    "        - model : keras.Model\n",
145
+    "    \"\"\"\n",
146
+    "    inputs = Input(shape=(14,32,32,32))\n",
147
+    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
148
+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
149
+    "    drop_1 = Dropout(0.2)(conv_2)\n",
150
+    "    maxpool = MaxPooling3D()(drop_1)\n",
151
+    "    drop_2 = Dropout(0.4)(maxpool)\n",
152
+    "    dense = Dense(512)(drop_2)\n",
153
+    "    drop_3 = Dropout(0.4)(dense)\n",
154
+    "    flatters = Flatten()(drop_3)\n",
155
+    "    #output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
156
+    "    output = Dense(3, activation='softmax')(flatters)\n",
157
+    "    model = Model(inputs=inputs, outputs=output)\n",
158
+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
159
+    "    print(model.summary)\n",
160
+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
161
+    "                  metrics=[\"accuracy\"])\n",
162
+    "    return model"
137
    ]
163
    ]
138
   },
164
   },
139
   {
165
   {
140
    "cell_type": "code",
166
    "cell_type": "code",
141
-   "execution_count": 14,
142
-   "metadata": {},
143
-   "outputs": [
144
-    {
145
-     "data": {
146
-      "text/plain": [
147
-       "(1, 14, 32, 32, 32)"
148
-      ]
149
-     },
150
-     "execution_count": 14,
151
-     "metadata": {},
152
-     "output_type": "execute_result"
153
-    }
154
-   ],
155
-   "source": [
156
-    "def model_sequential(): # créer un objet modèle\n",
167
+   "execution_count": null,
168
+   "metadata": {},
169
+   "outputs": [],
170
+   "source": [
171
+    "def model_light(): # créer un objet modèle\n",
157
     "    \"\"\"\n",
172
     "    \"\"\"\n",
158
     "    Return a simple sequentiel model\n",
173
     "    Return a simple sequentiel model\n",
159
     "    \n",
174
     "    \n",
160
     "    Returns :\n",
175
     "    Returns :\n",
161
     "        - model : keras.Model\n",
176
     "        - model : keras.Model\n",
162
     "    \"\"\"\n",
177
     "    \"\"\"\n",
163
-    "    inputs = Input(shape=(32,32,32,14)) # 759 aa, 21 car onehot\n",
164
-    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"LeakyReLU\",\n",
165
-    "                        kernel_initializer=\"he_normal\")(inputs)\n",
166
-    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"LeakyReLU\",\n",
167
-    "                        kernel_initializer=\"he_normal\")(conv_1)\n",
178
+    "    inputs = Input(shape=(14,32,32,32))\n",
179
+    "    conv_1 = Conv3D(32, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
180
+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
168
     "    drop_1 = Dropout(0.2)(conv_2)\n",
181
     "    drop_1 = Dropout(0.2)(conv_2)\n",
169
     "    maxpool = MaxPooling3D()(drop_1)\n",
182
     "    maxpool = MaxPooling3D()(drop_1)\n",
170
-    "    drop_2 = Dropout(0.4)(maxpool)\n",
171
-    "    dense = Dense(512)(drop_2)\n",
172
-    "    drop_3 = Dropout(0.4)(dense)\n",
173
-    "    output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
183
+    "    drop_2 = Dropout(0.3)(maxpool)\n",
184
+    "    maxpool_2 = MaxPooling3D()(drop_2)\n",
185
+    "    drop_3 = Dropout(0.3)(maxpool_2)\n",
186
+    "    dense = Dense(256)(drop_3)\n",
187
+    "    drop_4 = Dropout(0.4)(dense)\n",
188
+    "    flatters = Flatten()(drop_4)\n",
189
+    "    output = Dense(3, activation='softmax')(flatters)\n",
174
     "    model = Model(inputs=inputs, outputs=output)\n",
190
     "    model = Model(inputs=inputs, outputs=output)\n",
175
     "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
191
     "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
176
     "    print(model.summary)\n",
192
     "    print(model.summary)\n",
179
     "    return model"
195
     "    return model"
180
    ]
196
    ]
181
   },
197
   },
198
+  {
199
+   "cell_type": "markdown",
200
+   "metadata": {},
201
+   "source": [
202
+    "## Create pocket lists\n",
203
+    "4 lists are created :\n",
204
+    "  + control\n",
205
+    "  + steroid\n",
206
+    "  + heme\n",
207
+    "  + nucleotide"
208
+   ]
209
+  },
210
+  {
211
+   "cell_type": "code",
212
+   "execution_count": null,
213
+   "metadata": {},
214
+   "outputs": [],
215
+   "source": [
216
+    "data = in_out_lists(1400)\n",
217
+    "pockets = np.cumsum(data[1], axis=0)[-1]"
218
+   ]
219
+  },
220
+  {
221
+   "cell_type": "code",
222
+   "execution_count": null,
223
+   "metadata": {},
224
+   "outputs": [],
225
+   "source": [
226
+    "print(\"with random seed=9001 and a 1400 pockets dataset the rates are:\\n\\\n",
227
+    "      {} heme, {} nucleotide, {} control\\n\\\n",
228
+    "      Total avaible dataset are composed of the following proportions:\\n\\\n",
229
+    "      {} heme, {} nucleotide, {} control\".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,\n",
230
+    "                                                0.145, 0.380, 0.475))"
231
+   ]
232
+  },
233
+  {
234
+   "cell_type": "code",
235
+   "execution_count": null,
236
+   "metadata": {},
237
+   "outputs": [],
238
+   "source": [
239
+    "data_onehot = data[0]\n",
240
+    "output = data[1]\n",
241
+    "X_train = data_onehot[0:1000,]\n",
242
+    "Y_train = output[0:1000,]\n",
243
+    "X_test = data_onehot[1000:,]\n",
244
+    "Y_test = output[1000:,]"
245
+   ]
246
+  },
247
+  {
248
+   "cell_type": "code",
249
+   "execution_count": null,
250
+   "metadata": {},
251
+   "outputs": [],
252
+   "source": [
253
+    "my_model = model_light()"
254
+   ]
255
+  },
256
+  {
257
+   "cell_type": "code",
258
+   "execution_count": null,
259
+   "metadata": {},
260
+   "outputs": [],
261
+   "source": [
262
+    "tf.test.is_gpu_available()\n",
263
+    "#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)"
264
+   ]
265
+  },
182
   {
266
   {
183
    "cell_type": "code",
267
    "cell_type": "code",
184
    "execution_count": null,
268
    "execution_count": null,
185
    "metadata": {},
269
    "metadata": {},
186
    "outputs": [],
270
    "outputs": [],
187
-   "source": []
271
+   "source": [
272
+    "history_mild_2mp = my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)\n",
273
+    "my_model.save('light_model_2mp_e30_b32.h5')"
274
+   ]
275
+  },
276
+  {
277
+   "cell_type": "code",
278
+   "execution_count": null,
279
+   "metadata": {},
280
+   "outputs": [],
281
+   "source": [
282
+    "#predictions=prediction_history()"
283
+   ]
188
   }
284
   }
189
  ],
285
  ],
190
  "metadata": {
286
  "metadata": {

+ 215 - 0
DeepDrug.py View File

1
+#!/usr/bin/env python
2
+# coding: utf-8
3
+
4
+# # DeepDrug3D
5
+
6
+# ## Importing library
7
+
8
+# In[ ]:
9
+
10
+
11
+import numpy as np
12
+import tensorflow as tf
13
+from sklearn.preprocessing import LabelEncoder
14
+from keras.models import Sequential
15
+from keras import optimizers, callbacks
16
+from keras.layers import Dense, Flatten, TimeDistributed, Dropout
17
+from keras import Input, Model
18
+from keras.layers import add, Activation
19
+#from keras.utils import plot_model  # Needs pydot.
20
+from keras.layers import Conv3D, MaxPooling3D
21
+
22
+
23
+# ### used to store model prediction in order to plot roc curve
24
+
25
+# In[ ]:
26
+
27
+
28
class prediction_history(callbacks.Callback):
    """
    Keras callback that stores the model predictions after every epoch.

    The predictions are appended to ``self.predhis`` (one entry per epoch)
    so that they can be inspected once training is over.

    Arguments:
        - predictor_train, optional: inputs passed to ``model.predict`` at the
          end of each epoch. When omitted, the callback falls back to a
          module-level variable named ``predictor_train`` (legacy behaviour).
    """

    def __init__(self, predictor_train=None):
        # Let the Keras base class set up its own state.
        super().__init__()
        self.predictor_train = predictor_train
        self.predhis = []

    def on_epoch_end(self, epoch, logs=None):
        """Run the model being trained on the stored inputs and keep the result."""
        inputs = self.predictor_train
        if inputs is None:
            # Legacy fallback: the original code read this module-level name.
            inputs = predictor_train
        # self.model is the model being trained; Keras attaches it to the
        # callback, so no global `model` variable is needed.
        self.predhis.append(self.model.predict(inputs))
33
+
34
+
35
+# ### Creating input and ouputs
36
+
37
+# In[ ]:
38
+
39
+
40
+def in_out_lists(size=1000):
41
+    """
42
+    returns a tuple of array used as input and output for the model
43
+    Arguments:
44
+        - size, int: default 1000, size of the lists to be created
45
+        
46
+    Returns:
47
+        - tuple (data_onehot, output):
48
+            -data_onehot, ndarray: containing one-hot encoded pockets
49
+            -output, ndarray: containing size-3 vectors for classification
50
+    """
51
+    with open("control.list", "r") as filin:
52
+        control = filin.read()
53
+        control = control.split("\n")
54
+        control.pop()
55
+
56
+    with open("steroid.list", "r") as filin:
57
+        steroid = filin.read()
58
+        steroid = steroid.split("\n")
59
+        steroid.pop()
60
+
61
+    with open("heme.list", "r") as filin:
62
+        heme = filin.read()
63
+        heme = heme.split("\n")
64
+        heme.pop()
65
+
66
+    with open("nucleotide.list", "r") as filin:
67
+        nucleotide = filin.read()
68
+        nucleotide = nucleotide.split("\n")
69
+        nucleotide.pop()
70
+    
71
+    lmin = len(heme)
72
+    lmid = len(nucleotide)
73
+    lmax = len(control)
74
+    tot_size = lmin + lmid + lmax
75
+    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32)) # initializing empty array
76
+
77
+    np.random.seed(9001)
78
+    indices = np.random.permutation(tot_size)
79
+    indices = indices[:size]
80
+    output = np.ndarray(shape=(size, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}
81
+
82
+    n = -1
83
+    for i in indices:
84
+        n += 1
85
+        if i < lmin:
86
+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+heme[i]+".npy")
87
+            output[n,] = [1,0,0]
88
+        elif i > lmin and i < (lmin + lmid):
89
+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+nucleotide[i - lmin]+".npy")
90
+            output[n,] = [0,1,0]
91
+        else:
92
+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+control[i - (lmin+lmid) - 1]+".npy")
93
+            output[n,] = [0,0,1]
94
+    
95
+    return (data_onehot, output)
96
+
97
+
98
+# ### Defining different model to test and compare
99
+
100
+# In[ ]:
101
+
102
+
103
+def model_heavy(): # créer un objet modèle
104
+    """
105
+    Return a simple sequentiel model
106
+    
107
+    Returns :
108
+        - model : keras.Model
109
+    """
110
+    inputs = Input(shape=(14,32,32,32))
111
+    conv_1 = Conv3D(64, (28, 28, 28), padding="same", activation="relu", kernel_initializer="he_normal")(inputs)
112
+    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu", kernel_initializer="he_normal")(conv_1)
113
+    drop_1 = Dropout(0.2)(conv_2)
114
+    maxpool = MaxPooling3D()(drop_1)
115
+    drop_2 = Dropout(0.4)(maxpool)
116
+    dense = Dense(512)(drop_2)
117
+    drop_3 = Dropout(0.4)(dense)
118
+    flatters = Flatten()(drop_3)
119
+    #output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)
120
+    output = Dense(3, activation='softmax')(flatters)
121
+    model = Model(inputs=inputs, outputs=output)
122
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
123
+    print(model.summary)
124
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
125
+                  metrics=["accuracy"])
126
+    return model
127
+
128
+
129
+# In[ ]:
130
+
131
+
132
+def model_light(): # créer un objet modèle
133
+    """
134
+    Return a simple sequentiel model
135
+    
136
+    Returns :
137
+        - model : keras.Model
138
+    """
139
+    inputs = Input(shape=(14,32,32,32))
140
+    conv_1 = Conv3D(32, (28, 28, 28), padding="same", activation="relu", kernel_initializer="he_normal")(inputs)
141
+    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu", kernel_initializer="he_normal")(conv_1)
142
+    drop_1 = Dropout(0.2)(conv_2)
143
+    maxpool = MaxPooling3D()(drop_1)
144
+    drop_2 = Dropout(0.3)(maxpool)
145
+    maxpool_2 = MaxPooling3D()(drop_2)
146
+    drop_3 = Dropout(0.3)(maxpool_2)
147
+    dense = Dense(256)(drop_3)
148
+    drop_4 = Dropout(0.4)(dense)
149
+    flatters = Flatten()(drop_4)
150
+    output = Dense(3, activation='softmax')(flatters)
151
+    model = Model(inputs=inputs, outputs=output)
152
+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
153
+    print(model.summary)
154
+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
155
+                  metrics=["accuracy"])
156
+    return model
157
+
158
+
159
+# ## Create pocket lists
160
+# 4 lists are created :
161
+#   + control
162
+#   + steroid
163
+#   + heme
164
+#   + nucleotide
165
+
166
+# In[ ]:
167
+
168
+
169
+data = in_out_lists(1400)
170
+pockets = np.cumsum(data[1], axis=0)[-1]
171
+
172
+
173
+# In[ ]:
174
+
175
+
176
+print("with random seed=9001 and a 1400 pockets dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      Total avaible dataset are composed of the following proportions:\n      {} heme, {} nucleotide, {} control".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,
177
+                                                0.145, 0.380, 0.475))
178
+
179
+
180
+# In[ ]:
181
+
182
+
183
+data_onehot = data[0]
184
+output = data[1]
185
+X_train = data_onehot[0:1000,]
186
+Y_train = output[0:1000,]
187
+X_test = data_onehot[1000:,]
188
+Y_test = output[1000:,]
189
+
190
+
191
+# In[ ]:
192
+
193
+
194
+my_model = model_light()
195
+
196
+
197
+# In[ ]:
198
+
199
+
200
+tf.test.is_gpu_available()
201
+#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)
202
+
203
+
204
+# In[ ]:
205
+
206
+
207
+history_mild_2mp = my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
208
+my_model.save('light_model_2mp_e30_b32.h5')
209
+
210
+
211
+# In[ ]:
212
+
213
+
214
+#predictions=prediction_history()
215
+