|
@@ -9,11 +9,28 @@
|
9
|
9
|
},
|
10
|
10
|
{
|
11
|
11
|
"cell_type": "code",
|
12
|
|
- "execution_count": 2,
|
|
12
|
+ "execution_count": 1,
|
13
|
13
|
"metadata": {},
|
14
|
|
- "outputs": [],
|
|
14
|
+ "outputs": [
|
|
15
|
+ {
|
|
16
|
+ "name": "stderr",
|
|
17
|
+ "output_type": "stream",
|
|
18
|
+ "text": [
|
|
19
|
+ "Using TensorFlow backend.\n"
|
|
20
|
+ ]
|
|
21
|
+ }
|
|
22
|
+ ],
|
15
|
23
|
"source": [
|
16
|
|
- "import numpy as np"
|
|
24
|
+ "import numpy as np\n",
|
|
25
|
+ "\n",
|
|
26
|
+ "from sklearn.preprocessing import LabelEncoder\n",
|
|
27
|
+ "from keras.models import Sequential\n",
|
|
28
|
+ "from keras import optimizers\n",
|
|
29
|
+ "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
|
|
30
|
+ "from keras import Input, Model\n",
|
|
31
|
+ "from keras.layers import add, Activation\n",
|
|
32
|
+ "#from keras.utils import plot_model # Needs pydot.\n",
|
|
33
|
+ "from keras.layers import Conv3D, MaxPooling3D"
|
17
|
34
|
]
|
18
|
35
|
},
|
19
|
36
|
{
|
|
@@ -30,7 +47,7 @@
|
30
|
47
|
},
|
31
|
48
|
{
|
32
|
49
|
"cell_type": "code",
|
33
|
|
- "execution_count": 3,
|
|
50
|
+ "execution_count": 2,
|
34
|
51
|
"metadata": {},
|
35
|
52
|
"outputs": [
|
36
|
53
|
{
|
|
@@ -39,7 +56,7 @@
|
39
|
56
|
"''"
|
40
|
57
|
]
|
41
|
58
|
},
|
42
|
|
- "execution_count": 3,
|
|
59
|
+ "execution_count": 2,
|
43
|
60
|
"metadata": {},
|
44
|
61
|
"output_type": "execute_result"
|
45
|
62
|
}
|
|
@@ -66,19 +83,101 @@
|
66
|
83
|
"nucleotide.pop()"
|
67
|
84
|
]
|
68
|
85
|
},
|
|
86
|
+ {
|
|
87
|
+ "cell_type": "markdown",
|
|
88
|
+ "metadata": {},
|
|
89
|
+ "source": [
|
|
90
|
+ "### Creating inputs and outputs"
|
|
91
|
+ ]
|
|
92
|
+ },
|
69
|
93
|
{
|
70
|
94
|
"cell_type": "code",
|
71
|
|
- "execution_count": 6,
|
|
95
|
+ "execution_count": 3,
|
72
|
96
|
"metadata": {},
|
73
|
97
|
"outputs": [],
|
74
|
|
- "source": []
|
|
98
|
+ "source": [
|
|
99
|
+ "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
|
|
100
|
+ "indices = np.random.permutation(2219)\n",
|
|
101
|
+ "output = np.ndarray(shape=(2219, 3)) # one-hot labels for the 3-way softmax: [steroid, heme, nucleotide]\n",
|
|
102
|
+ "lmin = len(steroid)\n",
|
|
103
|
+ "lmid = len(heme)\n",
|
|
104
|
+ "lmax = len(nucleotide)"
|
|
105
|
+ ]
|
75
|
106
|
},
|
76
|
107
|
{
|
77
|
108
|
"cell_type": "code",
|
78
|
|
- "execution_count": null,
|
|
109
|
+ "execution_count": 4,
|
79
|
110
|
"metadata": {},
|
80
|
111
|
"outputs": [],
|
81
|
|
- "source": []
|
|
112
|
+ "source": [
|
|
113
|
+ "n = -1  # n: destination row in data_onehot/output; i: shuffled sample index\n",
|
|
114
|
+ "for i in indices:\n",
|
|
115
|
+ "    n += 1\n",
|
|
116
|
+ "    if i < lmin:  # indices [0, lmin) -> steroid\n",
|
|
117
|
+ "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
|
|
118
|
+ "        output[n,] = [1,0,0]\n",
|
|
119
|
+ "    elif i < (lmin + lmid):  # indices [lmin, lmin+lmid) -> heme; i == lmin must map to heme[0]\n",
|
|
120
|
+ "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
|
|
121
|
+ "        output[n,] = [0,1,0]\n",
|
|
122
|
+ "    else:  # remaining indices -> nucleotide; assumes lmin + lmid + lmax == 2219 (TODO confirm)\n",
|
|
123
|
+ "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid)]+\".npy\")\n",
|
|
124
|
+ "        output[n,] = [0,0,1]"
|
|
125
|
+ ]
|
|
126
|
+ },
|
|
127
|
+ {
|
|
128
|
+ "cell_type": "code",
|
|
129
|
+ "execution_count": 5,
|
|
130
|
+ "metadata": {},
|
|
131
|
+ "outputs": [],
|
|
132
|
+ "source": [
|
|
133
|
+ "X_train = data_onehot[0:1664,]\n",
|
|
134
|
+ "Y_train = output[0:1664,]\n",
|
|
135
|
+ "X_test = data_onehot[1664:,]\n",
|
|
136
|
+ "Y_test = output[1664:,]"
|
|
137
|
+ ]
|
|
138
|
+ },
|
|
139
|
+ {
|
|
140
|
+ "cell_type": "code",
|
|
141
|
+ "execution_count": 14,
|
|
142
|
+ "metadata": {},
|
|
143
|
+ "outputs": [
|
|
144
|
+ {
|
|
145
|
+ "data": {
|
|
146
|
+ "text/plain": [
|
|
147
|
+ "(1, 14, 32, 32, 32)"
|
|
148
|
+ ]
|
|
149
|
+ },
|
|
150
|
+ "execution_count": 14,
|
|
151
|
+ "metadata": {},
|
|
152
|
+ "output_type": "execute_result"
|
|
153
|
+ }
|
|
154
|
+ ],
|
|
155
|
+ "source": [
|
|
156
|
+ "def model_sequential():  # build the model object\n",
|
|
157
|
+ "    \"\"\"\n",
|
|
158
|
+ "    Build and compile a 3D-convolutional 3-class model with the functional API.\n",
|
|
159
|
+ "    \n",
|
|
160
|
+ "    Returns :\n",
|
|
161
|
+ "        - model : keras.Model\n",
|
|
162
|
+ "    \"\"\"\n",
|
|
163
|
+ "    inputs = Input(shape=(32,32,32,14))  # NOTE(review): data_onehot is (N, 14, 32, 32, 32); confirm axis order\n",
|
|
164
|
+ "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"LeakyReLU\",\n",
|
|
165
|
+ "                    kernel_initializer=\"he_normal\")(inputs)\n",
|
|
166
|
+ "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"LeakyReLU\",\n",
|
|
167
|
+ "                    kernel_initializer=\"he_normal\")(conv_1)\n",
|
|
168
|
+ "    drop_1 = Dropout(0.2)(conv_2)\n",
|
|
169
|
+ "    maxpool = MaxPooling3D()(drop_1)\n",
|
|
170
|
+ "    drop_2 = Dropout(0.4)(maxpool)\n",
|
|
171
|
+ "    dense = Dense(512)(drop_2)\n",
|
|
172
|
+ "    drop_3 = Dropout(0.4)(dense)\n",
|
|
173
|
+ "    output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
|
|
174
|
+ "    model = Model(inputs=inputs, outputs=output)\n",
|
|
175
|
+ "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
|
|
176
|
+ "    model.summary()  # summary() prints the layer table itself; print(model.summary) only showed the method repr\n",
|
|
177
|
+ "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
|
|
178
|
+ "                  metrics=[\"accuracy\"])\n",
|
|
179
|
+ "    return model"
|
|
180
|
+ ]
|
82
|
181
|
},
|
83
|
182
|
{
|
84
|
183
|
"cell_type": "code",
|