Browse Source

random shuffle of sample

nzimme 4 years ago
parent
commit
cc992f7f2d
1 changed files with 108 additions and 9 deletions
  1. 108 9
      DeepDrug.ipynb

+ 108 - 9
DeepDrug.ipynb View File

9
   },
9
   },
10
   {
10
   {
11
    "cell_type": "code",
11
    "cell_type": "code",
12
-   "execution_count": 2,
12
+   "execution_count": 1,
13
    "metadata": {},
13
    "metadata": {},
14
-   "outputs": [],
14
+   "outputs": [
15
+    {
16
+     "name": "stderr",
17
+     "output_type": "stream",
18
+     "text": [
19
+      "Using TensorFlow backend.\n"
20
+     ]
21
+    }
22
+   ],
15
    "source": [
23
    "source": [
16
-    "import numpy as np"
24
+    "import numpy as np\n",
25
+    "\n",
26
+    "from sklearn.preprocessing import LabelEncoder\n",
27
+    "from keras.models import Sequential\n",
28
+    "from keras import optimizers\n",
29
+    "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
30
+    "from keras import Input, Model\n",
31
+    "from keras.layers import add, Activation, LeakyReLU, Permute\n",
32
+    "#from keras.utils import plot_model  # Needs pydot.\n",
33
+    "from keras.layers import Conv3D, MaxPooling3D"
17
    ]
34
    ]
18
   },
35
   },
19
   {
36
   {
30
   },
47
   },
31
   {
48
   {
32
    "cell_type": "code",
49
    "cell_type": "code",
33
-   "execution_count": 3,
50
+   "execution_count": 2,
34
    "metadata": {},
51
    "metadata": {},
35
    "outputs": [
52
    "outputs": [
36
     {
53
     {
39
        "''"
56
        "''"
40
       ]
57
       ]
41
      },
58
      },
42
-     "execution_count": 3,
59
+     "execution_count": 2,
43
      "metadata": {},
60
      "metadata": {},
44
      "output_type": "execute_result"
61
      "output_type": "execute_result"
45
     }
62
     }
66
     "nucleotide.pop()"
83
     "nucleotide.pop()"
67
    ]
84
    ]
68
   },
85
   },
86
+  {
87
+   "cell_type": "markdown",
88
+   "metadata": {},
89
+   "source": [
90
+    "### Creating inputs and outputs"
91
+   ]
92
+  },
69
   {
93
   {
70
    "cell_type": "code",
94
    "cell_type": "code",
71
-   "execution_count": 6,
95
+   "execution_count": 3,
72
    "metadata": {},
96
    "metadata": {},
73
    "outputs": [],
97
    "outputs": [],
74
-   "source": []
98
+   "source": [
99
+    "# Samples get a global index laid out as [steroid | heme | nucleotide].\n",
100
+    "lmin, lmid, lmax = len(steroid), len(heme), len(nucleotide)\n",
101
+    "n_samples = lmin + lmid + lmax  # derived from the lists rather than a hard-coded 2219\n",
102
+    "data_onehot = np.empty(shape=(n_samples, 14, 32, 32, 32))  # filled sample by sample in the next cell\n",
103
+    "output = np.empty(shape=(n_samples, 3))  # one-hot labels: [steroid, heme, nucleotide]\n",
104
+    "indices = np.random.permutation(n_samples)  # random visiting order, i.e. the shuffle"
105
+   ]
75
   },
106
   },
76
   {
107
   {
77
    "cell_type": "code",
108
    "cell_type": "code",
78
-   "execution_count": null,
109
+   "execution_count": 4,
79
    "metadata": {},
110
    "metadata": {},
80
    "outputs": [],
111
    "outputs": [],
81
-   "source": []
112
+   "source": [
113
+    "# Row n of data_onehot/output receives the sample with global index i, so the\n",
114
+    "# rows end up shuffled. Index ranges: [0, lmin) steroid,\n",
115
+    "# [lmin, lmin + lmid) heme, everything after that nucleotide.\n",
116
+    "for n, i in enumerate(indices):\n",
117
+    "    if i < lmin:\n",
118
+    "        name, label = steroid[i], [1,0,0]\n",
119
+    "    elif i < lmin + lmid:  # i == lmin is the first heme sample\n",
120
+    "        name, label = heme[i - lmin], [0,1,0]\n",
121
+    "    else:\n",
122
+    "        name, label = nucleotide[i - (lmin + lmid)], [0,0,1]  # exact offset, no extra -1\n",
123
+    "    data_onehot[n] = np.load(\"deepdrug3d_voxel_data/\" + name + \".npy\")\n",
124
+    "    output[n] = label"
125
+   ]
126
+  },
127
+  {
128
+   "cell_type": "code",
129
+   "execution_count": 5,
130
+   "metadata": {},
131
+   "outputs": [],
132
+   "source": [
133
+    "# The rows are already shuffled: the first 1664 samples train the model,\n",
134
+    "# the remaining ones are held out for testing.\n",
135
+    "X_train, Y_train = data_onehot[:1664], output[:1664]\n",
136
+    "X_test, Y_test = data_onehot[1664:], output[1664:]"
137
+   ]
138
+  },
139
+  {
140
+   "cell_type": "code",
141
+   "execution_count": 14,
142
+   "metadata": {},
143
+   "outputs": [
144
+    {
145
+     "data": {
146
+      "text/plain": [
147
+       "(1, 14, 32, 32, 32)"
148
+      ]
149
+     },
150
+     "execution_count": 14,
151
+     "metadata": {},
152
+     "output_type": "execute_result"
153
+    }
154
+   ],
155
+   "source": [
156
+    "def model_sequential():  # build the model object\n",
157
+    "    \"\"\"\n",
158
+    "    Return a compiled 3D CNN that assigns a voxelized sample to one of 3 classes\n",
159
+    "    \n",
160
+    "    Returns :\n",
161
+    "        - model : keras.Model\n",
162
+    "    \"\"\"\n",
163
+    "    inputs = Input(shape=(14, 32, 32, 32))  # channels-first, the layout stored in data_onehot\n",
164
+    "    grid = Permute((2, 3, 4, 1))(inputs)  # channels-last, the Conv3D/MaxPooling3D default\n",
165
+    "    # NOTE(review): 28^3 / 26^3 kernels on a 32^3 grid are kept as-is but look oversized - confirm.\n",
166
+    "    conv_1 = LeakyReLU()(Conv3D(64, (28, 28, 28), padding=\"same\", kernel_initializer=\"he_normal\")(grid))\n",
167
+    "    conv_2 = LeakyReLU()(Conv3D(64, (26, 26, 26), padding=\"same\", kernel_initializer=\"he_normal\")(conv_1))\n",
168
+    "    drop_1 = Dropout(0.2)(conv_2)\n",
169
+    "    maxpool = MaxPooling3D()(drop_1)\n",
170
+    "    drop_2 = Dropout(0.4)(maxpool)\n",
171
+    "    flat = Flatten()(drop_2)  # one feature vector per sample, so the head outputs shape (batch, 3)\n",
172
+    "    dense = Dense(512)(flat)\n",
173
+    "    drop_3 = Dropout(0.4)(dense)\n",
174
+    "    output = Dense(3, activation='softmax')(drop_3)\n",
175
+    "    model = Model(inputs=inputs, outputs=output)\n",
176
+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
177
+    "    model.summary()  # summary() prints the table itself; it was never called before\n",
178
+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\", metrics=[\"accuracy\"])\n",
179
+    "    return model"
180
+   ]
82
   },
181
   },
83
   {
182
   {
84
    "cell_type": "code",
183
    "cell_type": "code",