Преглед на файлове

random shuffle of sample

nzimme преди 5 години
родител
ревизия
cc992f7f2d
променени са 1 файла, в които са добавени 108 реда и са изтрити 9 реда
  1. 108 9
      DeepDrug.ipynb

+ 108 - 9
DeepDrug.ipynb Целия файл

@@ -9,11 +9,28 @@
9 9
   },
10 10
   {
11 11
    "cell_type": "code",
12
-   "execution_count": 2,
12
+   "execution_count": 1,
13 13
    "metadata": {},
14
-   "outputs": [],
14
+   "outputs": [
15
+    {
16
+     "name": "stderr",
17
+     "output_type": "stream",
18
+     "text": [
19
+      "Using TensorFlow backend.\n"
20
+     ]
21
+    }
22
+   ],
15 23
    "source": [
16
-    "import numpy as np"
24
+    "import numpy as np\n",
25
+    "\n",
26
+    "from sklearn.preprocessing import LabelEncoder\n",
27
+    "from keras.models import Sequential\n",
28
+    "from keras import optimizers\n",
29
+    "from keras.layers import Dense, Flatten, TimeDistributedn, Dropout\n",
30
+    "from keras import Input, Model\n",
31
+    "from keras.layers import add, Activation\n",
32
+    "#from keras.utils import plot_model  # Needs pydot.\n",
33
+    "from keras.layers import Conv3D, MaxPooling3D"
17 34
    ]
18 35
   },
19 36
   {
@@ -30,7 +47,7 @@
30 47
   },
31 48
   {
32 49
    "cell_type": "code",
33
-   "execution_count": 3,
50
+   "execution_count": 2,
34 51
    "metadata": {},
35 52
    "outputs": [
36 53
     {
@@ -39,7 +56,7 @@
39 56
        "''"
40 57
       ]
41 58
      },
42
-     "execution_count": 3,
59
+     "execution_count": 2,
43 60
      "metadata": {},
44 61
      "output_type": "execute_result"
45 62
     }
@@ -66,19 +83,101 @@
66 83
     "nucleotide.pop()"
67 84
    ]
68 85
   },
86
+  {
87
+   "cell_type": "markdown",
88
+   "metadata": {},
89
+   "source": [
90
+    "### Creating input and outputs"
91
+   ]
92
+  },
69 93
   {
70 94
    "cell_type": "code",
71
-   "execution_count": 6,
95
+   "execution_count": 3,
72 96
    "metadata": {},
73 97
    "outputs": [],
74
-   "source": []
98
+   "source": [
99
+    "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
100
+    "indices = np.random.permutation(2219)\n",
101
+    "output = np.ndarray(shape=(2219, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
102
+    "lmin = len(steroid)\n",
103
+    "lmid = len(heme)\n",
104
+    "lmax = len(nucleotide)"
105
+   ]
75 106
   },
76 107
   {
77 108
    "cell_type": "code",
78
-   "execution_count": null,
109
+   "execution_count": 4,
79 110
    "metadata": {},
80 111
    "outputs": [],
81
-   "source": []
112
+   "source": [
113
+    "n = -1\n",
114
+    "for i in indices:\n",
115
+    "    n += 1\n",
116
+    "    if i < lmin:\n",
117
+    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
118
+    "        output[n,] = [1,0,0]\n",
119
+    "    elif i > lmin and i < (lmin + lmid):\n",
120
+    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
121
+    "        output[n,] = [0,1,0]\n",
122
+    "    else:\n",
123
+    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid) - 1]+\".npy\")\n",
124
+    "        output[n,] = [0,0,1]"
125
+   ]
126
+  },
127
+  {
128
+   "cell_type": "code",
129
+   "execution_count": 5,
130
+   "metadata": {},
131
+   "outputs": [],
132
+   "source": [
133
+    "X_train = data_onehot[0:1664,]\n",
134
+    "Y_train = output[0:1664,]\n",
135
+    "X_test = data_onehot[1664:,]\n",
136
+    "Y_test = output[1664:,]"
137
+   ]
138
+  },
139
+  {
140
+   "cell_type": "code",
141
+   "execution_count": 14,
142
+   "metadata": {},
143
+   "outputs": [
144
+    {
145
+     "data": {
146
+      "text/plain": [
147
+       "(1, 14, 32, 32, 32)"
148
+      ]
149
+     },
150
+     "execution_count": 14,
151
+     "metadata": {},
152
+     "output_type": "execute_result"
153
+    }
154
+   ],
155
+   "source": [
156
+    "def model_sequential(): # create a model object\n",
157
+    "    \"\"\"\n",
158
+    "    Return a simple sequential model\n",
159
+    "    \n",
160
+    "    Returns :\n",
161
+    "        - model : keras.Model\n",
162
+    "    \"\"\"\n",
163
+    "    inputs = Input(shape=(32,32,32,14)) # 759 aa, 21 car onehot\n",
164
+    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"LeakyReLU\",\n",
165
+    "                        kernel_initializer=\"he_normal\")(inputs)\n",
166
+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"LeakyReLU\",\n",
167
+    "                        kernel_initializer=\"he_normal\")(conv_1)\n",
168
+    "    drop_1 = Dropout(0.2)(conv_2)\n",
169
+    "    maxpool = MaxPooling3D()(drop_1)\n",
170
+    "    drop_2 = Dropout(0.4)(maxpool)\n",
171
+    "    dense = Dense(512)(drop_2)\n",
172
+    "    drop_3 = Dropout(0.4)(dense)\n",
173
+    "    output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
174
+    "    model = Model(inputs=inputs, outputs=output)\n",
175
+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
176
+    "    print(model.summary)\n",
177
+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
178
+    "                  metrics=[\"accuracy\"])\n",
179
+    "    return model"
180
+   ]
82 181
   },
83 182
   {
84 183
    "cell_type": "code",