Browse source

initial commit

nicolas-zimmermann, 5 years ago
commit
5ba788a7a0

BIN
._deepdrug3d_voxel_data View file


+ 101 - 0
.ipynb_checkpoints/DeepDrug-checkpoint.ipynb View file

@@ -0,0 +1,101 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# DeepDrug3D"
8
+   ]
9
+  },
10
+  {
11
+   "cell_type": "code",
12
+   "execution_count": 1,
13
+   "metadata": {},
14
+   "outputs": [],
15
+   "source": [
16
+    "import numpy as np"
17
+   ]
18
+  },
19
+  {
20
+   "cell_type": "markdown",
21
+   "metadata": {},
22
+   "source": [
23
+    "## Create pocket lists\n",
24
+    "4 pockets are created :\n",
25
+    "  + control\n",
26
+    "  + steroid\n",
27
+    "  + heme\n",
28
+    "  + nucleotide"
29
+   ]
30
+  },
31
+  {
32
+   "cell_type": "code",
33
+   "execution_count": 3,
34
+   "metadata": {},
35
+   "outputs": [],
36
+   "source": [
37
+    "with open(\"control.list\", \"r\") as filin:\n",
38
+    "    control = filin.read()\n",
39
+    "control = control.split(\"\\n\")\n",
40
+    "control.pop()"
41
+   ]
42
+  },
43
+  {
44
+   "cell_type": "code",
45
+   "execution_count": null,
46
+   "metadata": {},
47
+   "outputs": [],
48
+   "source": [
49
+    "with open(\"steroid.list\", \"r\") as filin:\n",
50
+    "    steroid = filin.read()\n",
51
+    "steroid = steroid.split(\"\\n\")\n",
52
+    "steroid.pop()"
53
+   ]
54
+  },
55
+  {
56
+   "cell_type": "code",
57
+   "execution_count": null,
58
+   "metadata": {},
59
+   "outputs": [],
60
+   "source": [
61
+    "with open(\"heme.list\", \"r\") as filin:\n",
62
+    "    heme = filin.read()\n",
63
+    "heme = heme.split(\"\\n\")\n",
64
+    "heme.pop()"
65
+   ]
66
+  },
67
+  {
68
+   "cell_type": "code",
69
+   "execution_count": null,
70
+   "metadata": {},
71
+   "outputs": [],
72
+   "source": [
73
+    "with open(\"nucleotide.list\", \"r\") as filin:\n",
74
+    "    nucleotide = filin.read()\n",
75
+    "nucleotide = nucleotide.split(\"\\n\")\n",
76
+    "nucleotide.pop()"
77
+   ]
78
+  }
79
+ ],
80
+ "metadata": {
81
+  "kernelspec": {
82
+   "display_name": "Python 3",
83
+   "language": "python",
84
+   "name": "python3"
85
+  },
86
+  "language_info": {
87
+   "codemirror_mode": {
88
+    "name": "ipython",
89
+    "version": 3
90
+   },
91
+   "file_extension": ".py",
92
+   "mimetype": "text/x-python",
93
+   "name": "python",
94
+   "nbconvert_exporter": "python",
95
+   "pygments_lexer": "ipython3",
96
+   "version": "3.7.4"
97
+  }
98
+ },
99
+ "nbformat": 4,
100
+ "nbformat_minor": 4
101
+}

+ 387 - 0
.ipynb_checkpoints/secondary_struct-checkpoint.ipynb View file

@@ -0,0 +1,387 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "code",
5
+   "execution_count": 18,
6
+   "metadata": {},
7
+   "outputs": [],
8
+   "source": [
9
+    "import keras\n",
10
+    "import numpy as np\n",
11
+    "import pandas as pd\n",
12
+    "\n",
13
+    "from sklearn.preprocessing import LabelEncoder\n",
14
+    "from keras.models import Sequential\n",
15
+    "from keras.layers import Dense, Flatten, TimeDistributed\n",
16
+    "from keras import Input, Model\n",
17
+    "from keras.layers import add, Activation\n",
18
+    "#from keras.utils import plot_model  # Needs pydot.\n",
19
+    "from keras.layers import Conv1D, AveragePooling1D"
20
+   ]
21
+  },
22
+  {
23
+   "cell_type": "code",
24
+   "execution_count": 19,
25
+   "metadata": {},
26
+   "outputs": [],
27
+   "source": [
28
+    "def file_to_dataframe(filename):\n",
29
+    "    \"\"\"\n",
30
+    "    Returns a pandas dataframe ncol = len(longest_sequence),\n",
31
+    "                               nrow = number of sequences\n",
32
+    "                               \n",
33
+    "    Arguments :\n",
34
+    "        - filename : str\n",
35
+    "            path to file\n",
36
+    "    Takes a path to a files containing any sequences, must be 1 sequence\n",
37
+    "    per line.\n",
38
+    "    \n",
39
+    "    Sequences shorter than longest_sequence are completed with '0' char.\n",
40
+    "    \"\"\"\n",
41
+    "    filin = open(filename)\n",
42
+    "    fastas0 = filin.read()\n",
43
+    "    fastas0 = fastas0.split('\\n')\n",
44
+    "    nmax = len(max(fastas0, key=len))\n",
45
+    "    fastas = []\n",
46
+    "    for fasta in fastas0:\n",
47
+    "        fastas.append(fasta + (nmax - len(fasta)) * '0')\n",
48
+    "    \n",
49
+    "    seqs = pd.DataFrame(index=range(len(fastas)), columns=range(nmax))\n",
50
+    "    \n",
51
+    "    for i, fasta in enumerate(fastas):\n",
52
+    "        seqs.loc[i] = pd.Series(list(fasta))\n",
53
+    "    \n",
54
+    "    return seqs"
55
+   ]
56
+  },
57
+  {
58
+   "cell_type": "code",
59
+   "execution_count": 20,
60
+   "metadata": {},
61
+   "outputs": [],
62
+   "source": [
63
+    "def model_sequential(): # créer un objet modèle\n",
64
+    "    \"\"\"\n",
65
+    "    Return a simple sequentiel model\n",
66
+    "    \n",
67
+    "    Returns :\n",
68
+    "        - model : keras.Model\n",
69
+    "    \"\"\"\n",
70
+    "    inputs = Input(shape=(759,21)) # 759 aa, 21 car onehot\n",
71
+    "    conv_1 = Conv1D(25, (5), padding=\"same\", activation=\"relu\",\n",
72
+    "                        kernel_initializer=\"he_normal\")(inputs)\n",
73
+    "    conv_2 = Conv1D(35, (5), padding=\"same\", activation=\"relu\",\n",
74
+    "                        kernel_initializer=\"he_normal\")(conv_1)\n",
75
+    "    output = TimeDistributed(Dense(4, activation='softmax'))(conv_2)\n",
76
+    "    model = Model(inputs=inputs, outputs=output)\n",
77
+    "    print(model.summary)\n",
78
+    "    model.compile(optimizer=\"adam\", loss=\"categorical_crossentropy\",\n",
79
+    "                  metrics=[\"accuracy\"])\n",
80
+    "    return model"
81
+   ]
82
+  },
83
+  {
84
+   "cell_type": "code",
85
+   "execution_count": 21,
86
+   "metadata": {},
87
+   "outputs": [],
88
+   "source": [
89
+    "def char_to_onehot(df, ncol=0):\n",
90
+    "    \"\"\"\n",
91
+    "    Returns the given str-encoded dataframe into a onehot encoded dataframe as an array object\n",
92
+    "    \n",
93
+    "    * Arguments:\n",
94
+    "        - df : pandas.DataFrame, the dataframe containing the sequences, cell \n",
95
+    "                               containing strings.\n",
96
+    "        - ncol : int, Optionnal. the number of col used to build the LabelEncoder\n",
97
+    "                    which will transform the df cells into int(categorical).\n",
98
+    "    \"\"\"\n",
99
+    "    classes = LabelEncoder()\n",
100
+    "    classes.fit(df[ncol])\n",
101
+    "    df_categorical = df.apply(func=classes.transform, axis=0)\n",
102
+    "    df_onehot = keras.utils.to_categorical(df_categorical)\n",
103
+    "    return df_onehot"
104
+   ]
105
+  },
106
+  {
107
+   "cell_type": "code",
108
+   "execution_count": 22,
109
+   "metadata": {},
110
+   "outputs": [],
111
+   "source": [
112
+    "def true_accuracy(predictions, onehot_Y_test):\n",
113
+    "    \"\"\"\n",
114
+    "    Computes the accuracy ignoring the \"0\" of the DataFrame.\n",
115
+    "    \n",
116
+    "    * Arguments :\n",
117
+    "        - predictions : numpy.array, output of the model, onehot encoded.\n",
118
+    "        - onehot_Y_test : numpy.array, the true values, onehot encoded.\n",
119
+    "        \n",
120
+    "    * Returns :\n",
121
+    "        - acc_coor : int, accuracy corrected, not considering '0' predictions\n",
122
+    "    \"\"\"\n",
123
+    "    tp = 0\n",
124
+    "    tn = 0\n",
125
+    "    fn = 0\n",
126
+    "    fp = 0\n",
127
+    "    tot = 0\n",
128
+    "\n",
129
+    "    for i in range(len(predictions)):\n",
130
+    "        for j in range(len(predictions[i])):\n",
131
+    "            if onehot_Y_test[i, j, 3] != 0.:\n",
132
+    "                predmax = -1\n",
133
+    "                predict_class = -1\n",
134
+    "                true_class = -1\n",
135
+    "                for k in range(len(predictions[i, j])):\n",
136
+    "                    if predmax < predictions[i, j, k]:\n",
137
+    "                        predmax = predictions[i, j, k]\n",
138
+    "                        predict_class = k\n",
139
+    "                    if onehot_Y_test[i, j, k] == 1.:\n",
140
+    "                        true_class = k\n",
141
+    "                if predict_class == true_class:\n",
142
+    "                    tp = tp+1\n",
143
+    "                tot = tot + 1\n",
144
+    "    acc_corr = tp/tot*100\n",
145
+    "    return acc_corr"
146
+   ]
147
+  },
148
+  {
149
+   "cell_type": "code",
150
+   "execution_count": 23,
151
+   "metadata": {},
152
+   "outputs": [],
153
+   "source": [
154
+    "# Create dataframe from files\n",
155
+    "fasta = file_to_dataframe(\"data/train.fasta\")\n",
156
+    "fasta = fasta.replace(\"X\", \"A\") # 'X' in fasta sequences are replace with 'A'\n",
157
+    "dssp = file_to_dataframe(\"data/train.dssp\")"
158
+   ]
159
+  },
160
+  {
161
+   "cell_type": "code",
162
+   "execution_count": 24,
163
+   "metadata": {},
164
+   "outputs": [],
165
+   "source": [
166
+    "# Writes Dataframes to csv files\n",
167
+    "fasta.to_csv(path_or_buf=\"data/train_fasta.csv\")\n",
168
+    "dssp.to_csv(path_or_buf=\"data/train_dssp.csv\")"
169
+   ]
170
+  },
171
+  {
172
+   "cell_type": "code",
173
+   "execution_count": 25,
174
+   "metadata": {},
175
+   "outputs": [],
176
+   "source": [
177
+    "# To onehot\n",
178
+    "fasta_onehot = char_to_onehot(fasta, 0)\n",
179
+    "dssp_onehot = char_to_onehot(dssp, 2)\n"
180
+   ]
181
+  },
182
+  {
183
+   "cell_type": "code",
184
+   "execution_count": 26,
185
+   "metadata": {},
186
+   "outputs": [],
187
+   "source": [
188
+    "# Divides the dataset in train and test subsets\n",
189
+    "fasta_train = fasta_onehot[0:1000,]\n",
190
+    "dssp_train = dssp_onehot[0:1000,]\n",
191
+    "fasta_test = fasta_onehot[1000:,]\n",
192
+    "dssp_test = dssp_onehot[1000:]"
193
+   ]
194
+  },
195
+  {
196
+   "cell_type": "code",
197
+   "execution_count": 27,
198
+   "metadata": {},
199
+   "outputs": [
200
+    {
201
+     "name": "stdout",
202
+     "output_type": "stream",
203
+     "text": [
204
+      "<bound method Network.summary of <keras.engine.training.Model object at 0x7f68cfb7b0d0>>\n",
205
+      "Epoch 1/30\n",
206
+      "1000/1000 [==============================] - 2s 2ms/step - loss: 0.7467 - acc: 0.7447\n",
207
+      "Epoch 2/30\n",
208
+      "1000/1000 [==============================] - 1s 974us/step - loss: 0.2610 - acc: 0.8944\n",
209
+      "Epoch 3/30\n",
210
+      "1000/1000 [==============================] - 1s 964us/step - loss: 0.2053 - acc: 0.9086\n",
211
+      "Epoch 4/30\n",
212
+      "1000/1000 [==============================] - 1s 994us/step - loss: 0.1899 - acc: 0.9154\n",
213
+      "Epoch 5/30\n",
214
+      "1000/1000 [==============================] - 1s 986us/step - loss: 0.1817 - acc: 0.9193\n",
215
+      "Epoch 6/30\n",
216
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1790 - acc: 0.9201\n",
217
+      "Epoch 7/30\n",
218
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1759 - acc: 0.9217\n",
219
+      "Epoch 8/30\n",
220
+      "1000/1000 [==============================] - 1s 960us/step - loss: 0.1743 - acc: 0.9222\n",
221
+      "Epoch 9/30\n",
222
+      "1000/1000 [==============================] - 1s 977us/step - loss: 0.1735 - acc: 0.9226\n",
223
+      "Epoch 10/30\n",
224
+      "1000/1000 [==============================] - 1s 963us/step - loss: 0.1727 - acc: 0.9228\n",
225
+      "Epoch 11/30\n",
226
+      "1000/1000 [==============================] - 1s 999us/step - loss: 0.1712 - acc: 0.9239\n",
227
+      "Epoch 12/30\n",
228
+      "1000/1000 [==============================] - 1s 963us/step - loss: 0.1702 - acc: 0.9243\n",
229
+      "Epoch 13/30\n",
230
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1695 - acc: 0.9249\n",
231
+      "Epoch 14/30\n",
232
+      "1000/1000 [==============================] - 1s 978us/step - loss: 0.1687 - acc: 0.9252\n",
233
+      "Epoch 15/30\n",
234
+      "1000/1000 [==============================] - 1s 981us/step - loss: 0.1678 - acc: 0.9257\n",
235
+      "Epoch 16/30\n",
236
+      "1000/1000 [==============================] - 1s 985us/step - loss: 0.1669 - acc: 0.9260\n",
237
+      "Epoch 17/30\n",
238
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1661 - acc: 0.9267\n",
239
+      "Epoch 18/30\n",
240
+      "1000/1000 [==============================] - 1s 1000us/step - loss: 0.1656 - acc: 0.9269\n",
241
+      "Epoch 19/30\n",
242
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1650 - acc: 0.9273\n",
243
+      "Epoch 20/30\n",
244
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1640 - acc: 0.9277\n",
245
+      "Epoch 21/30\n",
246
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1637 - acc: 0.9281\n",
247
+      "Epoch 22/30\n",
248
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1630 - acc: 0.9285\n",
249
+      "Epoch 23/30\n",
250
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1621 - acc: 0.9289\n",
251
+      "Epoch 24/30\n",
252
+      "1000/1000 [==============================] - 1s 992us/step - loss: 0.1619 - acc: 0.9289\n",
253
+      "Epoch 25/30\n",
254
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1616 - acc: 0.9291\n",
255
+      "Epoch 26/30\n",
256
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1616 - acc: 0.9291\n",
257
+      "Epoch 27/30\n",
258
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1608 - acc: 0.9296\n",
259
+      "Epoch 28/30\n",
260
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1604 - acc: 0.9297\n",
261
+      "Epoch 29/30\n",
262
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1601 - acc: 0.9298\n",
263
+      "Epoch 30/30\n",
264
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1597 - acc: 0.9300\n"
265
+     ]
266
+    },
267
+    {
268
+     "data": {
269
+      "text/plain": [
270
+       "<keras.callbacks.History at 0x7f68cfcf6050>"
271
+      ]
272
+     },
273
+     "execution_count": 27,
274
+     "metadata": {},
275
+     "output_type": "execute_result"
276
+    }
277
+   ],
278
+   "source": [
279
+    "model = model_sequential()\n",
280
+    "model.fit(fasta_train, dssp_train, epochs=30, batch_size=30)"
281
+   ]
282
+  },
283
+  {
284
+   "cell_type": "code",
285
+   "execution_count": 30,
286
+   "metadata": {},
287
+   "outputs": [],
288
+   "source": [
289
+    "predictions = model.predict(fasta_test, batch_size=30)"
290
+   ]
291
+  },
292
+  {
293
+   "cell_type": "code",
294
+   "execution_count": 31,
295
+   "metadata": {},
296
+   "outputs": [
297
+    {
298
+     "name": "stdout",
299
+     "output_type": "stream",
300
+     "text": [
301
+      "accuracy : 68.06409539780138\n"
302
+     ]
303
+    }
304
+   ],
305
+   "source": [
306
+    "print(\"accuracy : {}\".format(true_accuracy(predictions, dssp_test)))"
307
+   ]
308
+  },
309
+  {
310
+   "cell_type": "code",
311
+   "execution_count": null,
312
+   "metadata": {},
313
+   "outputs": [],
314
+   "source": []
315
+  },
316
+  {
317
+   "cell_type": "code",
318
+   "execution_count": null,
319
+   "metadata": {},
320
+   "outputs": [],
321
+   "source": []
322
+  },
323
+  {
324
+   "cell_type": "code",
325
+   "execution_count": null,
326
+   "metadata": {},
327
+   "outputs": [],
328
+   "source": []
329
+  },
330
+  {
331
+   "cell_type": "code",
332
+   "execution_count": null,
333
+   "metadata": {},
334
+   "outputs": [],
335
+   "source": []
336
+  },
337
+  {
338
+   "cell_type": "code",
339
+   "execution_count": null,
340
+   "metadata": {},
341
+   "outputs": [],
342
+   "source": []
343
+  },
344
+  {
345
+   "cell_type": "code",
346
+   "execution_count": null,
347
+   "metadata": {},
348
+   "outputs": [],
349
+   "source": []
350
+  },
351
+  {
352
+   "cell_type": "code",
353
+   "execution_count": null,
354
+   "metadata": {},
355
+   "outputs": [],
356
+   "source": []
357
+  },
358
+  {
359
+   "cell_type": "code",
360
+   "execution_count": null,
361
+   "metadata": {},
362
+   "outputs": [],
363
+   "source": []
364
+  }
365
+ ],
366
+ "metadata": {
367
+  "kernelspec": {
368
+   "display_name": "Python 3",
369
+   "language": "python",
370
+   "name": "python3"
371
+  },
372
+  "language_info": {
373
+   "codemirror_mode": {
374
+    "name": "ipython",
375
+    "version": 3
376
+   },
377
+   "file_extension": ".py",
378
+   "mimetype": "text/x-python",
379
+   "name": "python",
380
+   "nbconvert_exporter": "python",
381
+   "pygments_lexer": "ipython3",
382
+   "version": "3.7.4"
383
+  }
384
+ },
385
+ "nbformat": 4,
386
+ "nbformat_minor": 4
387
+}
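Note on the notebook above: its true_accuracy helper scores predictions with three nested Python loops. As an illustrative aside (not part of the commit), a vectorized NumPy sketch of the same scoring rule, assuming arrays shaped (n_sequences, 759, 4) as the notebook produces, could look like this; it keeps the loop's selection rule of only counting positions where the one-hot entry at index 3 is non-zero.

    import numpy as np

    def masked_accuracy(predictions, onehot_y_true):
        """Vectorized equivalent of true_accuracy, returning a percentage."""
        mask = onehot_y_true[:, :, 3] != 0.0        # positions the loop would score
        pred_class = predictions.argmax(axis=-1)    # first maximum, like the loop
        true_class = onehot_y_true.argmax(axis=-1)  # index of the one-hot 1
        return (pred_class == true_class)[mask].mean() * 100.0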

+ 112 - 0
DeepDrug.ipynb View file

@@ -0,0 +1,112 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# DeepDrug3D"
8
+   ]
9
+  },
10
+  {
11
+   "cell_type": "code",
12
+   "execution_count": 2,
13
+   "metadata": {},
14
+   "outputs": [],
15
+   "source": [
16
+    "import numpy as np"
17
+   ]
18
+  },
19
+  {
20
+   "cell_type": "markdown",
21
+   "metadata": {},
22
+   "source": [
23
+    "## Create pocket lists\n",
24
+    "4 pockets are created :\n",
25
+    "  + control\n",
26
+    "  + steroid\n",
27
+    "  + heme\n",
28
+    "  + nucleotide"
29
+   ]
30
+  },
31
+  {
32
+   "cell_type": "code",
33
+   "execution_count": 3,
34
+   "metadata": {},
35
+   "outputs": [
36
+    {
37
+     "data": {
38
+      "text/plain": [
39
+       "''"
40
+      ]
41
+     },
42
+     "execution_count": 3,
43
+     "metadata": {},
44
+     "output_type": "execute_result"
45
+    }
46
+   ],
47
+   "source": [
48
+    "with open(\"control.list\", \"r\") as filin:\n",
49
+    "    control = filin.read()\n",
50
+    "control = control.split(\"\\n\")\n",
51
+    "control.pop()\n",
52
+    "\n",
53
+    "with open(\"steroid.list\", \"r\") as filin:\n",
54
+    "    steroid = filin.read()\n",
55
+    "steroid = steroid.split(\"\\n\")\n",
56
+    "steroid.pop()\n",
57
+    "\n",
58
+    "with open(\"heme.list\", \"r\") as filin:\n",
59
+    "    heme = filin.read()\n",
60
+    "heme = heme.split(\"\\n\")\n",
61
+    "heme.pop()\n",
62
+    "\n",
63
+    "with open(\"nucleotide.list\", \"r\") as filin:\n",
64
+    "    nucleotide = filin.read()\n",
65
+    "nucleotide = nucleotide.split(\"\\n\")\n",
66
+    "nucleotide.pop()"
67
+   ]
68
+  },
69
+  {
70
+   "cell_type": "code",
71
+   "execution_count": 6,
72
+   "metadata": {},
73
+   "outputs": [],
74
+   "source": []
75
+  },
76
+  {
77
+   "cell_type": "code",
78
+   "execution_count": null,
79
+   "metadata": {},
80
+   "outputs": [],
81
+   "source": []
82
+  },
83
+  {
84
+   "cell_type": "code",
85
+   "execution_count": null,
86
+   "metadata": {},
87
+   "outputs": [],
88
+   "source": []
89
+  }
90
+ ],
91
+ "metadata": {
92
+  "kernelspec": {
93
+   "display_name": "Python 3",
94
+   "language": "python",
95
+   "name": "python3"
96
+  },
97
+  "language_info": {
98
+   "codemirror_mode": {
99
+    "name": "ipython",
100
+    "version": 3
101
+   },
102
+   "file_extension": ".py",
103
+   "mimetype": "text/x-python",
104
+   "name": "python",
105
+   "nbconvert_exporter": "python",
106
+   "pygments_lexer": "ipython3",
107
+   "version": "3.7.4"
108
+  }
109
+ },
110
+ "nbformat": 4,
111
+ "nbformat_minor": 4
112
+}
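The cell above repeats the same read / split("\n") / pop() pattern for each of the four pocket lists. A small helper, sketched here under the assumption that each .list file ends with a trailing newline (which is what the pop() calls remove), would factor that out:

    def read_pocket_list(path):
        """Read one pocket list (one identifier per line) into a Python list.

        Mirrors the notebook cell: split on newlines, then drop the empty
        string left by the trailing newline.
        """
        with open(path, "r") as filin:
            entries = filin.read().split("\n")
        if entries and entries[-1] == "":
            entries.pop()
        return entries

    # Hypothetical usage with the four lists committed in this repository.
    pockets = {name: read_pocket_list(name + ".list")
               for name in ("control", "steroid", "heme", "nucleotide")}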

+ 120 - 0
config.yml View file

@@ -0,0 +1,120 @@
1
+name: projet-dl
2
+channels:
3
+  - defaults
4
+dependencies:
5
+  - _libgcc_mutex=0.1=main
6
+  - attrs=19.1.0=py37_1
7
+  - backcall=0.1.0=py37_0
8
+  - blas=1.0=mkl
9
+  - bleach=3.1.0=py37_0
10
+  - ca-certificates=2019.5.15=1
11
+  - certifi=2019.6.16=py37_1
12
+  - dbus=1.13.6=h746ee38_0
13
+  - decorator=4.4.0=py37_1
14
+  - defusedxml=0.6.0=py_0
15
+  - entrypoints=0.3=py37_0
16
+  - expat=2.2.6=he6710b0_0
17
+  - fontconfig=2.13.0=h9420a91_0
18
+  - freetype=2.9.1=h8a8886c_1
19
+  - glib=2.56.2=hd408876_0
20
+  - gmp=6.1.2=h6c8ec71_1
21
+  - gst-plugins-base=1.14.0=hbbd80ab_1
22
+  - gstreamer=1.14.0=hb453b48_1
23
+  - icu=58.2=h9c2bf20_1
24
+  - intel-openmp=2019.4=243
25
+  - ipykernel=5.1.2=py37h39e3cac_0
26
+  - ipython=7.8.0=py37h39e3cac_0
27
+  - ipython_genutils=0.2.0=py37_0
28
+  - ipywidgets=7.5.1=py_0
29
+  - jedi=0.15.1=py37_0
30
+  - jinja2=2.10.1=py37_0
31
+  - jpeg=9b=h024ee3a_2
32
+  - json5=0.8.5=py_0
33
+  - jsonschema=3.0.2=py37_0
34
+  - jupyter=1.0.0=py37_7
35
+  - jupyter_client=5.3.1=py_0
36
+  - jupyter_console=6.0.0=py37_0
37
+  - jupyter_core=4.5.0=py_0
38
+  - jupyterlab=1.1.3=pyhf63ae98_0
39
+  - jupyterlab_server=1.0.0=py_1
40
+  - libedit=3.1.20181209=hc058e9b_0
41
+  - libffi=3.2.1=hd88cf55_4
42
+  - libgcc-ng=9.1.0=hdf63c60_0
43
+  - libgfortran-ng=7.3.0=hdf63c60_0
44
+  - libpng=1.6.37=hbc83047_0
45
+  - libsodium=1.0.16=h1bed415_0
46
+  - libstdcxx-ng=9.1.0=hdf63c60_0
47
+  - libuuid=1.0.3=h1bed415_2
48
+  - libxcb=1.13=h1bed415_1
49
+  - libxml2=2.9.9=hea5a465_1
50
+  - markupsafe=1.1.1=py37h7b6447c_0
51
+  - mistune=0.8.4=py37h7b6447c_0
52
+  - mkl=2019.4=243
53
+  - mkl-service=2.3.0=py37he904b0f_0
54
+  - mkl_fft=1.0.14=py37ha843d7b_0
55
+  - mkl_random=1.0.2=py37hd81dba3_0
56
+  - nbconvert=5.5.0=py_0
57
+  - nbformat=4.4.0=py37_0
58
+  - ncurses=6.1=he6710b0_1
59
+  - notebook=6.0.1=py37_0
60
+  - numpy=1.16.5=py37h7e9f1db_0
61
+  - numpy-base=1.16.5=py37hde5b4d6_0
62
+  - openssl=1.1.1d=h7b6447c_1
63
+  - pandas=0.25.1=py37he6710b0_0
64
+  - pandoc=2.2.3.2=0
65
+  - pandocfilters=1.4.2=py37_1
66
+  - parso=0.5.1=py_0
67
+  - pcre=8.43=he6710b0_0
68
+  - pexpect=4.7.0=py37_0
69
+  - pickleshare=0.7.5=py37_0
70
+  - pip=19.2.2=py37_0
71
+  - prometheus_client=0.7.1=py_0
72
+  - prompt_toolkit=2.0.9=py37_0
73
+  - ptyprocess=0.6.0=py37_0
74
+  - pygments=2.4.2=py_0
75
+  - pyqt=5.9.2=py37h05f1152_2
76
+  - pyrsistent=0.14.11=py37h7b6447c_0
77
+  - python=3.7.4=h265db76_1
78
+  - python-dateutil=2.8.0=py37_0
79
+  - pytz=2019.2=py_0
80
+  - pyzmq=18.1.0=py37he6710b0_0
81
+  - qt=5.9.7=h5867ecd_1
82
+  - qtconsole=4.5.5=py_0
83
+  - readline=7.0=h7b6447c_5
84
+  - scipy=1.3.1=py37h7c811a0_0
85
+  - send2trash=1.5.0=py37_0
86
+  - setuptools=41.0.1=py37_0
87
+  - sip=4.19.8=py37hf484d3e_0
88
+  - six=1.12.0=py37_0
89
+  - sqlite=3.29.0=h7b6447c_0
90
+  - terminado=0.8.2=py37_0
91
+  - testpath=0.4.2=py37_0
92
+  - tk=8.6.8=hbc83047_0
93
+  - tornado=6.0.3=py37h7b6447c_0
94
+  - traitlets=4.3.2=py37_0
95
+  - wcwidth=0.1.7=py37_0
96
+  - webencodings=0.5.1=py37_1
97
+  - wheel=0.33.4=py37_0
98
+  - widgetsnbextension=3.5.1=py37_0
99
+  - xz=5.2.4=h14c3975_4
100
+  - zeromq=4.3.1=he6710b0_3
101
+  - zlib=1.2.11=h7b6447c_3
102
+  - pip:
103
+    - biopython==1.74
104
+    - cycler==0.10.0
105
+    - griddataformats==0.5.0
106
+    - gsd==1.8.1
107
+    - joblib==0.13.2
108
+    - kiwisolver==1.1.0
109
+    - matplotlib==3.1.1
110
+    - mdanalysis==0.20.1
111
+    - mmtf-python==1.1.2
112
+    - mock==3.0.5
113
+    - msgpack==0.6.1
114
+    - networkx==2.3
115
+    - pbxplore==1.3.8
116
+    - pillow==6.1.0
117
+    - pyclustering==0.9.1
118
+    - pyparsing==2.4.2
119
+    - keras
120
+    - sklearn
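config.yml is a conda environment specification (defaults channel plus a pip section), presumably restored with something like conda env create -f config.yml. The snippet below is only an illustrative sanity check, not part of the commit; it verifies that the main pinned packages import under their usual import names:

    import importlib

    # Quick check that the core dependencies from config.yml are importable.
    for name in ("numpy", "pandas", "keras", "sklearn", "Bio", "MDAnalysis"):
        try:
            module = importlib.import_module(name)
            print("{:<12} {}".format(name, getattr(module, "__version__", "ok")))
        except ImportError as err:
            print("{:<12} MISSING ({})".format(name, err))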

File diff suppressed because it is too large
+ 1946 - 0
control.list


+ 596 - 0
heme.list View file

@@ -0,0 +1,596 @@
1
+101mA00
2
+1a2sA00
3
+1a4eD00
4
+1a4fA00
5
+1a4fB00
6
+1a56A00
7
+1a7vA00
8
+1a9wB00
9
+1akkA00
10
+1allA00
11
+1aqeA00
12
+1ashA00
13
+1b7vA00
14
+1b82A00
15
+1b8dA01
16
+1bbhA00
17
+1bbpA00
18
+1bccD01
19
+1be3C00
20
+1binA00
21
+1bvbA03
22
+1bz1A00
23
+1c2nA00
24
+1c52A00
25
+1c6oA00
26
+1c6sA00
27
+1c7dA00
28
+1cc5A00
29
+1cchA00
30
+1ccrA00
31
+1cg5A00
32
+1cg5B00
33
+1ci3A00
34
+1cl6A00
35
+1cnoG00
36
+1co6A00
37
+1cpcA00
38
+1cpcB00
39
+1cpqA00
40
+1ctmA00
41
+1cxaA00
42
+1cxyA00
43
+1cyfA00
44
+1cyjA00
45
+1d06A00
46
+1d1vA00
47
+1d4cA02
48
+1d7cA01
49
+1d8uA00
50
+1dlwA00
51
+1dm1A00
52
+1drmA00
53
+1dvhA00
54
+1dvvA00
55
+1dw0A00
56
+1dxtB00
57
+1e29A00
58
+1ebtA00
59
+1ecaA00
60
+1egyA00
61
+1etpA01
62
+1euoA00
63
+1ew6A00
64
+1ezvC00
65
+1f1cA00
66
+1f4uA00
67
+1f5oA00
68
+1f99A00
69
+1f99B00
70
+1fcbA00
71
+1fcdD01
72
+1fdhB00
73
+1fftE01
74
+1fhfA00
75
+1fj0A00
76
+1fs7A01
77
+1ft9A00
78
+1gcvA00
79
+1gcvB00
80
+1gdiA00
81
+1gdvA00
82
+1gjqA00
83
+1gksA00
84
+1gm4A02
85
+1gqaA00
86
+1gu2A00
87
+1gu6B01
88
+1gwsA14
89
+1gyoA03
90
+1h1oB02
91
+1h21A01
92
+1h31A01
93
+1h31B00
94
+1h97A00
95
+1hbgA00
96
+1hbhA00
97
+1hbhB00
98
+1hbrA00
99
+1hdsD00
100
+1he2A01
101
+1hlbA00
102
+1hlmA00
103
+1hroA00
104
+1i7yA00
105
+1iqcB01
106
+1it2A00
107
+1ithA00
108
+1iw1A01
109
+1ix3A00
110
+1izoA00
111
+1j0pA00
112
+1j77A00
113
+1jdlA00
114
+1jebA00
115
+1jjuA01
116
+1jmxA01
117
+1jniA01
118
+1jrxA02
119
+1kibA00
120
+1kn1B00
121
+1kqgC00
122
+1kr7A00
123
+1kwjA00
124
+1kx2A00
125
+1lhsA00
126
+1liaB00
127
+1ls9A00
128
+1ly8B03
129
+1m1pA01
130
+1m56A01
131
+1m7sA00
132
+1mg2D00
133
+1mhlB04
134
+1mj4A00
135
+1mjtA01
136
+1mqfA00
137
+1mwbA00
138
+1mytA00
139
+1n5uA05
140
+1ngkB00
141
+1nr6A00
142
+1ntfA00
143
+1oagA00
144
+1oahB02
145
+1ofwA09
146
+1ogyB01
147
+1oj6B00
148
+1or4A00
149
+1os6A02
150
+1outA00
151
+1outB00
152
+1prhA00
153
+1q90A00
154
+1qdbC04
155
+1qgwA00
156
+1qgwD02
157
+1qjsA00
158
+1qksA00
159
+1qn1A01
160
+1qn2A00
161
+1qwlA00
162
+1r3qA00
163
+1rz6A00
164
+1s56B00
165
+1s5lP00
166
+1s66A00
167
+1schB00
168
+1si8A00
169
+1sk7A00
170
+1soxA01
171
+1sp3A07
172
+1spgA00
173
+1spgB00
174
+1suoA00
175
+1sy7B00
176
+1t88A00
177
+1tguC00
178
+1tu9A00
179
+1u17A00
180
+1uvxA00
181
+1ux8A00
182
+1v4uA00
183
+1v4wB00
184
+1v75A00
185
+1w2lA00
186
+1w5cB14
187
+1w7oA03
188
+1we1A00
189
+1wmuB00
190
+1woxB00
191
+1wveB00
192
+1x3kA00
193
+1x3xA00
194
+1x9fA00
195
+1x9fB00
196
+1x9fD00
197
+1x9fG00
198
+1xbnA00
199
+1xf6B01
200
+1xq5A00
201
+1xq5B00
202
+1yeaA00
203
+1yhuA00
204
+1yhuB00
205
+1yhuC00
206
+1yiqA01
207
+1yq4D00
208
+1z1nA16
209
+1z24A00
210
+1zrtB00
211
+1zzhB01
212
+2a3mA03
213
+2amoB00
214
+2bh4A00
215
+2bk9A00
216
+2bkmA00
217
+2blfB00
218
+2bq4B03
219
+2bz9A00
220
+2c0kA00
221
+2c1dA00
222
+2c1dB00
223
+2c1vA01
224
+2c2cA00
225
+2c7jA00
226
+2c7jB01
227
+2c8sA00
228
+2ccyA00
229
+2civA01
230
+2czsA00
231
+2d09A00
232
+2d0sA00
233
+2d0wA00
234
+2d1eA00
235
+2d2mA00
236
+2d2mB00
237
+2d2mD00
238
+2dc3A00
239
+2dgeA00
240
+2e74A01
241
+2e84A14
242
+2eimA03
243
+2flqB01
244
+2fmyA00
245
+2fw5A01
246
+2fynA01
247
+2fynB00
248
+2hpdB00
249
+2hreA00
250
+2i89A00
251
+2i96A00
252
+2ibjA00
253
+2ig3A00
254
+2iufA00
255
+2ivfC00
256
+2j0pA00
257
+2j8wA00
258
+2je2A00
259
+2jxmB00
260
+2k2nA00
261
+2k3vA03
262
+2k78A00
263
+2l4dA00
264
+2noxB00
265
+2nwbA00
266
+2nzaA00
267
+2olpA00
268
+2oolA00
269
+2oyyA00
270
+2ozyA04
271
+2pbjD00
272
+2q2oA00
273
+2q7aA01
274
+2q8qA00
275
+2qppB00
276
+2r1hA00
277
+2r1hB00
278
+2r4zB00
279
+2r79A00
280
+2rfbA00
281
+2ri4D00
282
+2uulA00
283
+2uulB00
284
+2uydA00
285
+2v7iA00
286
+2vgrC00
287
+2vhdA00
288
+2vjhA01
289
+2vjhB00
290
+2vjrA00
291
+2vjrB01
292
+2vjtB00
293
+2vr0A06
294
+2vr0F03
295
+2vywA00
296
+2vzwB00
297
+2w31A00
298
+2wh8B00
299
+2wtgA00
300
+2wy4A00
301
+2x9oA00
302
+2xkrA00
303
+2xleA00
304
+2xq1O00
305
+2xtsB00
306
+2xykB00
307
+2y4eB01
308
+2y8hA00
309
+2ybqA01
310
+2yevB02
311
+2yiuB00
312
+2yk3A00
313
+2ynmC00
314
+2yp1C01
315
+2zboA00
316
+2zdoD00
317
+2zdpA00
318
+2zfoC00
319
+2zonD00
320
+2zooA00
321
+2zt9C01
322
+2zzsA00
323
+3a15B00
324
+3a4hA00
325
+3a9fA00
326
+3ae3D00
327
+3aeqA00
328
+3aq5A00
329
+3at5A00
330
+3at5B00
331
+3awmA00
332
+3ayfA01
333
+3b42A00
334
+3bcqA00
335
+3bcqB00
336
+3bk9A01
337
+3bxuB02
338
+3caoA03
339
+3cp5A00
340
+3cslB00
341
+3cwbC03
342
+3cx5D00
343
+3czhA01
344
+3dbmA00
345
+3dmiA00
346
+3dp5A00
347
+3dr0A00
348
+3dsjA00
349
+3e4wA00
350
+3e65B00
351
+3egwC00
352
+3ej6B03
353
+3ejeH01
354
+3ellA00
355
+3emmA00
356
+3fjwA00
357
+3gasA00
358
+3gm6A04
359
+3gw9A00
360
+3h4nB00
361
+3h8tA00
362
+3hf4A00
363
+3hq8A01
364
+3hx9A00
365
+3hyuA00
366
+3hyuB00
367
+3ia8A00
368
+3jbbI00
369
+3lf5A00
370
+3lxiB00
371
+3m5qA02
372
+3m97A00
373
+3mgxA00
374
+3mk7A01
375
+3mk7B01
376
+3mk7C01
377
+3mkbB00
378
+3ml1B01
379
+3mvcA00
380
+3nerB00
381
+3nn2B00
382
+3notA00
383
+3nu1A01
384
+3o0rC00
385
+3o0rD00
386
+3o5cA00
387
+3oa8B00
388
+3ocdA00
389
+3ov0A02
390
+3p3zA00
391
+3pc3A00
392
+3ph2A00
393
+3pmqA04
394
+3pt7B00
395
+3q08A00
396
+3qjtA00
397
+3qnsA00
398
+3qpiB00
399
+3qqrB00
400
+3qugA00
401
+3qzpB00
402
+3r9cA00
403
+3rivA00
404
+3rtlB01
405
+3rwlA00
406
+3s1iA00
407
+3sikB00
408
+3swjA01
409
+3t3zB00
410
+3t6dA01
411
+3tf8B00
412
+3tktA00
413
+3tm9A00
414
+3u8pB00
415
+3ubrA01
416
+3ucpA02
417
+3ut2A00
418
+3vliA00
419
+3vlmA00
420
+3volA00
421
+3vp5A00
422
+3vreB00
423
+3w2zA00
424
+3wctA00
425
+3wctB00
426
+3wctC00
427
+3wcuD00
428
+3wfxA00
429
+3wmmA00
430
+3x15A00
431
+3zbyB00
432
+3zhwA00
433
+3ziyA00
434
+3zjhB00
435
+4aamA00
436
+4atjB00
437
+4b2nA01
438
+4b2yA00
439
+4b4yA00
440
+4b7fA01
441
+4b8nB00
442
+4bf4D00
443
+4bjaA00
444
+4bwiA01
445
+4c0nA00
446
+4cabA00
447
+4cdpA00
448
+4cx9A00
449
+4czoA00
450
+4d1nC01
451
+4e04B00
452
+4e37B00
453
+4egnB00
454
+4eieA00
455
+4ejgE00
456
+4f0tB01
457
+4fa4B00
458
+4fofA00
459
+4g1bA00
460
+4g1xA00
461
+4gd3I00
462
+4gl5A00
463
+4gu7A00
464
+4gydA00
465
+4h0mA01
466
+4h2lB00
467
+4h8qA00
468
+4hkaA00
469
+4hrrA00
470
+4hrrB00
471
+4i3bA00
472
+4i8vC00
473
+4i91A00
474
+4j20A00
475
+4j6dB00
476
+4jetA00
477
+4jj0A01
478
+4jouA00
479
+4kmgA00
480
+4l0dA01
481
+4l2mA00
482
+4l6gA00
483
+4l6vS19
484
+4ljiA00
485
+4lm6A00
486
+4lm6B01
487
+4lm8A04
488
+4lmhD09
489
+4lmsA00
490
+4lmsC01
491
+4lmxC01
492
+4n4jA07
493
+4n4oA00
494
+4n73A00
495
+4nk2A00
496
+4o1wA00
497
+4o4sG00
498
+4o7gB00
499
+4oqrA00
500
+4ourB01
501
+4pwaD01
502
+4pwvA00
503
+4qi3A00
504
+4qi7A00
505
+4qoqA00
506
+4r1zA00
507
+4rknA08
508
+4rlrA00
509
+4rm4A00
510
+4tx3A00
511
+4u72A00
512
+4u8uA00
513
+4u8uB00
514
+4u8uC00
515
+4u8uD00
516
+4u99A00
517
+4ubsA06
518
+4uc1A05
519
+4uiiA00
520
+4uiqA00
521
+4uurA00
522
+4uylB00
523
+4uzvA00
524
+4v2kA00
525
+4w7jC00
526
+4wnuB00
527
+4x8yA00
528
+4xdiA00
529
+4xe3A00
530
+4xkbA00
531
+4xmhA01
532
+4xtqA00
533
+4xxiA00
534
+4xxlA00
535
+4xydB00
536
+4y7sA00
537
+4ympB00
538
+4yt3A00
539
+4zgxJ00
540
+4zvbA00
541
+5a13B00
542
+5akpA00
543
+5aqdA01
544
+5aqdM03
545
+5aurA00
546
+5b3iA00
547
+5b3vB01
548
+5b50A00
549
+5b6qA00
550
+5bv2B00
551
+5bv5A00
552
+5c2vC01
553
+5cx7E00
554
+5d1vA00
555
+5de0C00
556
+5dfxA00
557
+5f1aA01
558
+5f6zA00
559
+5ff1B05
560
+5foiA00
561
+5fujA00
562
+5fygA00
563
+5gj3A00
564
+5gweA00
565
+5hh3B00
566
+5irvC00
567
+5jggA00
568
+5jnzB00
569
+5k8zB01
570
+5kyoA00
571
+5kzlA00
572
+5l8rE04
573
+5l91A00
574
+5llyC00
575
+5lo9A02
576
+5loqC01
577
+5lthA00
578
+5mapB00
579
+5mg1A00
580
+5o0tA01
581
+5o1lB00
582
+5oheF00
583
+5t2kB00
584
+5ti9C00
585
+5tiaD00
586
+5v3uA00
587
+5vbuA00
588
+5vcgA00
589
+5x24A00
590
+5xnlC16
591
+5xnlD16
592
+5xnlV04
593
+5xnlW16
594
+5xnmA18
595
+5y1iA00
596
+5y8aA00

File diff suppressed because it is too large
+ 1553 - 0
nucleotide.list


+ 387 - 0
secondary_struct.ipynb View file

@@ -0,0 +1,387 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "code",
5
+   "execution_count": 18,
6
+   "metadata": {},
7
+   "outputs": [],
8
+   "source": [
9
+    "import keras\n",
10
+    "import numpy as np\n",
11
+    "import pandas as pd\n",
12
+    "\n",
13
+    "from sklearn.preprocessing import LabelEncoder\n",
14
+    "from keras.models import Sequential\n",
15
+    "from keras.layers import Dense, Flatten, TimeDistributed\n",
16
+    "from keras import Input, Model\n",
17
+    "from keras.layers import add, Activation\n",
18
+    "#from keras.utils import plot_model  # Needs pydot.\n",
19
+    "from keras.layers import Conv1D, AveragePooling1D"
20
+   ]
21
+  },
22
+  {
23
+   "cell_type": "code",
24
+   "execution_count": 19,
25
+   "metadata": {},
26
+   "outputs": [],
27
+   "source": [
28
+    "def file_to_dataframe(filename):\n",
29
+    "    \"\"\"\n",
30
+    "    Returns a pandas dataframe ncol = len(longest_sequence),\n",
31
+    "                               nrow = number of sequences\n",
32
+    "                               \n",
33
+    "    Arguments :\n",
34
+    "        - filename : str\n",
35
+    "            path to file\n",
36
+    "    Takes a path to a files containing any sequences, must be 1 sequence\n",
37
+    "    per line.\n",
38
+    "    \n",
39
+    "    Sequences shorter than longest_sequence are completed with '0' char.\n",
40
+    "    \"\"\"\n",
41
+    "    filin = open(filename)\n",
42
+    "    fastas0 = filin.read()\n",
43
+    "    fastas0 = fastas0.split('\\n')\n",
44
+    "    nmax = len(max(fastas0, key=len))\n",
45
+    "    fastas = []\n",
46
+    "    for fasta in fastas0:\n",
47
+    "        fastas.append(fasta + (nmax - len(fasta)) * '0')\n",
48
+    "    \n",
49
+    "    seqs = pd.DataFrame(index=range(len(fastas)), columns=range(nmax))\n",
50
+    "    \n",
51
+    "    for i, fasta in enumerate(fastas):\n",
52
+    "        seqs.loc[i] = pd.Series(list(fasta))\n",
53
+    "    \n",
54
+    "    return seqs"
55
+   ]
56
+  },
57
+  {
58
+   "cell_type": "code",
59
+   "execution_count": 20,
60
+   "metadata": {},
61
+   "outputs": [],
62
+   "source": [
63
+    "def model_sequential(): # créer un objet modèle\n",
64
+    "    \"\"\"\n",
65
+    "    Return a simple sequentiel model\n",
66
+    "    \n",
67
+    "    Returns :\n",
68
+    "        - model : keras.Model\n",
69
+    "    \"\"\"\n",
70
+    "    inputs = Input(shape=(759,21)) # 759 aa, 21 car onehot\n",
71
+    "    conv_1 = Conv1D(25, (5), padding=\"same\", activation=\"relu\",\n",
72
+    "                        kernel_initializer=\"he_normal\")(inputs)\n",
73
+    "    conv_2 = Conv1D(35, (5), padding=\"same\", activation=\"relu\",\n",
74
+    "                        kernel_initializer=\"he_normal\")(conv_1)\n",
75
+    "    output = TimeDistributed(Dense(4, activation='softmax'))(conv_2)\n",
76
+    "    model = Model(inputs=inputs, outputs=output)\n",
77
+    "    print(model.summary)\n",
78
+    "    model.compile(optimizer=\"adam\", loss=\"categorical_crossentropy\",\n",
79
+    "                  metrics=[\"accuracy\"])\n",
80
+    "    return model"
81
+   ]
82
+  },
83
+  {
84
+   "cell_type": "code",
85
+   "execution_count": 21,
86
+   "metadata": {},
87
+   "outputs": [],
88
+   "source": [
89
+    "def char_to_onehot(df, ncol=0):\n",
90
+    "    \"\"\"\n",
91
+    "    Returns the given str-encoded dataframe into a onehot encoded dataframe as an array object\n",
92
+    "    \n",
93
+    "    * Arguments:\n",
94
+    "        - df : pandas.DataFrame, the dataframe containing the sequences, cell \n",
95
+    "                               containing strings.\n",
96
+    "        - ncol : int, Optionnal. the number of col used to build the LabelEncoder\n",
97
+    "                    which will transform the df cells into int(categorical).\n",
98
+    "    \"\"\"\n",
99
+    "    classes = LabelEncoder()\n",
100
+    "    classes.fit(df[ncol])\n",
101
+    "    df_categorical = df.apply(func=classes.transform, axis=0)\n",
102
+    "    df_onehot = keras.utils.to_categorical(df_categorical)\n",
103
+    "    return df_onehot"
104
+   ]
105
+  },
106
+  {
107
+   "cell_type": "code",
108
+   "execution_count": 22,
109
+   "metadata": {},
110
+   "outputs": [],
111
+   "source": [
112
+    "def true_accuracy(predictions, onehot_Y_test):\n",
113
+    "    \"\"\"\n",
114
+    "    Computes the accuracy ignoring the \"0\" of the DataFrame.\n",
115
+    "    \n",
116
+    "    * Arguments :\n",
117
+    "        - predictions : numpy.array, output of the model, onehot encoded.\n",
118
+    "        - onehot_Y_test : numpy.array, the true values, onehot encoded.\n",
119
+    "        \n",
120
+    "    * Returns :\n",
121
+    "        - acc_coor : int, accuracy corrected, not considering '0' predictions\n",
122
+    "    \"\"\"\n",
123
+    "    tp = 0\n",
124
+    "    tn = 0\n",
125
+    "    fn = 0\n",
126
+    "    fp = 0\n",
127
+    "    tot = 0\n",
128
+    "\n",
129
+    "    for i in range(len(predictions)):\n",
130
+    "        for j in range(len(predictions[i])):\n",
131
+    "            if onehot_Y_test[i, j, 3] != 0.:\n",
132
+    "                predmax = -1\n",
133
+    "                predict_class = -1\n",
134
+    "                true_class = -1\n",
135
+    "                for k in range(len(predictions[i, j])):\n",
136
+    "                    if predmax < predictions[i, j, k]:\n",
137
+    "                        predmax = predictions[i, j, k]\n",
138
+    "                        predict_class = k\n",
139
+    "                    if onehot_Y_test[i, j, k] == 1.:\n",
140
+    "                        true_class = k\n",
141
+    "                if predict_class == true_class:\n",
142
+    "                    tp = tp+1\n",
143
+    "                tot = tot + 1\n",
144
+    "    acc_corr = tp/tot*100\n",
145
+    "    return acc_corr"
146
+   ]
147
+  },
148
+  {
149
+   "cell_type": "code",
150
+   "execution_count": 23,
151
+   "metadata": {},
152
+   "outputs": [],
153
+   "source": [
154
+    "# Create dataframe from files\n",
155
+    "fasta = file_to_dataframe(\"data/train.fasta\")\n",
156
+    "fasta = fasta.replace(\"X\", \"A\") # 'X' in fasta sequences are replace with 'A'\n",
157
+    "dssp = file_to_dataframe(\"data/train.dssp\")"
158
+   ]
159
+  },
160
+  {
161
+   "cell_type": "code",
162
+   "execution_count": 24,
163
+   "metadata": {},
164
+   "outputs": [],
165
+   "source": [
166
+    "# Writes Dataframes to csv files\n",
167
+    "fasta.to_csv(path_or_buf=\"data/train_fasta.csv\")\n",
168
+    "dssp.to_csv(path_or_buf=\"data/train_dssp.csv\")"
169
+   ]
170
+  },
171
+  {
172
+   "cell_type": "code",
173
+   "execution_count": 25,
174
+   "metadata": {},
175
+   "outputs": [],
176
+   "source": [
177
+    "# To onehot\n",
178
+    "fasta_onehot = char_to_onehot(fasta, 0)\n",
179
+    "dssp_onehot = char_to_onehot(dssp, 2)\n"
180
+   ]
181
+  },
182
+  {
183
+   "cell_type": "code",
184
+   "execution_count": 26,
185
+   "metadata": {},
186
+   "outputs": [],
187
+   "source": [
188
+    "# Divides the dataset in train and test subsets\n",
189
+    "fasta_train = fasta_onehot[0:1000,]\n",
190
+    "dssp_train = dssp_onehot[0:1000,]\n",
191
+    "fasta_test = fasta_onehot[1000:,]\n",
192
+    "dssp_test = dssp_onehot[1000:]"
193
+   ]
194
+  },
195
+  {
196
+   "cell_type": "code",
197
+   "execution_count": 27,
198
+   "metadata": {},
199
+   "outputs": [
200
+    {
201
+     "name": "stdout",
202
+     "output_type": "stream",
203
+     "text": [
204
+      "<bound method Network.summary of <keras.engine.training.Model object at 0x7f68cfb7b0d0>>\n",
205
+      "Epoch 1/30\n",
206
+      "1000/1000 [==============================] - 2s 2ms/step - loss: 0.7467 - acc: 0.7447\n",
207
+      "Epoch 2/30\n",
208
+      "1000/1000 [==============================] - 1s 974us/step - loss: 0.2610 - acc: 0.8944\n",
209
+      "Epoch 3/30\n",
210
+      "1000/1000 [==============================] - 1s 964us/step - loss: 0.2053 - acc: 0.9086\n",
211
+      "Epoch 4/30\n",
212
+      "1000/1000 [==============================] - 1s 994us/step - loss: 0.1899 - acc: 0.9154\n",
213
+      "Epoch 5/30\n",
214
+      "1000/1000 [==============================] - 1s 986us/step - loss: 0.1817 - acc: 0.9193\n",
215
+      "Epoch 6/30\n",
216
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1790 - acc: 0.9201\n",
217
+      "Epoch 7/30\n",
218
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1759 - acc: 0.9217\n",
219
+      "Epoch 8/30\n",
220
+      "1000/1000 [==============================] - 1s 960us/step - loss: 0.1743 - acc: 0.9222\n",
221
+      "Epoch 9/30\n",
222
+      "1000/1000 [==============================] - 1s 977us/step - loss: 0.1735 - acc: 0.9226\n",
223
+      "Epoch 10/30\n",
224
+      "1000/1000 [==============================] - 1s 963us/step - loss: 0.1727 - acc: 0.9228\n",
225
+      "Epoch 11/30\n",
226
+      "1000/1000 [==============================] - 1s 999us/step - loss: 0.1712 - acc: 0.9239\n",
227
+      "Epoch 12/30\n",
228
+      "1000/1000 [==============================] - 1s 963us/step - loss: 0.1702 - acc: 0.9243\n",
229
+      "Epoch 13/30\n",
230
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1695 - acc: 0.9249\n",
231
+      "Epoch 14/30\n",
232
+      "1000/1000 [==============================] - 1s 978us/step - loss: 0.1687 - acc: 0.9252\n",
233
+      "Epoch 15/30\n",
234
+      "1000/1000 [==============================] - 1s 981us/step - loss: 0.1678 - acc: 0.9257\n",
235
+      "Epoch 16/30\n",
236
+      "1000/1000 [==============================] - 1s 985us/step - loss: 0.1669 - acc: 0.9260\n",
237
+      "Epoch 17/30\n",
238
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1661 - acc: 0.9267\n",
239
+      "Epoch 18/30\n",
240
+      "1000/1000 [==============================] - 1s 1000us/step - loss: 0.1656 - acc: 0.9269\n",
241
+      "Epoch 19/30\n",
242
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1650 - acc: 0.9273\n",
243
+      "Epoch 20/30\n",
244
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1640 - acc: 0.9277\n",
245
+      "Epoch 21/30\n",
246
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1637 - acc: 0.9281\n",
247
+      "Epoch 22/30\n",
248
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1630 - acc: 0.9285\n",
249
+      "Epoch 23/30\n",
250
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1621 - acc: 0.9289\n",
251
+      "Epoch 24/30\n",
252
+      "1000/1000 [==============================] - 1s 992us/step - loss: 0.1619 - acc: 0.9289\n",
253
+      "Epoch 25/30\n",
254
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1616 - acc: 0.9291\n",
255
+      "Epoch 26/30\n",
256
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1616 - acc: 0.9291\n",
257
+      "Epoch 27/30\n",
258
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1608 - acc: 0.9296\n",
259
+      "Epoch 28/30\n",
260
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1604 - acc: 0.9297\n",
261
+      "Epoch 29/30\n",
262
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1601 - acc: 0.9298\n",
263
+      "Epoch 30/30\n",
264
+      "1000/1000 [==============================] - 1s 1ms/step - loss: 0.1597 - acc: 0.9300\n"
265
+     ]
266
+    },
267
+    {
268
+     "data": {
269
+      "text/plain": [
270
+       "<keras.callbacks.History at 0x7f68cfcf6050>"
271
+      ]
272
+     },
273
+     "execution_count": 27,
274
+     "metadata": {},
275
+     "output_type": "execute_result"
276
+    }
277
+   ],
278
+   "source": [
279
+    "model = model_sequential()\n",
280
+    "model.fit(fasta_train, dssp_train, epochs=30, batch_size=30)"
281
+   ]
282
+  },
283
+  {
284
+   "cell_type": "code",
285
+   "execution_count": 30,
286
+   "metadata": {},
287
+   "outputs": [],
288
+   "source": [
289
+    "predictions = model.predict(fasta_test, batch_size=30)"
290
+   ]
291
+  },
292
+  {
293
+   "cell_type": "code",
294
+   "execution_count": 31,
295
+   "metadata": {},
296
+   "outputs": [
297
+    {
298
+     "name": "stdout",
299
+     "output_type": "stream",
300
+     "text": [
301
+      "accuracy : 68.06409539780138\n"
302
+     ]
303
+    }
304
+   ],
305
+   "source": [
306
+    "print(\"accuracy : {}\".format(true_accuracy(predictions, dssp_test)))"
307
+   ]
308
+  },
309
+  {
310
+   "cell_type": "code",
311
+   "execution_count": null,
312
+   "metadata": {},
313
+   "outputs": [],
314
+   "source": []
315
+  },
316
+  {
317
+   "cell_type": "code",
318
+   "execution_count": null,
319
+   "metadata": {},
320
+   "outputs": [],
321
+   "source": []
322
+  },
323
+  {
324
+   "cell_type": "code",
325
+   "execution_count": null,
326
+   "metadata": {},
327
+   "outputs": [],
328
+   "source": []
329
+  },
330
+  {
331
+   "cell_type": "code",
332
+   "execution_count": null,
333
+   "metadata": {},
334
+   "outputs": [],
335
+   "source": []
336
+  },
337
+  {
338
+   "cell_type": "code",
339
+   "execution_count": null,
340
+   "metadata": {},
341
+   "outputs": [],
342
+   "source": []
343
+  },
344
+  {
345
+   "cell_type": "code",
346
+   "execution_count": null,
347
+   "metadata": {},
348
+   "outputs": [],
349
+   "source": []
350
+  },
351
+  {
352
+   "cell_type": "code",
353
+   "execution_count": null,
354
+   "metadata": {},
355
+   "outputs": [],
356
+   "source": []
357
+  },
358
+  {
359
+   "cell_type": "code",
360
+   "execution_count": null,
361
+   "metadata": {},
362
+   "outputs": [],
363
+   "source": []
364
+  }
365
+ ],
366
+ "metadata": {
367
+  "kernelspec": {
368
+   "display_name": "Python 3",
369
+   "language": "python",
370
+   "name": "python3"
371
+  },
372
+  "language_info": {
373
+   "codemirror_mode": {
374
+    "name": "ipython",
375
+    "version": 3
376
+   },
377
+   "file_extension": ".py",
378
+   "mimetype": "text/x-python",
379
+   "name": "python",
380
+   "nbconvert_exporter": "python",
381
+   "pygments_lexer": "ipython3",
382
+   "version": "3.7.4"
383
+  }
384
+ },
385
+ "nbformat": 4,
386
+ "nbformat_minor": 4
387
+}
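One small gotcha in model_sequential above: print(model.summary) prints the bound method object (visible in the training log as <bound method Network.summary of ...>) rather than the layer table, because summary is meant to be called. A corrected sketch of the same two-Conv1D architecture, offered only as an aside and not as part of the commit:

    from keras import Input, Model
    from keras.layers import Conv1D, Dense, TimeDistributed

    def model_sequential():
        """Same architecture as in the notebook, with summary() actually called."""
        inputs = Input(shape=(759, 21))  # 759 residues, 21 one-hot characters
        conv_1 = Conv1D(25, 5, padding="same", activation="relu",
                        kernel_initializer="he_normal")(inputs)
        conv_2 = Conv1D(35, 5, padding="same", activation="relu",
                        kernel_initializer="he_normal")(conv_1)
        output = TimeDistributed(Dense(4, activation="softmax"))(conv_2)
        model = Model(inputs=inputs, outputs=output)
        model.summary()  # prints the layer table instead of the method repr
        model.compile(optimizer="adam", loss="categorical_crossentropy",
                      metrics=["accuracy"])
        return model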

+ 69 - 0
steroid.list View file

@@ -0,0 +1,69 @@
1
+1a28A00
2
+1afsA01
3
+1aquA01
4
+1coyA00
5
+1dbbB00
6
+1dbjA00
7
+1eupA02
8
+1fdtA00
9
+1h60A01
10
+1hv5E01
11
+1i7gA01
12
+1jnhB00
13
+1kdkA00
14
+1nfqA01
15
+1ogzA00
16
+1oh0B00
17
+1q13A01
18
+1q20A01
19
+1vpoB00
20
+1x8jA01
21
+1xj7A01
22
+1xnxA00
23
+1xv9B00
24
+1y5rA01
25
+1yb1A00
26
+2aaxA00
27
+2d06A01
28
+2hzqA00
29
+2ipgA01
30
+2o5yB00
31
+2qp4A00
32
+2v95A00
33
+2vctA00
34
+3burA01
35
+3eauA02
36
+3gn8A01
37
+3kdmB00
38
+3mnpA01
39
+3oikA00
40
+3ollA00
41
+3q95B00
42
+3rukD01
43
+3s79A01
44
+3wo2C02
45
+4c3yH02
46
+4c49A00
47
+4dvqA01
48
+4j6cA00
49
+4jvlA01
50
+4l1wA01
51
+4ltwA00
52
+4pleA02
53
+4qckA00
54
+4qdcA00
55
+4qdfB00
56
+4qf7A00
57
+4r1zA01
58
+4r21B01
59
+4ubtA01
60
+4x1gA00
61
+4y8wC01
62
+5a1pA01
63
+5hgcA00
64
+5hs6A01
65
+5if6A00
66
+5l91A04
67
+5lurA00
68
+5vb2D05
69
+6f88A01