123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212 |
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# DeepDrug3D"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Using TensorFlow backend.\n"
- ]
- }
- ],
- "source": [
- "import numpy as np\n",
- "\n",
- "from sklearn.preprocessing import LabelEncoder\n",
- "from keras.models import Sequential\n",
- "from keras import optimizers\n",
- "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
- "from keras import Input, Model\n",
- "from keras.layers import add, Activation, LeakyReLU\n",
- "#from keras.utils import plot_model # Needs pydot.\n",
- "from keras.layers import Conv3D, MaxPooling3D"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create pocket lists\n",
- "Four pocket lists are created:\n",
- " + control\n",
- " + steroid\n",
- " + heme\n",
- " + nucleotide"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "''"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def read_pocket_list(path):\n",
- "    \"\"\"\n",
- "    Read a pocket list file and return its entries.\n",
- "\n",
- "    Blank lines are skipped and surrounding whitespace is stripped, so the\n",
- "    result does not depend on whether the file ends with a newline.\n",
- "\n",
- "    Arguments :\n",
- "        - path : str, text file with one entry per line\n",
- "\n",
- "    Returns :\n",
- "        - entries : list of str\n",
- "    \"\"\"\n",
- "    with open(path, \"r\") as filin:\n",
- "        return [line.strip() for line in filin if line.strip()]\n",
- "\n",
- "\n",
- "control = read_pocket_list(\"control.list\")\n",
- "steroid = read_pocket_list(\"steroid.list\")\n",
- "heme = read_pocket_list(\"heme.list\")\n",
- "nucleotide = read_pocket_list(\"nucleotide.list\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Creating inputs and outputs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
- "indices = np.random.permutation(2219)\n",
- "output = np.ndarray(shape=(2219, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
- "lmin = len(steroid)\n",
- "lmid = len(heme)\n",
- "lmax = len(nucleotide)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "n = -1\n",
- "for i in indices:\n",
- " n += 1\n",
- " if i < lmin:\n",
- " data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
- " output[n,] = [1,0,0]\n",
- " elif i > lmin and i < (lmin + lmid):\n",
- " data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
- " output[n,] = [0,1,0]\n",
- " else:\n",
- " data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid) - 1]+\".npy\")\n",
- " output[n,] = [0,0,1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "# The first 1664 shuffled samples form the training set, the rest the test set.\n",
- "X_train, X_test = np.split(data_onehot, [1664])\n",
- "Y_train, Y_test = np.split(output, [1664])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(1, 14, 32, 32, 32)"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def model_sequential():\n",
- "    \"\"\"\n",
- "    Build and compile a 3D convolutional classifier.\n",
- "\n",
- "    The network takes a channels-last grid of shape (32, 32, 32, 14) and\n",
- "    returns, for each sample, a softmax over 3 classes.\n",
- "\n",
- "    Returns :\n",
- "        - model : keras.Model, compiled with Adam and categorical cross-entropy\n",
- "    \"\"\"\n",
- "    # NOTE(review): the arrays built earlier in this notebook are stored as\n",
- "    # (samples, 14, 32, 32, 32); they presumably need np.moveaxis(X, 1, -1)\n",
- "    # before being fed to this channels-last input - confirm.\n",
- "    inputs = Input(shape=(32, 32, 32, 14))\n",
- "    # LeakyReLU is applied as a layer: \"LeakyReLU\" is not accepted as an\n",
- "    # `activation=` string by every Keras version.\n",
- "    # NOTE(review): 28^3 and 26^3 kernels are very large for a 32^3 grid -\n",
- "    # confirm these are intended kernel sizes and not layer output sizes.\n",
- "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\",\n",
- "                    kernel_initializer=\"he_normal\")(inputs)\n",
- "    act_1 = LeakyReLU()(conv_1)\n",
- "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\",\n",
- "                    kernel_initializer=\"he_normal\")(act_1)\n",
- "    act_2 = LeakyReLU()(conv_2)\n",
- "    drop_1 = Dropout(0.2)(act_2)\n",
- "    maxpool = MaxPooling3D()(drop_1)\n",
- "    drop_2 = Dropout(0.4)(maxpool)\n",
- "    # Flatten so the dense layers produce one vector per sample; without it\n",
- "    # the output keeps its spatial axes and cannot match (samples, 3) labels.\n",
- "    flat = Flatten()(drop_2)\n",
- "    dense = Dense(512)(flat)\n",
- "    drop_3 = Dropout(0.4)(dense)\n",
- "    probs = Dense(3, activation=\"softmax\")(drop_3)\n",
- "    model = Model(inputs=inputs, outputs=probs)\n",
- "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
- "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
- "                  metrics=[\"accuracy\"])\n",
- "    model.summary()  # summary() prints the layer table itself\n",
- "    return model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }
|