nzimmermann
/
deepdrug3D


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
							{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DeepDrug3D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from keras.models import Sequential\n",
    "from keras import optimizers\n",
    "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
    "from keras.layers import LeakyReLU\n",
    "from keras import Input, Model\n",
    "from keras.layers import add, Activation\n",
    "#from keras.utils import plot_model  # Needs pydot.\n",
    "from keras.layers import Conv3D, MaxPooling3D"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create pocket lists\n",
    "Four pocket lists are read from disk, one per file:\n",
    "  + control\n",
    "  + steroid\n",
    "  + heme\n",
    "  + nucleotide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_pocket_list(path):\n",
    "    \"\"\"\n",
    "    Read a pocket list file and return the pocket names it contains.\n",
    "\n",
    "    Args :\n",
    "        - path : str, text file holding one pocket name per line\n",
    "\n",
    "    Returns :\n",
    "        - names : list of str, whitespace-stripped names in file order;\n",
    "                  blank lines are skipped\n",
    "    \"\"\"\n",
    "    with open(path, \"r\") as filin:\n",
    "        # Strip each line so a missing final newline, a trailing blank line\n",
    "        # or CRLF line endings never produce an empty or padded name.\n",
    "        stripped = (line.strip() for line in filin)\n",
    "        return [name for name in stripped if name]\n",
    "\n",
    "\n",
    "control = read_pocket_list(\"control.list\")\n",
    "steroid = read_pocket_list(\"steroid.list\")\n",
    "heme = read_pocket_list(\"heme.list\")\n",
    "nucleotide = read_pocket_list(\"nucleotide.list\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Creating inputs and outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 42  # fixed seed so the shuffle below is reproducible across runs\n",
    "np.random.seed(SEED)\n",
    "\n",
    "lmin = len(steroid)\n",
    "lmid = len(heme)\n",
    "lmax = len(nucleotide)\n",
    "# Only the three ligand classes are classified; the control list is not used here.\n",
    "n_samples = lmin + lmid + lmax\n",
    "\n",
    "# Voxel buffer: one (14, 32, 32, 32) grid per pocket. Every row is written by\n",
    "# the loading loop, so the buffer does not need to be zero-filled first.\n",
    "data_onehot = np.empty(shape=(n_samples, 14, 32, 32, 32))\n",
    "# Random visiting order over the concatenated steroid/heme/nucleotide pockets.\n",
    "indices = np.random.permutation(n_samples)\n",
    "# One-hot class labels, columns = (steroid, heme, nucleotide).\n",
    "output = np.zeros(shape=(n_samples, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Concatenate the three classes so that a shuffled index maps directly to a\n",
    "# pocket name and its one-hot label; this avoids per-class offset arithmetic\n",
    "# and the boundary mistakes that come with it.\n",
    "pocket_names = steroid + heme + nucleotide\n",
    "pocket_labels = (\n",
    "    [[1, 0, 0]] * len(steroid)\n",
    "    + [[0, 1, 0]] * len(heme)\n",
    "    + [[0, 0, 1]] * len(nucleotide)\n",
    ")\n",
    "\n",
    "# Row n of the arrays receives the pocket drawn at position n of the shuffle.\n",
    "for n, i in enumerate(indices):\n",
    "    data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\" + pocket_names[i] + \".npy\")\n",
    "    output[n,] = pocket_labels[i]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first 1664 shuffled samples are used for training, the rest for testing.\n",
    "X_train, X_test = data_onehot[:1664], data_onehot[1664:]\n",
    "Y_train, Y_test = output[:1664], output[1664:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def model_sequential():\n",
    "    \"\"\"\n",
    "    Build and compile the 3D convolutional pocket classifier.\n",
    "\n",
    "    The network takes one voxelised pocket in channels-first layout,\n",
    "    (14, 32, 32, 32), which is the layout of the rows of `data_onehot`,\n",
    "    and returns a softmax over the 3 classes (steroid, heme, nucleotide).\n",
    "\n",
    "    Returns :\n",
    "        - model : keras.Model, compiled with Adam and categorical cross-entropy\n",
    "    \"\"\"\n",
    "    # 14 feature channels on a 32x32x32 grid, channels first.\n",
    "    inputs = Input(shape=(14, 32, 32, 32))\n",
    "    # LeakyReLU is a layer, not an activation name, so it is applied\n",
    "    # explicitly after each (linear) convolution.\n",
    "    # NOTE(review): kernels of 28 and 26 voxels are almost as large as the\n",
    "    # 32-voxel grid and make these layers very expensive -- confirm intended.\n",
    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", data_format=\"channels_first\",\n",
    "                    kernel_initializer=\"he_normal\")(inputs)\n",
    "    act_1 = LeakyReLU()(conv_1)\n",
    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", data_format=\"channels_first\",\n",
    "                    kernel_initializer=\"he_normal\")(act_1)\n",
    "    act_2 = LeakyReLU()(conv_2)\n",
    "    drop_1 = Dropout(0.2)(act_2)\n",
    "    maxpool = MaxPooling3D(data_format=\"channels_first\")(drop_1)\n",
    "    drop_2 = Dropout(0.4)(maxpool)\n",
    "    # Flatten the pooled volume so the classifier emits a single prediction\n",
    "    # per pocket, matching the (n_samples, 3) label array.\n",
    "    flat = Flatten()(drop_2)\n",
    "    dense = Dense(512)(flat)\n",
    "    drop_3 = Dropout(0.4)(dense)\n",
    "    predictions = Dense(3, activation=\"softmax\")(drop_3)\n",
    "    model = Model(inputs=inputs, outputs=predictions)\n",
    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
    "    # summary() prints the layer table itself; it has to be called.\n",
    "    model.summary()\n",
    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
    "                  metrics=[\"accuracy\"])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}