--- a/DeepDrug.ipynb
+++ b/DeepDrug.ipynb
@@ -7,80 +7,50 @@
 
				     "# DeepDrug3D"
			
 
				    ]
			
 
				   },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Importing library"
			
 
				+   ]
			
 
				+  },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 1,
			
 
				-   "metadata": {},
			
 
				-   "outputs": [
			
 
				-    {
			
 
				-     "name": "stderr",
			
 
				-     "output_type": "stream",
			
 
				-     "text": [
			
 
				-      "Using TensorFlow backend.\n"
			
 
				-     ]
			
 
				-    }
			
 
				-   ],
			
 
				+   "execution_count": 6,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				    "source": [
			
 
				     "import numpy as np\n",
			
 
				-    "\n",
			
 
				+    "import tensorflow as tf\n",
			
 
				     "from sklearn.preprocessing import LabelEncoder\n",
			
 
				     "from keras.models import Sequential\n",
			
 
				-    "from keras import optimizers\n",
			
 
				-    "from keras.layers import Dense, Flatten, TimeDistributedn, Dropout\n",
			
 
				+    "from keras import optimizers, callbacks\n",
			
 
				+    "from keras.layers import Dense, Flatten, TimeDistributed, Dropout\n",
			
 
				     "from keras import Input, Model\n",
			
 
				     "from keras.layers import add, Activation\n",
			
 
				+    "from keras.layers.advanced_activations import LeakyReLU\n",
			
 
				     "#from keras.utils import plot_model  # Needs pydot.\n",
			
 
				-    "from keras.layers import Conv3D, MaxPooling3D"
			
 
				+    "from keras.layers import Convolution3D, MaxPooling3D"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				-    "## Create pocket lists\n",
			
 
				-    "4 pockets are created :\n",
			
 
				-    "  + control\n",
			
 
				-    "  + steroid\n",
			
 
				-    "  + heme\n",
			
 
				-    "  + nucleotide"
			
 
				+    "### used to store model prediction in order to plot roc curve"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 2,
			
 
				-   "metadata": {},
			
 
				-   "outputs": [
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "text/plain": [
			
 
				-       "''"
			
 
				-      ]
			
 
				-     },
			
 
				-     "execution_count": 2,
			
 
				-     "metadata": {},
			
 
				-     "output_type": "execute_result"
			
 
				-    }
			
 
				-   ],
			
 
				-   "source": [
			
 
				-    "with open(\"control.list\", \"r\") as filin:\n",
			
 
				-    "    control = filin.read()\n",
			
 
				-    "control = control.split(\"\\n\")\n",
			
 
				-    "control.pop()\n",
			
 
				-    "\n",
			
 
				-    "with open(\"steroid.list\", \"r\") as filin:\n",
			
 
				-    "    steroid = filin.read()\n",
			
 
				-    "steroid = steroid.split(\"\\n\")\n",
			
 
				-    "steroid.pop()\n",
			
 
				-    "\n",
			
 
				-    "with open(\"heme.list\", \"r\") as filin:\n",
			
 
				-    "    heme = filin.read()\n",
			
 
				-    "heme = heme.split(\"\\n\")\n",
			
 
				-    "heme.pop()\n",
			
 
				-    "\n",
			
 
				-    "with open(\"nucleotide.list\", \"r\") as filin:\n",
			
 
				-    "    nucleotide = filin.read()\n",
			
 
				-    "nucleotide = nucleotide.split(\"\\n\")\n",
			
 
				-    "nucleotide.pop()"
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "class prediction_history(callbacks.Callback):\n",
			
 
				+    "    def __init__(self):\n",
			
 
				+    "        self.predhis = []\n",
			
 
				+    "    def on_epoch_end(self, epoch, logs={}):\n",
			
 
				+    "        self.predhis.append(model.predict(predictor_train))"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -92,85 +62,169 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 3,
			
 
				+   "execution_count": null,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
 
				-    "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
			
 
				-    "indices = np.random.permutation(2219)\n",
			
 
				-    "output = np.ndarray(shape=(2219, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
			
 
				-    "lmin = len(steroid)\n",
			
 
				-    "lmid = len(heme)\n",
			
 
				-    "lmax = len(nucleotide)"
			
 
				+    "def in_out_lists(size=1000):\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    returns a tuple of array used as input and output for the model\n",
			
 
				+    "    Arguments:\n",
			
 
				+    "        - size, int: default 1000, size of the lists to be created\n",
			
 
				+    "        \n",
			
 
				+    "    Returns:\n",
			
 
				+    "        - tuple (data_onehot, output):\n",
			
 
				+    "            -data_onehot, ndarray: containing one-hot encoded pockets\n",
			
 
				+    "            -output, ndarray: containing size-3 vectors for classification\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    with open(\"control.list\", \"r\") as filin:\n",
			
 
				+    "        control = filin.read()\n",
			
 
				+    "        control = control.split(\"\\n\")\n",
			
 
				+    "        control.pop()\n",
			
 
				+    "\n",
			
 
				+    "    with open(\"steroid.list\", \"r\") as filin:\n",
			
 
				+    "        steroid = filin.read()\n",
			
 
				+    "        steroid = steroid.split(\"\\n\")\n",
			
 
				+    "        steroid.pop()\n",
			
 
				+    "\n",
			
 
				+    "    with open(\"heme.list\", \"r\") as filin:\n",
			
 
				+    "        heme = filin.read()\n",
			
 
				+    "        heme = heme.split(\"\\n\")\n",
			
 
				+    "        heme.pop()\n",
			
 
				+    "\n",
			
 
				+    "    with open(\"nucleotide.list\", \"r\") as filin:\n",
			
 
				+    "        nucleotide = filin.read()\n",
			
 
				+    "        nucleotide = nucleotide.split(\"\\n\")\n",
			
 
				+    "        nucleotide.pop()\n",
			
 
				+    "    \n",
			
 
				+    "    lmin = len(heme)\n",
			
 
				+    "    lmid = len(nucleotide)\n",
			
 
				+    "    lmax = len(control)\n",
			
 
				+    "    tot_size = lmin + lmid + lmax\n",
			
 
				+    "    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32)) # initializing empty array\n",
			
 
				+    "\n",
			
 
				+    "    np.random.seed(9001)\n",
			
 
				+    "    indices = np.random.permutation(tot_size)\n",
			
 
				+    "    indices = indices[:size]\n",
			
 
				+    "    output = np.ndarray(shape=(size, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
			
 
				+    "\n",
			
 
				+    "    n = -1\n",
			
 
				+    "    for i in indices:\n",
			
 
				+    "        n += 1\n",
			
 
				+    "        if i < lmin:\n",
			
 
				+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i]+\".npy\")\n",
			
 
				+    "            output[n,] = [1,0,0]\n",
			
 
				+    "        elif i > lmin and i < (lmin + lmid):\n",
			
 
				+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - lmin]+\".npy\")\n",
			
 
				+    "            output[n,] = [0,1,0]\n",
			
 
				+    "        else:\n",
			
 
				+    "            data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+control[i - (lmin+lmid) - 1]+\".npy\")\n",
			
 
				+    "            output[n,] = [0,0,1]\n",
			
 
				+    "    \n",
			
 
				+    "    return (data_onehot, output)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Defining different model to test and compare"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 4,
			
 
				+   "execution_count": null,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
 
				-    "n = -1\n",
			
 
				-    "for i in indices:\n",
			
 
				-    "    n += 1\n",
			
 
				-    "    if i < lmin:\n",
			
 
				-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
			
 
				-    "        output[n,] = [1,0,0]\n",
			
 
				-    "    elif i > lmin and i < (lmin + lmid):\n",
			
 
				-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
			
 
				-    "        output[n,] = [0,1,0]\n",
			
 
				-    "    else:\n",
			
 
				-    "        data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid) - 1]+\".npy\")\n",
			
 
				-    "        output[n,] = [0,0,1]"
			
 
				+    "def model_heavy(): # créer un objet modèle\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    Return a simple sequentiel model\n",
			
 
				+    "    \n",
			
 
				+    "    Returns :\n",
			
 
				+    "        - model : keras.Model\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    inputs = Input(shape=(14,32,32,32))\n",
			
 
				+    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
			
 
				+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
			
 
				+    "    drop_1 = Dropout(0.2)(conv_2)\n",
			
 
				+    "    maxpool = MaxPooling3D()(drop_1)\n",
			
 
				+    "    drop_2 = Dropout(0.4)(maxpool)\n",
			
 
				+    "    dense = Dense(512)(drop_2)\n",
			
 
				+    "    drop_3 = Dropout(0.4)(dense)\n",
			
 
				+    "    flatters = Flatten()(drop_3)\n",
			
 
				+    "    #output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
			
 
				+    "    output = Dense(3, activation='softmax')(flatters)\n",
			
 
				+    "    model = Model(inputs=inputs, outputs=output)\n",
			
 
				+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
			
 
				+    "    print(model.summary)\n",
			
 
				+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
			
 
				+    "                  metrics=[\"accuracy\"])\n",
			
 
				+    "    return model"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				+   "execution_count": 8,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
 
				-    "X_train = data_onehot[0:1664,]\n",
			
 
				-    "Y_train = output[0:1664,]\n",
			
 
				-    "X_test = data_onehot[1664:,]\n",
			
 
				-    "Y_test = output[1664:,]"
			
 
				+    "def model_new(): # créer un objet modèle\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    Return a simple sequentiel model\n",
			
 
				+    "    \n",
			
 
				+    "    Returns :\n",
			
 
				+    "        - model : keras.Model\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    inputs = Input(shape=(14,32,32,32))\n",
			
 
				+    "    conv_1 = Convolution3D(filters=64, kernel_size=5, padding=\"valid\", data_format='channels_first')(inputs)\n",
			
 
				+    "    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)\n",
			
 
				+    "    drop_1 = Dropout(0.2)(activation_1)\n",
			
 
				+    "    conv_2 = Convolution3D(filters=64, kernel_size=3, padding=\"valid\", data_format='channels_first')(drop_1)\n",
			
 
				+    "    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)\n",
			
 
				+    "    maxpool = MaxPooling3D(pool_size=(2,2,2),\n",
			
 
				+    "                            strides=None,\n",
			
 
				+    "                            padding='valid',\n",
			
 
				+    "                            data_format='channels_first')(activation_2)\n",
			
 
				+    "    drop_2 = Dropout(0.4)(maxpool)\n",
			
 
				+    "    flatters = Flatten()(drop_2)\n",
			
 
				+    "    dense = Dense(128)(flatters)\n",
			
 
				+    "    activation_3 = LeakyReLU(alpha = 0.1)(dense)\n",
			
 
				+    "    drop_3 = Dropout(0.4)(activation_3)\n",
			
 
				+    "    output = Dense(3, activation='softmax')(drop_3)\n",
			
 
				+    "    model = Model(inputs=inputs, outputs=output)\n",
			
 
				+    "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
			
 
				+    "    print(model.summary)\n",
			
 
				+    "    model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
			
 
				+    "                  metrics=[\"accuracy\"])\n",
			
 
				+    "    return model"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 14,
			
 
				-   "metadata": {},
			
 
				-   "outputs": [
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "text/plain": [
			
 
				-       "(1, 14, 32, 32, 32)"
			
 
				-      ]
			
 
				-     },
			
 
				-     "execution_count": 14,
			
 
				-     "metadata": {},
			
 
				-     "output_type": "execute_result"
			
 
				-    }
			
 
				-   ],
			
 
				-   "source": [
			
 
				-    "def model_sequential(): # créer un objet modèle\n",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def model_light(): # créer un objet modèle\n",
			
 
				     "    \"\"\"\n",
			
 
				     "    Return a simple sequentiel model\n",
			
 
				     "    \n",
			
 
				     "    Returns :\n",
			
 
				     "        - model : keras.Model\n",
			
 
				     "    \"\"\"\n",
			
 
				-    "    inputs = Input(shape=(32,32,32,14)) # 759 aa, 21 car onehot\n",
			
 
				-    "    conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"LeakyReLU\",\n",
			
 
				-    "                        kernel_initializer=\"he_normal\")(inputs)\n",
			
 
				-    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"LeakyReLU\",\n",
			
 
				-    "                        kernel_initializer=\"he_normal\")(conv_1)\n",
			
 
				+    "    inputs = Input(shape=(14,32,32,32))\n",
			
 
				+    "    conv_1 = Conv3D(32, (28, 28, 28), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(inputs)\n",
			
 
				+    "    conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"relu\", kernel_initializer=\"he_normal\")(conv_1)\n",
			
 
				     "    drop_1 = Dropout(0.2)(conv_2)\n",
			
 
				     "    maxpool = MaxPooling3D()(drop_1)\n",
			
 
				-    "    drop_2 = Dropout(0.4)(maxpool)\n",
			
 
				-    "    dense = Dense(512)(drop_2)\n",
			
 
				-    "    drop_3 = Dropout(0.4)(dense)\n",
			
 
				-    "    output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
			
 
				+    "    drop_2 = Dropout(0.3)(maxpool)\n",
			
 
				+    "    maxpool_2 = MaxPooling3D()(drop_2)\n",
			
 
				+    "    drop_3 = Dropout(0.3)(maxpool_2)\n",
			
 
				+    "    dense = Dense(256)(drop_3)\n",
			
 
				+    "    drop_4 = Dropout(0.4)(dense)\n",
			
 
				+    "    flatters = Flatten()(drop_4)\n",
			
 
				+    "    output = Dense(3, activation='softmax')(flatters)\n",
			
 
				     "    model = Model(inputs=inputs, outputs=output)\n",
			
 
				     "    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
			
 
				     "    print(model.summary)\n",
			
@@ -179,12 +233,92 @@
 
				     "    return model"
			
 
				    ]
			
 
				   },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Create pocket lists\n",
			
 
				+    "4 lists are created :\n",
			
 
				+    "  + control\n",
			
 
				+    "  + steroid\n",
			
 
				+    "  + heme\n",
			
 
				+    "  + nucleotide"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data = in_out_lists(1400)\n",
			
 
				+    "pockets = np.cumsum(data[1], axis=0)[-1]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "print(\"with random seed=9001 and a 1400 pockets dataset the rates are:\\n\\\n",
			
 
				+    "      {} heme, {} nucleotide, {} control\\n\\\n",
			
 
				+    "      Total avaible dataset are composed of the following proportions:\\n\\\n",
			
 
				+    "      {} heme, {} nucleotide, {} control\".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,\n",
			
 
				+    "                                                0.145, 0.380, 0.475))"
			
 
				+   ]
			
 
				+  },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				    "execution_count": null,
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				-   "source": []
			
 
				+   "source": [
			
 
				+    "data_onehot = data[0]\n",
			
 
				+    "output = data[1]\n",
			
 
				+    "X_train = data_onehot[0:1000,]\n",
			
 
				+    "Y_train = output[0:1000,]\n",
			
 
				+    "X_test = data_onehot[1000:,]\n",
			
 
				+    "Y_test = output[1000:,]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "my_model = model_new()"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "tf.test.is_gpu_available()\n",
			
 
				+    "#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "history_mild_2mp = mild_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)\n",
			
 
				+    "my_model.save('new_model_e30_b32_t1000.h5')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "#predictions=prediction_history()"
			
 
				+   ]
			
 
				   }
			
 
				  ],
			
 
				  "metadata": {
			
--- a/DeepDrug.py
+++ b/DeepDrug.py
@@ -0,0 +1,250 @@
 
				+#!/usr/bin/env python
			
 
				+# coding: utf-8
			
 
				+
			
 
				+# # DeepDrug3D
			
 
				+
			
 
				+# ## Importing libraries
			
 
				+
			
 
				+# In[6]:
			
 
				+
			
 
				+
			
 
				+import numpy as np
			
 
				+import tensorflow as tf
			
 
				+from sklearn.preprocessing import LabelEncoder
			
 
				+from keras.models import Sequential
			
 
				+from keras import optimizers, callbacks
			
 
				+from keras.layers import Dense, Flatten, TimeDistributed, Dropout
			
 
				+from keras import Input, Model
			
 
				+from keras.layers import add, Activation
			
 
				+from keras.layers.advanced_activations import LeakyReLU
			
 
				+#from keras.utils import plot_model  # Needs pydot.
			
 
				+from keras.layers import Convolution3D, MaxPooling3D
			
 
				+
			
 
				+
			
 
				+# ### Callback used to store the model predictions after each epoch, in order to plot a ROC curve
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+class prediction_history(callbacks.Callback):
			
 
				+    def __init__(self):
			
 
				+        self.predhis = []
			
 
				+    def on_epoch_end(self, epoch, logs={}):
			
 
				+        self.predhis.append(model.predict(predictor_train))
			
 
				+
			
 
				+
			
 
				+# ### Creating inputs and outputs
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+def in_out_lists(size=1000):
			
 
				+    """
			
 
				+    returns a tuple of array used as input and output for the model
			
 
				+    Arguments:
			
 
				+        - size, int: default 1000, size of the lists to be created
			
 
				+        
			
 
				+    Returns:
			
 
				+        - tuple (data_onehot, output):
			
 
				+            -data_onehot, ndarray: containing one-hot encoded pockets
			
 
				+            -output, ndarray: containing size-3 vectors for classification
			
 
				+    """
			
 
				+    with open("control.list", "r") as filin:
			
 
				+        control = filin.read()
			
 
				+        control = control.split("\n")
			
 
				+        control.pop()
			
 
				+
			
 
				+    with open("steroid.list", "r") as filin:
			
 
				+        steroid = filin.read()
			
 
				+        steroid = steroid.split("\n")
			
 
				+        steroid.pop()
			
 
				+
			
 
				+    with open("heme.list", "r") as filin:
			
 
				+        heme = filin.read()
			
 
				+        heme = heme.split("\n")
			
 
				+        heme.pop()
			
 
				+
			
 
				+    with open("nucleotide.list", "r") as filin:
			
 
				+        nucleotide = filin.read()
			
 
				+        nucleotide = nucleotide.split("\n")
			
 
				+        nucleotide.pop()
			
 
				+    
			
 
				+    lmin = len(heme)
			
 
				+    lmid = len(nucleotide)
			
 
				+    lmax = len(control)
			
 
				+    tot_size = lmin + lmid + lmax
			
 
				+    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32)) # initializing empty array
			
 
				+
			
 
				+    np.random.seed(9001)
			
 
				+    indices = np.random.permutation(tot_size)
			
 
				+    indices = indices[:size]
			
 
				+    output = np.ndarray(shape=(size, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}
			
 
				+
			
 
				+    n = -1
			
 
				+    for i in indices:
			
 
				+        n += 1
			
 
				+        if i < lmin:
			
 
				+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+heme[i]+".npy")
			
 
				+            output[n,] = [1,0,0]
			
 
				+        elif i > lmin and i < (lmin + lmid):
			
 
				+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+nucleotide[i - lmin]+".npy")
			
 
				+            output[n,] = [0,1,0]
			
 
				+        else:
			
 
				+            data_onehot[n,] = np.load("deepdrug3d_voxel_data/"+control[i - (lmin+lmid) - 1]+".npy")
			
 
				+            output[n,] = [0,0,1]
			
 
				+    
			
 
				+    return (data_onehot, output)
			
 
				+
			
 
				+
			
 
				+# ### Defining different models to test and compare
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+def model_heavy(): # créer un objet modèle
			
 
				+    """
			
 
				+    Return a simple sequentiel model
			
 
				+    
			
 
				+    Returns :
			
 
				+        - model : keras.Model
			
 
				+    """
			
 
				+    inputs = Input(shape=(14,32,32,32))
			
 
				+    conv_1 = Conv3D(64, (28, 28, 28), padding="same", activation="relu", kernel_initializer="he_normal")(inputs)
			
 
				+    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu", kernel_initializer="he_normal")(conv_1)
			
 
				+    drop_1 = Dropout(0.2)(conv_2)
			
 
				+    maxpool = MaxPooling3D()(drop_1)
			
 
				+    drop_2 = Dropout(0.4)(maxpool)
			
 
				+    dense = Dense(512)(drop_2)
			
 
				+    drop_3 = Dropout(0.4)(dense)
			
 
				+    flatters = Flatten()(drop_3)
			
 
				+    #output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)
			
 
				+    output = Dense(3, activation='softmax')(flatters)
			
 
				+    model = Model(inputs=inputs, outputs=output)
			
 
				+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
			
 
				+    print(model.summary)
			
 
				+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
			
 
				+                  metrics=["accuracy"])
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				+# In[8]:
			
 
				+
			
 
				+
			
 
				+def model_new(): # créer un objet modèle
			
 
				+    """
			
 
				+    Return a simple sequentiel model
			
 
				+    
			
 
				+    Returns :
			
 
				+        - model : keras.Model
			
 
				+    """
			
 
				+    inputs = Input(shape=(14,32,32,32))
			
 
				+    conv_1 = Convolution3D(filters=64, kernel_size=5, padding="valid", data_format='channels_first')(inputs)
			
 
				+    activation_1 = LeakyReLU(alpha = 0.1)(conv_1)
			
 
				+    drop_1 = Dropout(0.2)(activation_1)
			
 
				+    conv_2 = Convolution3D(filters=64, kernel_size=3, padding="valid", data_format='channels_first')(drop_1)
			
 
				+    activation_2 = LeakyReLU(alpha = 0.1)(conv_2)
			
 
				+    maxpool = MaxPooling3D(pool_size=(2,2,2),
			
 
				+                            strides=None,
			
 
				+                            padding='valid',
			
 
				+                            data_format='channels_first')(activation_2)
			
 
				+    drop_2 = Dropout(0.4)(maxpool)
			
 
				+    flatters = Flatten()(drop_2)
			
 
				+    dense = Dense(128)(flatters)
			
 
				+    activation_3 = LeakyReLU(alpha = 0.1)(dense)
			
 
				+    drop_3 = Dropout(0.4)(activation_3)
			
 
				+    output = Dense(3, activation='softmax')(drop_3)
			
 
				+    model = Model(inputs=inputs, outputs=output)
			
 
				+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
			
 
				+    print(model.summary)
			
 
				+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
			
 
				+                  metrics=["accuracy"])
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+def model_light(): # créer un objet modèle
			
 
				+    """
			
 
				+    Return a simple sequentiel model
			
 
				+    
			
 
				+    Returns :
			
 
				+        - model : keras.Model
			
 
				+    """
			
 
				+    inputs = Input(shape=(14,32,32,32))
			
 
				+    conv_1 = Conv3D(32, (28, 28, 28), padding="same", activation="relu", kernel_initializer="he_normal")(inputs)
			
 
				+    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu", kernel_initializer="he_normal")(conv_1)
			
 
				+    drop_1 = Dropout(0.2)(conv_2)
			
 
				+    maxpool = MaxPooling3D()(drop_1)
			
 
				+    drop_2 = Dropout(0.3)(maxpool)
			
 
				+    maxpool_2 = MaxPooling3D()(drop_2)
			
 
				+    drop_3 = Dropout(0.3)(maxpool_2)
			
 
				+    dense = Dense(256)(drop_3)
			
 
				+    drop_4 = Dropout(0.4)(dense)
			
 
				+    flatters = Flatten()(drop_4)
			
 
				+    output = Dense(3, activation='softmax')(flatters)
			
 
				+    model = Model(inputs=inputs, outputs=output)
			
 
				+    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
			
 
				+    print(model.summary)
			
 
				+    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
			
 
				+                  metrics=["accuracy"])
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				+# ## Create pocket lists
			
 
				+# 4 lists are created :
			
 
				+#   + control
			
 
				+#   + steroid
			
 
				+#   + heme
			
 
				+#   + nucleotide
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+data = in_out_lists(1400)
			
 
				+pockets = np.cumsum(data[1], axis=0)[-1]
			
 
				+
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+print("with random seed=9001 and a 1400 pockets dataset the rates are:\n      {} heme, {} nucleotide, {} control\n      Total avaible dataset are composed of the following proportions:\n      {} heme, {} nucleotide, {} control".format(pockets[0]/1400, pockets[1]/1400,pockets[2]/1400,
			
 
				+                                                0.145, 0.380, 0.475))
			
 
				+
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+data_onehot = data[0]
			
 
				+output = data[1]
			
 
				+X_train = data_onehot[0:1000,]
			
 
				+Y_train = output[0:1000,]
			
 
				+X_test = data_onehot[1000:,]
			
 
				+Y_test = output[1000:,]
			
 
				+
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+my_model = model_new()
			
 
				+
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+tf.test.is_gpu_available()
			
 
				+#my_model.fit(X_train, Y_train, epochs=50, batch_size=30)
			
 
				+
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+history_mild_2mp = mild_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
			
 
				+my_model.save('new_model_e30_b32_t1000.h5')
			
 
				+
			
 
				+
			
 
				+# In[ ]:
			
 
				+
			
 
				+
			
 
				+#predictions=prediction_history()
			
 
				+
Author	SHA1	Message	Date
Nicolasticot	bb9a7f549e	added new model, with different hyperparameter from dd3d git	5 years ago
Nicolasticot	860e797732	changed mild model, added maxpooling	5 years ago