projet de deep-learning. Apprentissage de poches de liaison de protéines-ligands

DeepDrug.ipynb 5.4KB


  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "# DeepDrug3D"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": 1,
  13. "metadata": {},
  14. "outputs": [
  15. {
  16. "name": "stderr",
  17. "output_type": "stream",
  18. "text": [
  19. "Using TensorFlow backend.\n"
  20. ]
  21. }
  22. ],
  23. "source": [
  24. "import numpy as np\n",
  25. "\n",
  26. "from sklearn.preprocessing import LabelEncoder\n",
  27. "from keras.models import Sequential\n",
  28. "from keras import optimizers\n",
  29. "from keras.layers import Dense, Flatten, TimeDistributedn, Dropout\n",
  30. "from keras import Input, Model\n",
  31. "from keras.layers import add, Activation\n",
  32. "#from keras.utils import plot_model # Needs pydot.\n",
  33. "from keras.layers import Conv3D, MaxPooling3D"
  34. ]
  35. },
  36. {
  37. "cell_type": "markdown",
  38. "metadata": {},
  39. "source": [
  40. "## Create pocket lists\n",
  41. "Four pocket lists are created:\n",
  42. " + control\n",
  43. " + steroid\n",
  44. " + heme\n",
  45. " + nucleotide"
  46. ]
  47. },
  48. {
  49. "cell_type": "code",
  50. "execution_count": 2,
  51. "metadata": {},
  52. "outputs": [
  53. {
  54. "data": {
  55. "text/plain": [
  56. "''"
  57. ]
  58. },
  59. "execution_count": 2,
  60. "metadata": {},
  61. "output_type": "execute_result"
  62. }
  63. ],
  64. "source": [
  65. "with open(\"control.list\", \"r\") as filin:\n",
  66. " control = filin.read()\n",
  67. "control = control.split(\"\\n\")\n",
  68. "control.pop()\n",
  69. "\n",
  70. "with open(\"steroid.list\", \"r\") as filin:\n",
  71. " steroid = filin.read()\n",
  72. "steroid = steroid.split(\"\\n\")\n",
  73. "steroid.pop()\n",
  74. "\n",
  75. "with open(\"heme.list\", \"r\") as filin:\n",
  76. " heme = filin.read()\n",
  77. "heme = heme.split(\"\\n\")\n",
  78. "heme.pop()\n",
  79. "\n",
  80. "with open(\"nucleotide.list\", \"r\") as filin:\n",
  81. " nucleotide = filin.read()\n",
  82. "nucleotide = nucleotide.split(\"\\n\")\n",
  83. "nucleotide.pop()"
  84. ]
  85. },
  86. {
  87. "cell_type": "markdown",
  88. "metadata": {},
  89. "source": [
  90. "### Creating inputs and outputs"
  91. ]
  92. },
  93. {
  94. "cell_type": "code",
  95. "execution_count": 3,
  96. "metadata": {},
  97. "outputs": [],
  98. "source": [
  99. "data_onehot = np.ndarray(shape=(2219, 14, 32, 32, 32)) # initializing empty array\n",
  100. "indices = np.random.permutation(2219)\n",
  101. "output = np.ndarray(shape=(2219, 3)) # softmax 3, {steroid=1, heme=1, nucleotide=1}\n",
  102. "lmin = len(steroid)\n",
  103. "lmid = len(heme)\n",
  104. "lmax = len(nucleotide)"
  105. ]
  106. },
  107. {
  108. "cell_type": "code",
  109. "execution_count": 4,
  110. "metadata": {},
  111. "outputs": [],
  112. "source": [
  113. "n = -1\n",
  114. "for i in indices:\n",
  115. " n += 1\n",
  116. " if i < lmin:\n",
  117. " data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+steroid[i]+\".npy\")\n",
  118. " output[n,] = [1,0,0]\n",
  119. " elif i > lmin and i < (lmin + lmid):\n",
  120. " data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+heme[i - lmin]+\".npy\")\n",
  121. " output[n,] = [0,1,0]\n",
  122. " else:\n",
  123. " data_onehot[n,] = np.load(\"deepdrug3d_voxel_data/\"+nucleotide[i - (lmin+lmid) - 1]+\".npy\")\n",
  124. " output[n,] = [0,0,1]"
  125. ]
  126. },
  127. {
  128. "cell_type": "code",
  129. "execution_count": 5,
  130. "metadata": {},
  131. "outputs": [],
  132. "source": [
  133. "X_train = data_onehot[0:1664,]\n",
  134. "Y_train = output[0:1664,]\n",
  135. "X_test = data_onehot[1664:,]\n",
  136. "Y_test = output[1664:,]"
  137. ]
  138. },
  139. {
  140. "cell_type": "code",
  141. "execution_count": 14,
  142. "metadata": {},
  143. "outputs": [
  144. {
  145. "data": {
  146. "text/plain": [
  147. "(1, 14, 32, 32, 32)"
  148. ]
  149. },
  150. "execution_count": 14,
  151. "metadata": {},
  152. "output_type": "execute_result"
  153. }
  154. ],
  155. "source": [
  156. "def model_sequential(): # créer un objet modèle\n",
  157. " \"\"\"\n",
  158. " Return a simple sequentiel model\n",
  159. " \n",
  160. " Returns :\n",
  161. " - model : keras.Model\n",
  162. " \"\"\"\n",
  163. " inputs = Input(shape=(32,32,32,14)) # 759 aa, 21 car onehot\n",
  164. " conv_1 = Conv3D(64, (28, 28, 28), padding=\"same\", activation=\"LeakyReLU\",\n",
  165. " kernel_initializer=\"he_normal\")(inputs)\n",
  166. " conv_2 = Conv3D(64, (26, 26, 26), padding=\"same\", activation=\"LeakyReLU\",\n",
  167. " kernel_initializer=\"he_normal\")(conv_1)\n",
  168. " drop_1 = Dropout(0.2)(conv_2)\n",
  169. " maxpool = MaxPooling3D()(drop_1)\n",
  170. " drop_2 = Dropout(0.4)(maxpool)\n",
  171. " dense = Dense(512)(drop_2)\n",
  172. " drop_3 = Dropout(0.4)(dense)\n",
  173. " output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)\n",
  174. " model = Model(inputs=inputs, outputs=output)\n",
  175. " my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)\n",
  176. " print(model.summary)\n",
  177. " model.compile(optimizer=my_opt, loss=\"categorical_crossentropy\",\n",
  178. " metrics=[\"accuracy\"])\n",
  179. " return model"
  180. ]
  181. },
  182. {
  183. "cell_type": "code",
  184. "execution_count": null,
  185. "metadata": {},
  186. "outputs": [],
  187. "source": []
  188. }
  189. ],
  190. "metadata": {
  191. "kernelspec": {
  192. "display_name": "Python 3",
  193. "language": "python",
  194. "name": "python3"
  195. },
  196. "language_info": {
  197. "codemirror_mode": {
  198. "name": "ipython",
  199. "version": 3
  200. },
  201. "file_extension": ".py",
  202. "mimetype": "text/x-python",
  203. "name": "python",
  204. "nbconvert_exporter": "python",
  205. "pygments_lexer": "ipython3",
  206. "version": "3.7.4"
  207. }
  208. },
  209. "nbformat": 4,
  210. "nbformat_minor": 4
  211. }