#!/usr/bin/env python
# coding: utf-8

# # DeepDrug3D
#
# Notebook export: classifies protein binding pockets (voxelized as
# 14x32x32x32 one-hot grids) into heme / nucleotide / control with a 3D CNN,
# then probes the trained model on steroid-binding pockets.

# ## Importing library

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential, load_model
from keras import optimizers, callbacks
from keras.layers import Dense, Flatten, TimeDistributed, Dropout
from keras import Input, Model
from keras.layers import add, Activation
from keras.layers.advanced_activations import LeakyReLU
#from keras.utils import plot_model  # Needs pydot.
from keras.layers import Convolution3D, MaxPooling3D


def _read_list(path):
    """Return the pocket names in *path*, one per line, trailing newline dropped."""
    with open(path, "r") as filin:
        return filin.read().splitlines()


def _compile_classifier(model):
    """Print the summary of *model* and compile it for 3-class softmax training.

    Uses Adam with a very small learning rate (1e-6) and categorical
    cross-entropy, as in the original notebook.
    """
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9,
                             beta_2=0.999, amsgrad=False)
    # BUG FIX: original did `print(model.summary)`, which prints the bound
    # method object; `model.summary()` prints the actual table.
    model.summary()
    model.compile(optimizer=my_opt,
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])


# ### Creating input and outputs

def in_out_lists(size=1000):
    """Return a tuple of arrays used as input and output for the model.

    Arguments:
        - size, int: default 1000, number of pockets to sample.

    Returns:
        - tuple (data_onehot, output):
            - data_onehot, ndarray (size, 14, 32, 32, 32): one-hot encoded pockets
            - output, ndarray (size, 3): one-hot class labels
              [1,0,0]=heme, [0,1,0]=nucleotide, [0,0,1]=control
    """
    control = _read_list("control.list")
    heme = _read_list("heme.list")
    nucleotide = _read_list("nucleotide.list")

    # Virtual concatenation order: heme | nucleotide | control.
    lmin = len(heme)
    lmid = len(nucleotide)
    lmax = len(control)
    tot_size = lmin + lmid + lmax

    data_onehot = np.ndarray(shape=(size, 14, 32, 32, 32))  # empty input array
    output = np.ndarray(shape=(size, 3))  # softmax targets, 3 classes

    np.random.seed(9001)  # fixed seed -> reproducible subsample
    indices = np.random.permutation(tot_size)[:size]

    for n, i in enumerate(indices):
        if i < lmin:
            name = heme[i]
            label = [1, 0, 0]
        # BUG FIX: original condition was `i > lmin and i < lmin + lmid`,
        # which silently dropped i == lmin into the control branch with a
        # bogus negative index.
        elif i < lmin + lmid:
            name = nucleotide[i - lmin]
            label = [0, 1, 0]
        else:
            # BUG FIX: original indexed `control[i - (lmin+lmid) - 1]`; the
            # stray "- 1" mapped the first control index to control[-1] and
            # made the last control pocket unreachable.
            name = control[i - (lmin + lmid)]
            label = [0, 0, 1]
        data_onehot[n,] = np.load("deepdrug3d_voxel_data/" + name + ".npy")
        output[n,] = label

    return (data_onehot, output)


# ### Defining different models to test and compare

def model_heavy():
    """Return a compiled functional model with two large same-padded conv layers.

    Returns:
        - model: keras.Model
    """
    inputs = Input(shape=(14, 32, 32, 32))
    # BUG FIX: original used `Conv3D`, which is never imported (NameError);
    # `Convolution3D` is the imported alias.
    conv_1 = Convolution3D(64, (28, 28, 28), padding="same", activation="relu",
                           kernel_initializer="he_normal")(inputs)
    conv_2 = Convolution3D(64, (26, 26, 26), padding="same", activation="relu",
                           kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.4)(maxpool)
    dense = Dense(512)(drop_2)
    drop_3 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_3)
    output = Dense(3, activation='softmax')(flatters)
    model = Model(inputs=inputs, outputs=output)
    _compile_classifier(model)
    return model


def _model_fast(kernel_size):
    """Build the 'fast' two-conv architecture with the given cubic kernel size.

    Shared by model_fast_k32 / model_fast_k16, which were previously two
    byte-identical copies differing only in kernel_size.

    NOTE(review): with kernel_size=32 and padding="valid" the first conv
    reduces the 32^3 grid to 1^3, so the second valid conv cannot fit --
    the k32 variant fails at build time. Preserved as-is; confirm intent.
    """
    inputs = Input(shape=(14, 32, 32, 32))
    conv_1 = Convolution3D(filters=64, kernel_size=kernel_size,
                           padding="valid", data_format='channels_first')(inputs)
    activation_1 = LeakyReLU(alpha=0.1)(conv_1)
    drop_1 = Dropout(0.2)(activation_1)
    conv_2 = Convolution3D(filters=128, kernel_size=kernel_size,
                           padding="valid", data_format='channels_first')(drop_1)
    activation_2 = LeakyReLU(alpha=0.1)(conv_2)
    maxpool = MaxPooling3D(pool_size=(2, 2, 2), strides=None, padding='valid',
                           data_format='channels_first')(activation_2)
    drop_2 = Dropout(0.4)(maxpool)
    flatters = Flatten()(drop_2)
    dense = Dense(256)(flatters)
    activation_3 = LeakyReLU(alpha=0.1)(dense)
    drop_3 = Dropout(0.4)(activation_3)
    output = Dense(3, activation='softmax')(drop_3)
    model = Model(inputs=inputs, outputs=output)
    _compile_classifier(model)
    return model


def model_fast_k32():
    """Return the compiled 'fast' model with 32-wide cubic kernels.

    Returns:
        - model: keras.Model
    """
    return _model_fast(32)


def model_fast_k16():
    """Return the compiled 'fast' model with 16-wide cubic kernels.

    Returns:
        - model: keras.Model
    """
    return _model_fast(16)


def model_light():
    """Return a compiled lighter model with two pooling stages.

    Returns:
        - model: keras.Model
    """
    inputs = Input(shape=(14, 32, 32, 32))
    # BUG FIX: `Conv3D` -> `Convolution3D` (Conv3D was never imported).
    conv_1 = Convolution3D(32, (28, 28, 28), padding="same", activation="relu",
                           kernel_initializer="he_normal")(inputs)
    conv_2 = Convolution3D(64, (26, 26, 26), padding="same", activation="relu",
                           kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.3)(maxpool)
    maxpool_2 = MaxPooling3D()(drop_2)
    drop_3 = Dropout(0.3)(maxpool_2)
    dense = Dense(256)(drop_3)
    drop_4 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_4)
    output = Dense(3, activation='softmax')(flatters)
    model = Model(inputs=inputs, outputs=output)
    _compile_classifier(model)
    return model


# ## Create pocket lists
# 4 lists are used: control, steroid, heme, nucleotide.

sample = 1000
data = in_out_lists(sample)
# Column-wise totals of the one-hot labels = class counts in the sample.
pockets = np.cumsum(data[1], axis=0)[-1]

print("with random seed=9001 and a {} pockets dataset the rates are:\n {} heme, {} nucleotide, {} control\n Total avaible dataset are composed of the following proportions:\n {} heme, {} nucleotide, {} control".format(sample, pockets[0]/sample, pockets[1]/sample,pockets[2]/sample, 0.145, 0.380, 0.475))

# 60/40 train/test split on the (already shuffled) sample.
train = int(sample * 0.6)
data_onehot = data[0]
output = data[1]
X_train = data_onehot[0:train,]
Y_train = output[0:train,]
X_test = data_onehot[train:,]
Y_test = output[train:,]

my_model = model_fast_k16()

tf.test.is_gpu_available()

my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
             epochs=50, batch_size=32)
#my_model.save('new_model_e50_b32_t1600.h5')
#my_model = load_model('new_model_e50_b32_t1600.h5')

# ## Testing steroids
# Steroid pockets were never shown to the model; see how they are classified.

steroid = _read_list("steroid.list")
# Generalized: sized from the list (was hard-coded to 69).
X_steroid = np.ndarray(shape=(len(steroid), 14, 32, 32, 32))
for i, pocket in enumerate(steroid):
    X_steroid[i,] = np.load("deepdrug3d_voxel_data/" + pocket + ".npy")

Y_pred_steroid = my_model.predict(X_steroid)
Y_pred_steroid = Y_pred_steroid.round()
# BUG FIX: original printed cumsum(axis=0)[0], [1], [2] -- the first three
# ROWS of a running sum, not per-class totals. sum(axis=0) gives the counts.
steroid_predict = Y_pred_steroid.sum(axis=0)
print("On 69 steroid-binded pockets, prediction are the following:\n\
predicted as heme:\t{}\npredicted as nucleotide:\t{}\n\
predicted as control:\t{}\n".format(steroid_predict[0], steroid_predict[1], steroid_predict[2]))