#!/usr/bin/env python
# coding: utf-8

# # DeepDrug3D
#
# Trains 3D-CNN models to classify protein binding pockets
# (heme / nucleotide / control) from one-hot voxelized pocket grids.

# ## Importing library

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras import optimizers, callbacks
from keras.layers import Dense, Flatten, TimeDistributed, Dropout
from keras import Input, Model
from keras.layers import add, Activation
# from keras.utils import plot_model  # Needs pydot.
from keras.layers import Conv3D, MaxPooling3D


# ### Used to store model predictions in order to plot a ROC curve
class prediction_history(callbacks.Callback):
    """Keras callback recording predictions on a fixed dataset after
    each training epoch (used later to plot ROC curves).

    Arguments:
        - data: optional array of model inputs to predict on at the end
          of every epoch. When None, nothing is recorded.
    """

    def __init__(self, data=None):
        super().__init__()
        self.predhis = []  # one prediction array appended per epoch
        self.data = data

    def on_epoch_end(self, epoch, logs=None):
        # BUGFIX: the original body referenced the undefined globals
        # `model` and `predictor_train` (NameError when the callback
        # fires). Keras binds the trained model to `self.model`, and the
        # dataset to predict on is supplied at construction time.
        if self.data is not None:
            self.predhis.append(self.model.predict(self.data))


def _read_list(path):
    """Return the non-empty lines of a pocket-list file.

    BUGFIX: the original `read().split("\\n")` + unconditional `pop()`
    silently dropped the last real entry whenever the file had no
    trailing newline; `splitlines()` is correct in both cases.
    """
    with open(path, "r") as filin:
        return filin.read().splitlines()


# ### Creating input and outputs
def in_out_lists(size=1000):
    """
    Return a tuple of arrays used as input and output for the model.

    Arguments:
        - size, int: default 1000, number of pockets to sample

    Returns:
        - tuple (data_onehot, output):
            - data_onehot, ndarray (size, 14, 32, 32, 32): one-hot
              encoded voxelized pockets
            - output, ndarray (size, 3): one-hot class vectors ordered
              as [heme, nucleotide, control]
    """
    control = _read_list("control.list")
    steroid = _read_list("steroid.list")  # loaded but not part of the 3-class problem
    heme = _read_list("heme.list")
    nucleotide = _read_list("nucleotide.list")

    lmin = len(heme)
    lmid = len(nucleotide)
    lmax = len(control)
    tot_size = lmin + lmid + lmax

    data_onehot = np.empty(shape=(size, 14, 32, 32, 32))
    output = np.empty(shape=(size, 3))

    # Reproducible random subsample of the full dataset.
    np.random.seed(9001)
    indices = np.random.permutation(tot_size)[:size]

    for n, i in enumerate(indices):
        # Global indices are laid out as [heme | nucleotide | control].
        if i < lmin:
            name = heme[i]
            label = [1, 0, 0]
        # BUGFIX: the original tested `i > lmin`, so i == lmin fell
        # through to the control branch and produced a negative index;
        # `i < lmin + lmid` routes it to the nucleotide class.
        elif i < lmin + lmid:
            name = nucleotide[i - lmin]
            label = [0, 1, 0]
        else:
            # BUGFIX: dropped the spurious `- 1` of the original, which
            # skewed every control lookup by one entry (and indexed
            # control[-1] for the first control pocket).
            name = control[i - (lmin + lmid)]
            label = [0, 0, 1]
        data_onehot[n,] = np.load("deepdrug3d_voxel_data/" + name + ".npy")
        output[n,] = label

    return (data_onehot, output)


# ### Defining different models to test and compare
def model_heavy():
    """
    Build and compile the heavy 3D-CNN classifier.

    Returns:
        - model: compiled keras.Model mapping (14, 32, 32, 32) voxel
          grids to a 3-way softmax
    """
    inputs = Input(shape=(14, 32, 32, 32))
    conv_1 = Conv3D(64, (28, 28, 28), padding="same", activation="relu",
                    kernel_initializer="he_normal")(inputs)
    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu",
                    kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.4)(maxpool)
    dense = Dense(512)(drop_2)
    drop_3 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_3)
    # output = TimeDistributed(Dense(3, activation='softmax'))(drop_3)
    output = Dense(3, activation='softmax')(flatters)

    model = Model(inputs=inputs, outputs=output)
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9,
                             beta_2=0.999, amsgrad=False)
    # BUGFIX: `print(model.summary)` printed the bound-method object;
    # calling summary() actually displays the architecture.
    model.summary()
    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model


def model_light():
    """
    Build and compile the lighter 3D-CNN classifier (two pooling
    stages, fewer filters and a smaller dense layer than model_heavy).

    Returns:
        - model: compiled keras.Model mapping (14, 32, 32, 32) voxel
          grids to a 3-way softmax
    """
    inputs = Input(shape=(14, 32, 32, 32))
    conv_1 = Conv3D(32, (28, 28, 28), padding="same", activation="relu",
                    kernel_initializer="he_normal")(inputs)
    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu",
                    kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.3)(maxpool)
    maxpool_2 = MaxPooling3D()(drop_2)
    drop_3 = Dropout(0.3)(maxpool_2)
    dense = Dense(256)(drop_3)
    drop_4 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_4)
    output = Dense(3, activation='softmax')(flatters)

    model = Model(inputs=inputs, outputs=output)
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9,
                             beta_2=0.999, amsgrad=False)
    # BUGFIX: `print(model.summary)` printed the bound-method object;
    # calling summary() actually displays the architecture.
    model.summary()
    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model


# ## Create pocket lists
# 4 lists are created:
# + control
# + steroid
# + heme
# + nucleotide

data = in_out_lists(1400)
# Column sums of the one-hot labels give the per-class pocket counts.
pockets = np.cumsum(data[1], axis=0)[-1]

print("with random seed=9001 and a 1400 pockets dataset the rates are:\n"
      " {} heme, {} nucleotide, {} control\n"
      " Total avaible dataset are composed of the following proportions:\n"
      " {} heme, {} nucleotide, {} control".format(
          pockets[0] / 1400, pockets[1] / 1400, pockets[2] / 1400,
          0.145, 0.380, 0.475))

data_onehot = data[0]
output = data[1]
# First 1000 pockets train the model; the remaining 400 are held out.
X_train = data_onehot[0:1000,]
Y_train = output[0:1000,]
X_test = data_onehot[1000:,]
Y_test = output[1000:,]

my_model = model_light()

# Report GPU availability.
# BUGFIX: tf.test.is_gpu_available() is deprecated; the documented
# replacement is tf.config.list_physical_devices("GPU").
print(tf.config.list_physical_devices("GPU"))
# my_model.fit(X_train, Y_train, epochs=50, batch_size=30)

history_mild_2mp = my_model.fit(X_train, Y_train,
                                validation_data=(X_test, Y_test),
                                epochs=30, batch_size=32)
my_model.save('light_model_2mp_e30_b32.h5')

# predictions = prediction_history()