123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- #!/usr/bin/env python
- # coding: utf-8
-
- # # DeepDrug3D
-
- # ## Importing library
-
- # In[ ]:
-
-
- import numpy as np
- import tensorflow as tf
- from sklearn.preprocessing import LabelEncoder
- from keras.models import Sequential
- from keras import optimizers, callbacks
- from keras.layers import Dense, Flatten, TimeDistributed, Dropout
- from keras import Input, Model
- from keras.layers import add, Activation
- #from keras.utils import plot_model # Needs pydot.
- from keras.layers import Conv3D, MaxPooling3D
-
-
- # ### Callback used to store the model's predictions after each epoch in order to plot ROC curves
-
- # In[ ]:
-
-
class prediction_history(callbacks.Callback):
    """
    Keras callback that records the model's predictions after every epoch.

    The predictions are appended to ``predhis`` (one entry per epoch, in
    training order) so they can be inspected once training is over.

    Arguments:
        - inputs, array-like or None: default None, samples to predict on at
          the end of each epoch. When None, the module-level name
          ``predictor_train`` is looked up at prediction time, which is what
          this callback originally relied on.
    """

    def __init__(self, inputs=None):
        # Let the Keras base class set up its own state before adding ours.
        super(prediction_history, self).__init__()
        self.predict_inputs = inputs
        self.predhis = []

    def on_epoch_end(self, epoch, logs=None):
        """
        Store the predictions of the model being trained.

        Arguments:
            - epoch, int: index of the epoch that just finished (unused)
            - logs, dict or None: metrics passed by Keras (unused)
        """
        if self.predict_inputs is None:
            # Backward-compatible fallback on the global used originally.
            samples = predictor_train
        else:
            samples = self.predict_inputs
        # self.model is the model Keras attached to this callback, so the
        # callback no longer depends on a global variable named `model`.
        self.predhis.append(self.model.predict(samples))
-
-
- # ### Creating inputs and outputs
-
- # In[ ]:
-
-
def _read_pocket_list(path):
    """Return the non-blank lines of the pocket list file `path`."""
    with open(path, "r") as filin:
        # splitlines() copes with a missing trailing newline, unlike an
        # unconditional pop() of the last element.
        return [line for line in filin.read().splitlines() if line.strip()]


def in_out_lists(size=1000):
    """
    returns a tuple of arrays used as input and output for the model

    Pocket names are read from "heme.list", "nucleotide.list" and
    "control.list" in the current directory; `size` of them are drawn at
    random (numpy seed fixed to 9001) and their voxel arrays are loaded from
    "deepdrug3d_voxel_data/<name>.npy".

    Arguments:
        - size, int: default 1000, number of pockets to draw

    Returns:
        - tuple (data_onehot, output):
            -data_onehot, ndarray: shape (size, 14, 32, 32, 32), the loaded
             pocket arrays
            -output, ndarray: shape (size, 3), one-hot class vectors with
             [1,0,0] = heme, [0,1,0] = nucleotide, [0,0,1] = control

    Raises:
        - ValueError: if `size` is larger than the number of listed pockets
          (the extra rows would otherwise be left uninitialised)
    """
    # "steroid.list" is not read here: the three output classes are heme,
    # nucleotide and control only.
    labelled_lists = (
        (_read_pocket_list("heme.list"), [1, 0, 0]),
        (_read_pocket_list("nucleotide.list"), [0, 1, 0]),
        (_read_pocket_list("control.list"), [0, 0, 1]),
    )
    # One flat (name, label) table: a global index maps to exactly one pocket,
    # so no per-class offset arithmetic (and no off-by-one at the class
    # boundaries) is needed.
    pockets = [(name, label) for names, label in labelled_lists for name in names]
    tot_size = len(pockets)
    if size > tot_size:
        raise ValueError(
            "size={} exceeds the {} available pockets".format(size, tot_size)
        )

    # Fixed seed so that the same pockets are drawn on every run.
    np.random.seed(9001)
    indices = np.random.permutation(tot_size)[:size]

    data_onehot = np.empty(shape=(size, 14, 32, 32, 32))
    output = np.empty(shape=(size, 3))
    for n, i in enumerate(indices):
        name, label = pockets[i]
        data_onehot[n] = np.load("deepdrug3d_voxel_data/" + name + ".npy")
        output[n] = label

    return (data_onehot, output)
-
-
- # ### Defining different models to test and compare
-
- # In[ ]:
-
-
def model_heavy():  # create a model object
    """
    Build and compile the "heavy" 3D convolutional classifier.

    The network is built with the Keras functional API: two Conv3D layers
    with 64 filters each, one max-pooling step, a 512-unit Dense layer and a
    3-way softmax output, with dropout in between. It is compiled with Adam
    (learning rate 1e-6), categorical cross-entropy loss and the accuracy
    metric. The layer summary is printed as a side effect.

    Returns :
        - model : keras.Model
    """
    # NOTE(review): Conv3D is used with its default data_format; if the
    # leading axis of size 14 is meant to be the channel axis, confirm that
    # the configured data_format matches.
    inputs = Input(shape=(14, 32, 32, 32))
    conv_1 = Conv3D(64, (28, 28, 28), padding="same", activation="relu",
                    kernel_initializer="he_normal")(inputs)
    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu",
                    kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.4)(maxpool)
    # Dense is applied before Flatten, i.e. on the last axis of the
    # still-multidimensional tensor.
    dense = Dense(512)(drop_2)
    drop_3 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_3)
    output = Dense(3, activation='softmax')(flatters)
    model = Model(inputs=inputs, outputs=output)
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    # summary() prints the layer table itself; printing the attribute only
    # showed the bound-method repr.
    model.summary()
    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
-
-
- # In[ ]:
-
-
def model_light():  # create a model object
    """
    Build and compile the "light" 3D convolutional classifier.

    The network is built with the Keras functional API: a 32-filter and a
    64-filter Conv3D layer, two max-pooling steps, a 256-unit Dense layer and
    a 3-way softmax output, with dropout in between. It is compiled with Adam
    (learning rate 1e-6), categorical cross-entropy loss and the accuracy
    metric. The layer summary is printed as a side effect.

    Returns :
        - model : keras.Model
    """
    # NOTE(review): Conv3D is used with its default data_format; if the
    # leading axis of size 14 is meant to be the channel axis, confirm that
    # the configured data_format matches.
    inputs = Input(shape=(14, 32, 32, 32))
    conv_1 = Conv3D(32, (28, 28, 28), padding="same", activation="relu",
                    kernel_initializer="he_normal")(inputs)
    conv_2 = Conv3D(64, (26, 26, 26), padding="same", activation="relu",
                    kernel_initializer="he_normal")(conv_1)
    drop_1 = Dropout(0.2)(conv_2)
    maxpool = MaxPooling3D()(drop_1)
    drop_2 = Dropout(0.3)(maxpool)
    maxpool_2 = MaxPooling3D()(drop_2)
    drop_3 = Dropout(0.3)(maxpool_2)
    # Dense is applied before Flatten, i.e. on the last axis of the
    # still-multidimensional tensor.
    dense = Dense(256)(drop_3)
    drop_4 = Dropout(0.4)(dense)
    flatters = Flatten()(drop_4)
    output = Dense(3, activation='softmax')(flatters)
    model = Model(inputs=inputs, outputs=output)
    my_opt = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    # summary() prints the layer table itself; printing the attribute only
    # showed the bound-method repr.
    model.summary()
    model.compile(optimizer=my_opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
-
-
- # ## Create pocket lists
- # 4 lists are created :
- # + control
- # + steroid
- # + heme
- # + nucleotide
-
- # In[ ]:
-
-
# Number of pockets drawn for this experiment. The call, the message and the
# rates below all derive from this single value so they cannot drift apart.
DATASET_SIZE = 1400

data = in_out_lists(DATASET_SIZE)
# data[1] holds one one-hot row per pocket, so the last row of the running
# column sums is the number of pockets drawn in each class.
pockets = np.cumsum(data[1], axis=0)[-1]


# In[ ]:


# The last three figures are hard-coded proportions of the full dataset.
print("with random seed=9001 and a {} pockets dataset the rates are:\n {} heme, {} nucleotide, {} control\n Total avaible dataset are composed of the following proportions:\n {} heme, {} nucleotide, {} control".format(
    DATASET_SIZE,
    pockets[0] / DATASET_SIZE, pockets[1] / DATASET_SIZE, pockets[2] / DATASET_SIZE,
    0.145, 0.380, 0.475))
-
-
- # In[ ]:
-
-
# Unpack the (inputs, labels) pair returned by in_out_lists, then keep the
# first 1000 samples for training and everything after them for validation.
data_onehot, output = data[0], data[1]
X_train, X_test = data_onehot[:1000], data_onehot[1000:]
Y_train, Y_test = output[:1000], output[1000:]
-
-
- # In[ ]:
-
-
my_model = model_light()


# In[ ]:


# Outside a notebook the value of a bare expression is discarded, so print
# the result of the GPU check explicitly.
# NOTE(review): tf.test.is_gpu_available is deprecated in recent TensorFlow
# releases in favour of tf.config.list_physical_devices('GPU'); kept here to
# stay compatible with the TensorFlow version this script was written for.
print("GPU available: {}".format(tf.test.is_gpu_available()))


# In[ ]:


# Train for 30 epochs with batches of 32, validating on the held-out split
# after each epoch, then save the trained model to disk.
history_mild_2mp = my_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=30, batch_size=32)
my_model.save('light_model_2mp_e30_b32.h5')
-
-
- # In[ ]:
-
-
- #predictions=prediction_history()
-
|