#!/bin/python3
"""Compare conformations of one protein encoded as Protein Block sequences."""
import sys

import numpy as np
import pandas as pd
import pbxplore as pbx
from scipy.spatial.distance import squareform, pdist, jaccard


class Conformations:
    """
    Different conformations of the same protein, each encoded as a 1D
    sequence of protein blocks (PB) and stored as one row of a pandas
    DataFrame.
    """

    def __init__(self, filename):
        """
        Read `filename` with PBxplore and assign a PB sequence to every
        chain it yields.

        filename : path handed to pbx.chains_from_files (the usage message
                   at the bottom of this file gives "md.pdb" as example).

        df : pd.DataFrame object
             Each row of the dataframe is a conformation and each column a
             position of the sequence. Empty when no chain is read.
        """
        # Collect the rows first and build the frame once: DataFrame.append
        # was removed in pandas 2.0, and it copied the whole frame on every
        # call (quadratic in the number of conformations).
        rows = []
        for _chain_name, chain in pbx.chains_from_files([filename]):
            dihedrals = chain.get_phi_psi_angles()
            pb_seq = pbx.assign(dihedrals)
            rows.append(list(pb_seq))
        self.df = pd.DataFrame(rows)

    def identity(self):
        """
        Returns a matrix of the distance between all conformations based on
        whether the PB at each position is identical or not.

        The PB letters are first replaced by numbers because pdist needs
        numeric input. Returns the square matrix as a numpy.ndarray object.
        """
        # dict to transform PB letters to numbers, necessary for the pdist
        # function
        dict_str_to_float = {'Z': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5,
                             'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10,
                             'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15,
                             'p': 16}
        # Explicit cast: pdist works on floats, and replace() may leave the
        # columns with object dtype.
        dfnum = self.df.replace(dict_str_to_float).astype(float)
        # NOTE(review): SciPy's documentation mentions a change in 1.15 in
        # how 'jaccard' treats non-boolean input (conversion to boolean).
        # Confirm that the installed version still yields the intended
        # "proportion of differing positions" on these numeric codes.
        return squareform(pdist(dfnum, metric='jaccard'))

    def dissimilarity(self, matrix=None):
        """
        Intended to return a matrix of the distance between all
        conformations computed according to a substitution matrix of the
        protein blocks.

        matrix : substitution matrix of the protein blocks. If no
                 substitution matrix is specified (None), the tab-separated
                 file "data/PBs_substitution_matrix.dat" is read (path
                 relative to the current working directory) and its rows
                 are labelled like its columns.

        NOTE: this method is unfinished. It only prepares the substitution
        matrix; nothing is computed or returned yet.
        """
        n_conf = self.df.shape[0]
        # Uninitialised placeholder for the result.
        # TODO: fill it from `matrix` and return it.
        distances = np.empty((n_conf, n_conf))
        # `is None` instead of `== None`: comparing a DataFrame with `==`
        # is element-wise and makes the `if` raise "truth value is
        # ambiguous" whenever a matrix is actually supplied.
        if matrix is None:
            matrix = pd.read_table("data/PBs_substitution_matrix.dat",
                                   index_col=False, sep='\t')
            # Only relabel the matrix loaded here, so that a matrix passed
            # by the caller is never modified.
            matrix.index = matrix.columns


if __name__ == "__main__":
    if len(sys.argv) != 2:
        # Usage errors go to stderr and give a non-zero exit status;
        # sys.exit is used because the bare exit() helper is only
        # provided by the site module.
        print("Error : usage $ python3 projet8 md.pdb", file=sys.stderr)
        sys.exit(1)
    confs = Conformations(sys.argv[1])
    print(confs.df)