Projet de classification de conformations de protéines par k-medoids

projet8.py 1.3KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. #!/bin/python3
  2. import pandas as pd
  3. import numpy as np
  4. import pbxplore as pbx
  5. import sys
  6. class Conformations:
  7. """
  8. An instance of the class conformations contains differents conformations of
  9. the same protein, encoded as 1D sequences of protein bloc, in a pandas
  10. dataframe.
  11. """
  12. pass
  13. def __init__(self, filename):
  14. """
  15. df : pd.DataFrame object
  16. Each row of the dataframe is a conformation and each column a position
  17. of the sequence.
  18. """
  19. self.df = pd.DataFrame()
  20. for chain_name, chain in pbx.chains_from_files([filename]):
  21. dihedrals = chain.get_phi_psi_angles()
  22. pb_seq = pbx.assign(dihedrals)
  23. self.df = self.df.append(pd.Series(list(pb_seq)), ignore_index=True)
  24. def dissimilarity(self):
  25. """
  26. computes the dissimilarity matrix of the intance's df
  27. """
  28. matrix = pd.DataFrame(index=np.arange(self.df.shape[0]),
  29. columns=np.arange(self.df.shape[0]))
  30. matrix = matrix.fillna(0)
  31. return matrix
  32. if __name__ == "__main__":
  33. if len(sys.argv) != 2:
  34. print("Error : usage $ python3 projet8 md.pdb")
  35. exit()
  36. confs = Conformations(sys.argv[1])
  37. print(confs.df)