Réimplémentation du programme DSSP en Python

pdb.py 6.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. # custom imports
  2. from chem import *
  3. # import libs
  4. import sys
  5. import pymol
  6. class PDBFile:
  7. def get_content(self, filename):
  8. """Get content of a PDB file
  9. Parameters
  10. ----------
  11. filename : str
  12. The name of the file to be parsed.
  13. This function is used to get the raw content
  14. of the PDB file.
  15. Returns
  16. -------
  17. list
  18. List of lines of the PDB file.
  19. """
  20. with open(filename) as f:
  21. return(f.readlines())
  22. def get_header(self):
  23. """Returns header of a PDB file
  24. Parameters
  25. ----------
  26. self : PDBFile
  27. The instance of PDB File to get the header from.
  28. This function is used to get the header of the PDB file.
  29. Returns
  30. -------
  31. dict
  32. A field-formatted metadata header.
  33. """
  34. metadata = {}
  35. for line in self.rawLines:
  36. # no need to continue if meta are complete
  37. if(len(metadata) <4):
  38. if(line[0:10] == "HEADER "):
  39. metadata['header']=line
  40. elif(line[0:10] == "COMPND 2"):
  41. metadata['compound']=line
  42. elif(line[0:10] == "SOURCE 2"):
  43. metadata['source']=line
  44. elif(line[0:10] == "AUTHOR "):
  45. metadata['author']=line
  46. else:
  47. # if meta are complete, stop parsing
  48. break
  49. return(metadata)
  50. def get_atoms(self, filename):
  51. """Get all atoms inside a PDB file.
  52. Parameters
  53. ----------
  54. self : PDBFile
  55. The instance of PDB File to get the header from.
  56. filename :
  57. The name of the file. Used to be sent to
  58. add_hydrogens(), as it will be used by PyMOL lib.
  59. This function is used to parse all ATOM lines and
  60. create residues accordingly.
  61. """
  62. self.atoms = []
  63. self.residues = []
  64. temp_atoms = []
  65. count_h = 0
  66. for line in self.rawLines:
  67. if line.startswith("ATOM" or "HETATM"):
  68. if(line[76:78].strip()=="H"):
  69. count_h+=1
  70. atom = Atom(atom_id = int(line[6:11].strip()),
  71. atom_name = line[12:16].strip(),
  72. res_name = line[17:20].strip(),
  73. chain_id = line[21:22],
  74. res_seq_nb = int(line[22:26].strip()),
  75. insertion_code = line[26:27],
  76. coordinates = [float(line[30:38].strip()),
  77. float(line[38:46].strip()),
  78. float(line[46:54].strip()),
  79. ])
  80. self.atoms.append(atom)
  81. # get the current indice of atom
  82. i = self.atoms.index(atom)
  83. # if this is a brand new residue
  84. if(len(self.atoms)>1 and
  85. (atom.res_seq_nb == self.atoms[i-1].res_seq_nb
  86. and atom.insertion_code!=self.atoms[i-1].insertion_code)):
  87. self.residues.append(Residue(temp_atoms, len(self.residues)+1))
  88. temp_atoms=[]
  89. if(len(self.atoms)>1
  90. and (atom.res_seq_nb != self.atoms[i-1].res_seq_nb)):
  91. self.residues.append(Residue(temp_atoms, len(self.residues)+1))
  92. temp_atoms=[]
  93. temp_atoms.append(atom)
  94. # last residue
  95. self.residues.append(Residue(temp_atoms, len(self.residues)))
  96. # hydrogens should represent in average 50% of total atoms...
  97. # We use 30% threshold...
  98. if(count_h/len(temp_atoms)<0.30):
  99. #Need to add hydrogens
  100. self.add_hydrogens(filename)
  101. def add_hydrogens(self, filename, output_pdb=None):
  102. """Add hydrogens to N atoms of the protein backbone.
  103. Parameters
  104. ----------
  105. self : PDBFile
  106. The instance of PDB File to get the header from.
  107. filename : str
  108. The name of the file to be exploited by PyMOL lib.
  109. output_pdb : str
  110. If defined, the name of the file to be saved py PyMOL,
  111. with hydrogens added.
  112. This function is used to add the missing H atoms on the
  113. Nitrogens atoms of the protein backbone, when H are missing
  114. from PDB file. H are needed to perform all the bond interactions-
  115. based calculations.
  116. Returns
  117. -------
  118. dict
  119. A dictionnary of all Hydrogens added to the backbone.
  120. """
  121. pymol.finish_launching(['pymol', '-qc'])
  122. pymol.cmd.load(filename)
  123. pymol.cmd.select("nitrogens",'name n')
  124. pymol.cmd.h_add("nitrogens")
  125. pymol.stored.pos = {}
  126. pymol.cmd.iterate_state(1, "hydrogens",
  127. 'stored.pos[resi] = []; ' \
  128. 'stored.pos[resi].append(name,resi,x,y,z) '\
  129. 'if resi not in stored.pos.keys() ' \
  130. 'else ' \
  131. 'stored.pos[resi].append([name,resi,x,y,z])')
  132. if(output_pdb!=None):
  133. pymol.cmd.save(output_pdb)
  134. for resi in self.residues:
  135. if(str(resi.resid) in pymol.stored.pos.keys()):
  136. # iterate over residue H atoms
  137. for hydrogen in pymol.stored.pos[str(resi.resid)]:
  138. atom = Atom(atom_id = len(self.atoms)+1,
  139. atom_name = "H",
  140. res_name = resi.res_name,
  141. chain_id = resi.chain_id,
  142. res_seq_nb = resi.resid,
  143. insertion_code = resi.insertion_code,
  144. coordinates = [hydrogen[2],
  145. hydrogen[3],
  146. hydrogen[4]])
  147. # add hydrogen to atoms list
  148. self.atoms.append(atom)
  149. resi.atoms[atom.atom_name] = atom
  150. return(pymol.stored.pos)
  151. def __init__(self, filename, output_pdb=None):
  152. self.rawLines = self.get_content(filename)
  153. self.metadata = self.get_header()
  154. self.get_atoms(filename)