Réimplémentation du programme DSSP en Python

pdb.py 4.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. # custom imports
  2. from atom import *
  3. # import libs
  4. import sys
  5. import pymol
  6. class PDBFile:
  7. def get_content(self, filename):
  8. with open(filename) as f:
  9. return(f.readlines())
  10. def get_header(self):
  11. metadata = {}
  12. for line in self.rawLines:
  13. # no need to continue if meta are complete
  14. if(len(metadata) <4):
  15. if(line[0:10] == "HEADER "):
  16. metadata['header']=line
  17. elif(line[0:10] == "COMPND 2"):
  18. metadata['compound']=line
  19. elif(line[0:10] == "SOURCE 2"):
  20. metadata['source']=line
  21. elif(line[0:10] == "AUTHOR "):
  22. metadata['author']=line
  23. else:
  24. # if meta are complete, stop parsing
  25. break
  26. return(metadata)
  27. def get_atoms(self, filename):
  28. self.atoms = []
  29. self.residues = []
  30. temp_atoms = []
  31. count_h = 0
  32. for line in self.rawLines:
  33. if line.startswith("ATOM" or "HETATM"):
  34. if(line[76:78].strip()=="H"):
  35. count_h+=1
  36. atom = Atom(atom_id = int(line[6:11].strip()),
  37. atom_name = line[12:16].strip(),
  38. res_name = line[17:20].strip(),
  39. chain_id = line[21:22],
  40. res_seq_nb = int(line[22:26].strip()),
  41. insertion_code = line[26:27],
  42. coordinates = [float(line[30:38].strip()),
  43. float(line[38:46].strip()),
  44. float(line[46:54].strip()),
  45. ])
  46. self.atoms.append(atom)
  47. # get the current indice of atom
  48. i = self.atoms.index(atom)
  49. # if this is a brand new residue
  50. if(len(self.atoms)>1
  51. and atom.res_seq_nb != self.atoms[i-1].res_seq_nb):
  52. self.residues.append(Residue(temp_atoms))
  53. temp_atoms=[]
  54. temp_atoms.append(atom)
  55. # last residue
  56. self.residues.append(Residue(temp_atoms))
  57. # hydrogens should represent in average 50% of total atoms...
  58. # We use 30% threshold...
  59. if(count_h/len(temp_atoms)<0.30):
  60. #if(output_pdb==None):
  61. print("Need to add hydrogens ! If you want the modified PDB file, "
  62. "please use the -o output.pdb argument")
  63. self.add_hydrogens(filename)
  64. def check_hydrogens(self, atoms):
  65. print("ENTER CHECK HYDROGEN")
  66. return True
  67. def add_hydrogens(self, filename, output_pdb=None):
  68. pymol.finish_launching(['pymol', '-qc'])
  69. pymol.cmd.load(filename)
  70. pymol.cmd.select("nitrogens",'name n')
  71. pymol.cmd.h_add("nitrogens")
  72. pymol.stored.pos = {}
  73. pymol.cmd.iterate_state(1, "hydrogens",
  74. 'stored.pos[resi] = []; ' \
  75. 'stored.pos[resi].append(name,resi,x,y,z) '\
  76. 'if resi not in stored.pos.keys() ' \
  77. 'else ' \
  78. 'stored.pos[resi].append([name,resi,x,y,z])')
  79. if(output_pdb!=None):
  80. pymol.cmd.save(output_file)
  81. for resi in self.residues:
  82. if(str(resi.resid) in pymol.stored.pos.keys()):
  83. # iterate over residue H atoms
  84. for hydrogen in pymol.stored.pos[str(resi.resid)]:
  85. atom = Atom(atom_id = len(self.atoms)+1,
  86. atom_name = "H",
  87. res_name = resi.res_name,
  88. chain_id = resi.chain_id,
  89. res_seq_nb = resi.resid,
  90. insertion_code = resi.insertion_code,
  91. coordinates = [hydrogen[2],
  92. hydrogen[3],
  93. hydrogen[4]])
  94. # add hydrogen to atoms list
  95. self.atoms.append(atom)
  96. resi.atoms[atom.atom_name] = atom
  97. return(pymol.stored.pos)
  98. def __init__(self, filename, output_pdb=None):
  99. self.rawLines = self.get_content(filename)
  100. self.metadata = self.get_header()
  101. self.get_atoms(filename)
  102. # for elem in self.metadata :
  103. # print(self.metadata[elem], end="")