123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- # custom imports
- from atom import *
- # import libs
- import sys
- import pymol
-
- class PDBFile:
- def get_content(self, filename):
- with open(filename) as f:
- return(f.readlines())
-
- def get_header(self):
- metadata = {}
- for line in self.rawLines:
- # no need to continue if meta are complete
- if(len(metadata) <4):
- if(line[0:10] == "HEADER "):
- metadata['header']=line
- elif(line[0:10] == "COMPND 2"):
- metadata['compound']=line
- elif(line[0:10] == "SOURCE 2"):
- metadata['source']=line
- elif(line[0:10] == "AUTHOR "):
- metadata['author']=line
- else:
- # if meta are complete, stop parsing
- break
- return(metadata)
-
- def get_atoms(self, filename):
- self.atoms = []
- self.residues = []
- temp_atoms = []
- count_h = 0
- for line in self.rawLines:
- if line.startswith("ATOM" or "HETATM"):
- if(line[76:78].strip()=="H"):
- count_h+=1
- atom = Atom(atom_id = int(line[6:11].strip()),
- atom_name = line[12:16].strip(),
- res_name = line[17:20].strip(),
- chain_id = line[21:22],
- res_seq_nb = int(line[22:26].strip()),
- insertion_code = line[26:27],
- coordinates = [float(line[30:38].strip()),
- float(line[38:46].strip()),
- float(line[46:54].strip()),
- ])
- self.atoms.append(atom)
- # get the current indice of atom
- i = self.atoms.index(atom)
- # if this is a brand new residue
-
- if(len(self.atoms)>1 and
- (atom.res_seq_nb == self.atoms[i-1].res_seq_nb
- and atom.insertion_code!=self.atoms[i-1].insertion_code)):
- self.residues.append(Residue(temp_atoms, len(self.residues)+1))
- temp_atoms=[]
-
- if(len(self.atoms)>1
- and (atom.res_seq_nb != self.atoms[i-1].res_seq_nb)):
- self.residues.append(Residue(temp_atoms, len(self.residues)+1))
- temp_atoms=[]
- temp_atoms.append(atom)
- # last residue
- self.residues.append(Residue(temp_atoms, len(self.residues)))
- # hydrogens should represent in average 50% of total atoms...
- # We use 30% threshold...
- if(count_h/len(temp_atoms)<0.30):
- #if(output_pdb==None):
- print("Need to add hydrogens ! If you want the modified PDB file, "
- "please use the -o output.pdb argument")
- self.add_hydrogens(filename)
-
- def check_hydrogens(self, atoms):
- print("ENTER CHECK HYDROGEN")
- return True
-
- def add_hydrogens(self, filename, output_pdb=None):
- pymol.finish_launching(['pymol', '-qc'])
- pymol.cmd.load(filename)
- pymol.cmd.select("nitrogens",'name n')
- pymol.cmd.h_add("nitrogens")
- pymol.stored.pos = {}
- pymol.cmd.iterate_state(1, "hydrogens",
- 'stored.pos[resi] = []; ' \
- 'stored.pos[resi].append(name,resi,x,y,z) '\
- 'if resi not in stored.pos.keys() ' \
- 'else ' \
- 'stored.pos[resi].append([name,resi,x,y,z])')
- if(output_pdb!=None):
- pymol.cmd.save(output_file)
- for resi in self.residues:
- if(str(resi.resid) in pymol.stored.pos.keys()):
- # iterate over residue H atoms
- for hydrogen in pymol.stored.pos[str(resi.resid)]:
- atom = Atom(atom_id = len(self.atoms)+1,
- atom_name = "H",
- res_name = resi.res_name,
- chain_id = resi.chain_id,
- res_seq_nb = resi.resid,
- insertion_code = resi.insertion_code,
- coordinates = [hydrogen[2],
- hydrogen[3],
- hydrogen[4]])
- # add hydrogen to atoms list
- self.atoms.append(atom)
- resi.atoms[atom.atom_name] = atom
- return(pymol.stored.pos)
-
- def __init__(self, filename, output_pdb=None):
- self.rawLines = self.get_content(filename)
- self.metadata = self.get_header()
- self.get_atoms(filename)
- # for elem in self.metadata :
- # print(self.metadata[elem], end="")
-
|