# custom imports
from chem import *

# import libs
import sys
import pymol


class PDBFile:
    """In-memory view of a PDB file: raw lines, header metadata, atoms, residues.

    Building an instance reads the file, extracts a few header records and
    parses the coordinate records into ``Atom`` / ``Residue`` objects (these
    two classes are not defined here; presumably they come from the ``chem``
    star import). When the file contains few hydrogens, PyMOL is used to add
    them on the nitrogen atoms.

    Attributes
    ----------
    rawLines : list
        Lines of the file, as returned by ``readlines()``.
    metadata : dict
        Header records found by ``get_header()``.
    atoms : list
        Every parsed (or added) ``Atom``, in file order.
    residues : list
        ``Residue`` objects built from consecutive atoms.
    """

    # Below this hydrogen/atom ratio the file is considered to lack hydrogens.
    # Hydrogens should represent on average 50% of total atoms; 30% is used
    # as the threshold.
    _MIN_HYDROGEN_RATIO = 0.30

    # (record name, continuation field) -> key stored in the metadata dict.
    # The record name is read from the first 6 characters of the line and the
    # continuation number from the following 4 characters, which avoids
    # depending on an exact run of padding spaces inside a string literal.
    _HEADER_RECORDS = {
        ("HEADER", ""): 'header',
        ("COMPND", "2"): 'compound',
        ("SOURCE", "2"): 'source',
        ("AUTHOR", ""): 'author',
    }

    def get_content(self, filename):
        """Get content of a PDB file

        Parameters
        ----------
        filename : str
            The name of the file to be parsed.

        This function is used to get the raw content of the PDB file.

        Returns
        -------
        list
            List of lines of the PDB file.
        """
        with open(filename) as f:
            return f.readlines()

    def get_header(self):
        """Returns header of a PDB file

        Reads ``self.rawLines`` and keeps the first-line HEADER and AUTHOR
        records and the continuation-2 line of the COMPND and SOURCE records.
        Parsing stops as soon as all four entries have been found.

        Returns
        -------
        dict
            Maps 'header', 'compound', 'source' and 'author' to the matching
            raw line. Keys whose record is absent from the file are missing.
        """
        metadata = {}
        for line in self.rawLines:
            # no need to continue if meta are complete
            if len(metadata) >= len(self._HEADER_RECORDS):
                break
            key = self._HEADER_RECORDS.get((line[0:6], line[6:10].strip()))
            if key is not None:
                metadata[key] = line
        return metadata

    @staticmethod
    def _residue_key(atom):
        """Return the tuple identifying the residue an atom belongs to."""
        return (atom.chain_id, atom.res_seq_nb, atom.insertion_code)

    def get_atoms(self, filename, output_pdb=None):
        """Get all atoms inside a PDB file.

        Parses every ATOM and HETATM line of ``self.rawLines`` into an
        ``Atom``, groups consecutive atoms sharing the same chain, residue
        number and insertion code into ``Residue`` objects, and fills
        ``self.atoms`` and ``self.residues``. If fewer than 30% of the parsed
        atoms are hydrogens, ``add_hydrogens()`` is called.

        Parameters
        ----------
        filename : str
            The name of the file. Forwarded to add_hydrogens(), where it is
            loaded by the PyMOL lib.
        output_pdb : str, optional
            Forwarded to add_hydrogens(); if defined, name of the file PyMOL
            saves once hydrogens have been added.
        """
        self.atoms = []
        self.residues = []
        temp_atoms = []
        count_h = 0
        previous = None

        for line in self.rawLines:
            # startswith() needs a tuple to test several prefixes;
            # ("ATOM" or "HETATM") would only ever test "ATOM".
            if not line.startswith(("ATOM", "HETATM")):
                continue

            # element symbol field
            if line[76:78].strip() == "H":
                count_h += 1

            atom = Atom(atom_id=int(line[6:11].strip()),
                        atom_name=line[12:16].strip(),
                        res_name=line[17:20].strip(),
                        chain_id=line[21:22],
                        res_seq_nb=int(line[22:26].strip()),
                        insertion_code=line[26:27],
                        coordinates=[float(line[30:38].strip()),
                                     float(line[38:46].strip()),
                                     float(line[46:54].strip()),
                                     ])

            # A change of chain, residue number or insertion code closes the
            # residue being accumulated.
            if (previous is not None
                    and self._residue_key(atom) != self._residue_key(previous)):
                self.residues.append(
                    Residue(temp_atoms, len(self.residues) + 1))
                temp_atoms = []

            self.atoms.append(atom)
            temp_atoms.append(atom)
            previous = atom

        # last residue (numbered like the others: position in the list + 1)
        if temp_atoms:
            self.residues.append(Residue(temp_atoms, len(self.residues) + 1))

        # The ratio is taken over all parsed atoms; an empty file is skipped
        # to avoid dividing by zero.
        if self.atoms and count_h / len(self.atoms) < self._MIN_HYDROGEN_RATIO:
            # Need to add hydrogens
            self.add_hydrogens(filename, output_pdb)

    def add_hydrogens(self, filename, output_pdb=None):
        """Add hydrogens to N atoms of the protein backbone.

        Loads the file in PyMOL, adds hydrogens on the atoms named N,
        collects the coordinates of every hydrogen of the loaded structure
        and registers them as ``Atom`` objects named "H" in ``self.atoms``
        and in the matching residue of ``self.residues``.

        Parameters
        ----------
        filename : str
            The name of the file to be exploited by PyMOL lib.
        output_pdb : str
            If defined, the name of the file to be saved by PyMOL, with
            hydrogens added.

        Returns
        -------
        dict
            Maps a residue identifier (the PyMOL ``resi`` string) to the list
            of ``[name, resi, x, y, z]`` entries of its hydrogens.
        """
        pymol.finish_launching(['pymol', '-qc'])
        pymol.cmd.load(filename)
        pymol.cmd.select("nitrogens", 'name n')
        pymol.cmd.h_add("nitrogens")

        pymol.stored.pos = {}
        # setdefault keeps every hydrogen of a residue; re-creating the list
        # for each atom would only retain the last one.
        # NOTE(review): entries are keyed by resi only, so residues of
        # different chains sharing a number end up in the same list.
        pymol.cmd.iterate_state(
            1, "hydrogens",
            'stored.pos.setdefault(resi, []).append([name, resi, x, y, z])')

        if output_pdb is not None:
            pymol.cmd.save(output_pdb)

        for resi in self.residues:
            # iterate over residue H atoms (none if the residue got no H)
            for hydrogen in pymol.stored.pos.get(str(resi.resid), []):
                atom = Atom(atom_id=len(self.atoms) + 1,
                            atom_name="H",
                            res_name=resi.res_name,
                            chain_id=resi.chain_id,
                            res_seq_nb=resi.resid,
                            insertion_code=resi.insertion_code,
                            coordinates=[hydrogen[2],
                                         hydrogen[3],
                                         hydrogen[4]])
                # add hydrogen to atoms list
                self.atoms.append(atom)
                # every added hydrogen is stored under the name "H", so the
                # last one of a residue is the one kept in resi.atoms
                resi.atoms[atom.atom_name] = atom

        return pymol.stored.pos

    def __init__(self, filename, output_pdb=None):
        """Read and parse a PDB file.

        Parameters
        ----------
        filename : str
            The name of the PDB file to read.
        output_pdb : str, optional
            If defined and hydrogens have to be added, name of the file PyMOL
            saves with the hydrogens added.
        """
        self.rawLines = self.get_content(filename)
        self.metadata = self.get_header()
        self.get_atoms(filename, output_pdb)