richard
/
projet_court


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
							# custom imports
from chem import *
# import libs
import sys
import pymol

class PDBFile:
    def get_content(self, filename):
        """Get content of a PDB file

        Parameters
        ----------
        filename : str
            The name of the file to be parsed.

        This function is used to get the raw content
        of the PDB file.

        Returns
        -------
        list
            List of lines of the PDB file.
        """
        with open(filename) as f:
            return(f.readlines())
        
    def get_header(self):
        """Returns header of a PDB file

        Parameters
        ----------
        self : PDBFile
            The instance of PDB File to get the header from.

        This function is used to get the header of the PDB file.

        Returns
        -------
        dict
            A field-formatted metadata header.
        """
        metadata = {}
        for line in self.rawLines:
            # no need to continue if meta are complete
            if(len(metadata) <4):
                if(line[0:10] == "HEADER    "):
                    metadata['header']=line
                elif(line[0:10] == "COMPND   2"):
                    metadata['compound']=line
                elif(line[0:10] == "SOURCE   2"):
                    metadata['source']=line
                elif(line[0:10] == "AUTHOR    "):
                    metadata['author']=line
            else:
                # if meta are complete, stop parsing
                break     
        return(metadata)

    def get_atoms(self, filename):
        """Get all atoms inside a PDB file.

        Parameters
        ----------
        self : PDBFile
            The instance of PDB File to get the header from.
        filename :
            The name of the file. Used to be sent to 
            add_hydrogens(), as it will be used by PyMOL lib.
        
        This function is used to parse all ATOM lines and 
        create residues accordingly.
        """
        self.atoms = []
        self.residues = []
        temp_atoms = []
        count_h = 0
        for line in self.rawLines:
            if line.startswith("ATOM" or "HETATM"):
                if(line[76:78].strip()=="H"):
                    count_h+=1
                atom = Atom(atom_id = int(line[6:11].strip()),
                            atom_name = line[12:16].strip(),
                            res_name = line[17:20].strip(),
                            chain_id = line[21:22],
                            res_seq_nb = int(line[22:26].strip()),
                            insertion_code = line[26:27],
                            coordinates = [float(line[30:38].strip()),
                                     float(line[38:46].strip()),
                                     float(line[46:54].strip()),
                            ])
                self.atoms.append(atom)
                # get the current indice of atom
                i = self.atoms.index(atom)
                # if this is a brand new residue

                if(len(self.atoms)>1 and
                   (atom.res_seq_nb == self.atoms[i-1].res_seq_nb
                    and atom.insertion_code!=self.atoms[i-1].insertion_code)):
                    self.residues.append(Residue(temp_atoms, len(self.residues)+1))
                    temp_atoms=[]
                
                if(len(self.atoms)>1
                   and (atom.res_seq_nb != self.atoms[i-1].res_seq_nb)):
                    self.residues.append(Residue(temp_atoms, len(self.residues)+1))
                    temp_atoms=[]
                temp_atoms.append(atom)
        # last residue
        self.residues.append(Residue(temp_atoms, len(self.residues)))
        # hydrogens should represent in average 50% of total atoms...
        # We use 30% threshold...
        if(count_h/len(temp_atoms)<0.30):
            #Need to add hydrogens 
            self.add_hydrogens(filename)

    def add_hydrogens(self, filename, output_pdb=None):
        """Add hydrogens to N atoms of the protein backbone.

        Parameters
        ----------
        self : PDBFile
            The instance of PDB File to get the header from.

        filename : str
            The name of the file to be exploited by PyMOL lib.
        output_pdb : str
            If defined, the name of the file to be saved py PyMOL,
            with hydrogens added.

        This function is used to add the missing H atoms on the
        Nitrogens atoms of the protein backbone, when H are missing
        from PDB file. H are needed to perform all the bond interactions-
        based calculations.

        Returns
        -------
        dict
            A dictionnary of all Hydrogens added to the backbone.
        """
        pymol.finish_launching(['pymol', '-qc'])
        pymol.cmd.load(filename)
        pymol.cmd.select("nitrogens",'name n')  
        pymol.cmd.h_add("nitrogens")
        pymol.stored.pos = {}
        pymol.cmd.iterate_state(1, "hydrogens",
                                'stored.pos[resi] = []; ' \
                                'stored.pos[resi].append(name,resi,x,y,z) '\
                                'if resi not in stored.pos.keys() ' \
                                'else ' \
                                'stored.pos[resi].append([name,resi,x,y,z])')
        if(output_pdb!=None):
            pymol.cmd.save(output_pdb)
        for resi in self.residues:
            if(str(resi.resid) in pymol.stored.pos.keys()):
                # iterate over residue H atoms
                for hydrogen in pymol.stored.pos[str(resi.resid)]:
                    atom = Atom(atom_id = len(self.atoms)+1,
                                atom_name = "H", 
                                res_name = resi.res_name,
                                chain_id = resi.chain_id,
                                res_seq_nb = resi.resid,
                                insertion_code = resi.insertion_code,
                                coordinates = [hydrogen[2],
                                               hydrogen[3],
                                               hydrogen[4]])
                    # add hydrogen to atoms list
                    self.atoms.append(atom)
                    resi.atoms[atom.atom_name] = atom
        return(pymol.stored.pos)

    def __init__(self, filename, output_pdb=None):
        self.rawLines = self.get_content(filename)
        self.metadata = self.get_header()
        self.get_atoms(filename)