123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673 |
- import math
- from geom import *
-
- class Atom:
- """Defines Atom objects and their associated methods.
-
- This function is used to instenciate Atom objects with
- convenient fields, and a method used to compute
- distance between atoms.
- """
- def dist_atoms(self, atom2):
- """Compute distance between two atoms.
-
- Parameters
- ----------
- self : Atom object
- First atom involved to compute the distance.
- atom2 : Atom object
- Second atom involved to compute the distance.
-
- This function is used to get the distance between
- two atoms in 3D space.
-
- Returns
- -------
- float
- Distance, in angströms.
- """
- return(math.sqrt((self.coord_x-atom2.coord_x)**2 +
- (self.coord_y-atom2.coord_y)**2 +
- (self.coord_z-atom2.coord_z)**2))
-
-
- def __init__(self, atom_id, atom_name, res_name, chain_id,
- res_seq_nb, insertion_code, coordinates):
- """Constructor of Atom class object.
-
- Parameters
- ----------
- self : Atom object
- The new atom being created.
- atom_id : int
- Sequential PDB atom number.
- res_name : str
- 3-letter code of residue.
- chain_id : str
- Unique chain ID.
- res_seq_nb : int
- Actual amino acid sequence position.
- insertion_code : str
- Insertion letter when added in case of ambiguity.
- coordinates : list[float, ...]
- X,Y,Z coordinates of atom.
- """
- self.atom_id = atom_id
- self.atom_name = atom_name
- self.res_name = res_name
- self.chain_id = chain_id
- self.res_seq_nb = res_seq_nb
- self.insertion_code = insertion_code
- self.coord_x = coordinates[0]
- self.coord_y = coordinates[1]
- self.coord_z = coordinates[2]
- self.coords = coordinates
-
- class Structure:
- def __init__(self, res):
- self.res = res
-
- class Turn(Structure):
-
- def __init__(self, turn_type, res):
- self.turn_type = turn_type
- Structure.res = res
-
- class Bridge(Structure):
-
- def __init__(self, bridge_type, res_num, res_partner, indices):
- self.bridge_type = bridge_type
- self.res_num = res_num
- self.res_partner = res_partner
- Structure.res = res_num
- self.i = indices[0]
- self.j = indices[1]
-
- class Helix(Structure):
-
- def __init__(self, residues, res_num, helix_type):
- self.residues = residues
- self.res_num = res_num
- Structure.res = res_num
- self.helix_type = helix_type
-
-
- class Residue:
- def __init__(self, atoms_list, indice):
- """Constructor of Residue class object.
-
- Parameters
- ----------
- self : Residue object
- The new residue being created.
- atoms_list : list[Atom, ...]
- List of Atom objects contained in this residue.
- indice : int
- Sequential Residue number.
- """
- self.atoms = {}
- for atom in atoms_list:
- self.atoms[atom.atom_name] = atom
- self.resid = atom.res_seq_nb
- self.res_name = atom.res_name
- self.res_letter = self.get_amino_letter(atom.res_name)
- self.chain_id = atom.chain_id
- self.insertion_code = atom.insertion_code
- self.indice = indice
-
- def get_amino_letter(self, res_name):
- """Get 1-letter code of amino acid 3-letter code.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- res_name : str
- 3-letter code of the residue.
-
- This function is used to convert the 3-letter code
- format of the residue name to its 1-letter code
- correspondance.
-
- Returns
- -------
- str
- 3-letter amino acid code.
- """
- code3 = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY',
- 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
- 'THR', 'TRP', 'TYR', 'VAL']
-
-
- code1 = ['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P',
- 'S','T','W','Y','V']
-
- return code1[code3.index(res_name)]
-
- def h_bond(self, res2):
- """Compute the H-Bond interaction between
- two residues, in kcal/mole.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- res2 : Residue object
- Second residue to evaluate the bond
- energy.
-
- This function computes the bond energy between
- two residues based on N,H,C and O atoms of each
- residue.
-
- Returns
- -------
- str
- 3-letter amino acid code.
- """
- if("H" not in res2.atoms.keys()):
- return(False)
- # dimensionnal factor, in kcal/mole
- f = 332
- # partial charges
- q1 = 0.42
- q2 = 0.20
- # distance between O-N atoms, in angströms
- r_ON = self.atoms["O"].dist_atoms(res2.atoms["N"])
- # distance between C-H atoms, in angströms
- r_CH = self.atoms["C"].dist_atoms(res2.atoms["H"])
- # distance between O-H atoms, in angströms
- r_OH = self.atoms["O"].dist_atoms(res2.atoms["H"])
- # distance between C-N atoms, in angströms
- r_CN = self.atoms["C"].dist_atoms(res2.atoms["N"])
- # electrostatic interaction energy, in kcal/mole
- E = q1*q2*(1/r_ON + 1/r_CH - 1/r_OH - 1/r_CN)*f
- return(E)
-
- def get_turns(self, residues):
- """
- Get all the turns from a specific residue.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- This function determines if there is a n-turn
- at a specific residue, and its type. 3,4,5-turn...
-
- Returns
- -------
- Turn object | Boolean
- If there is a n-turn, a Turn object is returned,
- if not, False is returned.
-
- """
- turns = {}
- i = residues.index(self)
- k = 0
- for j in range(3,6):
- if(i+j<len(residues)):
- if(self.h_bond(residues[i+j])<-0.5):
- k = j
- if k != 0:
- return Turn(k,residues[i].resid)
-
- return False
-
- def get_bends(self, residues):
- """Get the bend from a specific residue.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- This function determines if there is a bend
- at a specific residue. Used to determine Kappa
- angle.
-
- Returns
- -------
- list[float, str]
- Kappa angle value, and S to signify bend presence,
- if Kappa>70°.
- """
- i = residues.index(self)
- if i >=2 and i <len(residues)-2:
- angle = math.degrees(vector_angles(
- vectors_substr(position_vector(residues[i].atoms["CA"].coords),
- position_vector(residues[i-2].atoms["CA"].coords)),
- vectors_substr(position_vector(residues[i+2].atoms["CA"].coords),
- position_vector(residues[i].atoms["CA"].coords))))
- if(angle>70):
- return [angle, 'S']
- return [angle, '']
- else:
- return [360.0, '']
-
- def get_helix(self, residues):
- """Return if there is an helix at a given residue,
- as well as its type.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- This function determines if there is an helix
- at a specific residue.
-
- Returns
- -------
- list[str, int]
- The type of helix G(=3),H(=4),I(=5), and the sequential
- position of residue where the helix starts.
- """
- i = residues.index(self)
- # if there are no turns or it is the first residue, skip
- if i == 0:
- return False
-
- if(self.get_turns(residues) and residues[i-1].get_turns(residues)):
- return(self.get_turns(residues).turn_type, residues[i].indice)
- return(False)
-
- def get_ladder(self, residues):
- """Return if there is a ladder at a given residue.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- Returns
- -------
- dict | Boolean
- If there is a ladder, return it as a dictionnary, else returns False.
-
- """
- i = residues.index(self)
- if i != 0:
- if self.get_bridges(residues):
- if (residues[i-1].get_bridges(residues)):
- local_bridge = self.get_bridges(residues)
- consec_bridge = residues[i-1].get_bridges(residues)
- if local_bridge.bridge_type == consec_bridge.bridge_type:
- ladder = {'start':consec_bridge.res_num,
- 'end':local_bridge.res_num,
- 'bridges':[consec_bridge, local_bridge]}
- return ladder
- return False
-
- def get_tco(self, residues):
- """Cosine of angle between C=O of residue I and C=O of residue I-1.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- For α-helices, TCO is near +1, for β-sheets TCO is near -1.
- Not used for structure definition.
-
- Returns
- -------
- float
- Cosine of angle.
- """
- i = residues.index(self)
- if(i!=0):
- res2 = residues[i-1]
- CO_res1 = vector_from_pos(self.atoms["C"].coords,
- self.atoms["O"].coords)
- CO_res2 = vector_from_pos(res2.atoms["C"].coords,
- res2.atoms["O"].coords)
- angle = vector_angles(CO_res1, CO_res2)
- else:
- angle = math.pi/2
- return(math.cos(angle))
-
- def get_chirality(self, residues):
- """Return the chirality at a given residue.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- Virtual torsion angle (dihedral angle) defined by the four
- Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality
- (structure code '+' or '-').
-
- Returns
- -------
- list[float, str]
- Dihedral angle and chirality sign.
- """
- i = residues.index(self)
- if (i >=1 and i < len(residues)-2):
- chirality = {}
- angle = calc_dihedral(residues[i-1].atoms["CA"].coords,
- residues[i].atoms["CA"].coords,
- residues[i+1].atoms["CA"].coords,
- residues[i+2].atoms["CA"].coords)
-
- if(angle>0 and angle<=180):
- sign="+"
-
- if(angle<=0 and angle > -180):
- sign="-"
-
- else:
- angle = 360.0
- sign = ''
-
- return [angle, sign]
-
- def get_phi_psi(self, residues):
- """Compute Phi and Psi angles of protein
- backbone around the peptidic bound at each
- residue.
-
- Parameters
- ----------
- self : Residue object
- Reference to Residue instance.
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- Returns
- -------
- tuple(float, float)
- Phi and Psi angles.
- """
- i = residues.index(self)
- if(i==0):
- phi = 360.0
- else:
- phi = calc_dihedral(residues[i-1].atoms["C"].coords,
- residues[i].atoms["N"].coords,
- residues[i].atoms["CA"].coords,
- residues[i].atoms["C"].coords)
- if(i==len(residues)-1):
- psi = 360.0
- else:
- psi = calc_dihedral(residues[i].atoms["N"].coords,
- residues[i].atoms["CA"].coords,
- residues[i].atoms["C"].coords,
- residues[i+1].atoms["N"].coords)
-
- return((phi, psi))
- def get_bridges(residues):
- """Construct all bridges between residues in the
- protein.
-
- Parameters
- ----------
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- Constructs a list of Bridges objects, characterized by
- their involved residues positions, sequential positions,
- type (parallel or antiparallel).
-
- Returns
- -------
- list(Bridge)
- Returns the list of bridges.
- """
- bridges = {}
- bridge = {}
- strongest_bridge = {}
- for i in range(1,len(residues)-4):
- E_min = 0
- for j in range(i+2,len(residues)-1):
- # select triplet with the minimal energy
- if(residues[i-1].h_bond(residues[j])<-0.5
- and residues[j].h_bond(residues[i+1])<-0.5):
- bridge = {'res1':residues[i-1].h_bond(residues[j]),
- 'res2':residues[j].h_bond(residues[i+1]),
- 'ipos':residues[i].resid,
- 'jpos':residues[j].resid,
- 'i':residues[i].indice,
- 'j':residues[j].indice,
- 'btype':"para"}
-
- if(residues[j-1].h_bond(residues[i])<-0.5
- and residues[i].h_bond(residues[j+1])<-0.5):
- bridge = {'res1':residues[j-1].h_bond(residues[i]),
- 'res2':residues[i].h_bond(residues[j+1]),
- 'ipos':residues[i].resid,
- 'jpos':residues[j].resid,
- 'i':residues[i].indice,
- 'j':residues[j].indice,
- 'btype':"para"}
-
- if(residues[i].h_bond(residues[j])<-0.5
- and residues[j].h_bond(residues[i])<-0.5):
- bridge = {'res1':residues[i].h_bond(residues[j]),
- 'res2':residues[j].h_bond(residues[i]),
- 'ipos':residues[i].resid,
- 'jpos':residues[j].resid,
- 'i':residues[i].indice,
- 'j':residues[j].indice,
- 'btype':"anti"}
-
- if(residues[i-1].h_bond(residues[j+1])<-0.5
- and residues[j-1].h_bond(residues[i+1])<-0.5):
- bridge = {'res1':residues[i-1].h_bond(residues[j+1]),
- 'res2':residues[j-1].h_bond(residues[i+1]),
- 'ipos':residues[i].resid,
- 'jpos':residues[j].resid,
- 'i':residues[i].indice,
- 'j':residues[j].indice,
- 'btype':"anti"}
-
- if(bridge):
- if(bridge['res1']+bridge['res2']<E_min):
- E_min = bridge['res1']+bridge['res2']
- strongest_bridge = bridge
- coord_bridge = [i,j]
- bridge = {}
-
- # finally add the strongest bridge at i and j pos
- if(strongest_bridge):
- bridges[strongest_bridge['i']] = (Bridge(strongest_bridge['btype'],
- strongest_bridge['ipos'],
- strongest_bridge['jpos'],
- [strongest_bridge['i'],
- strongest_bridge['j']]))
- bridges[strongest_bridge['j']] = (Bridge(strongest_bridge['btype'],
- strongest_bridge['ipos'],
- strongest_bridge['jpos'],
- [strongest_bridge['i'],
- strongest_bridge['j']]))
- if(len(bridges)>0):
- return(bridges)
- else:
- return(False)
-
- def get_ladders(bridges, residues):
- """Construct all ladders in the protein.
-
- Parameters
- ----------
- residues : list[Residue, ...]
- List of all the Residues in the protein.
-
- Constructs a dictionnary of ladders, characterized by
- their involved residues positions, sequential positions,
- involved bridges. Ladders are made of consecutive bridges
- of the same type.
-
- Returns
- -------
- dict
- Returns the dictionnary of ladders.
- """
- ladders = {}
- i = 1
- while i < len(residues):
- k = 1
- if i in bridges.keys():
- temp_bridges = [bridges[i]]
- while ((i+k in bridges.keys()) and
- (bridges[i].bridge_type == bridges[i+k].bridge_type)):
- temp_bridges.append(bridges[i+k])
- k+=1
- if k>1:
- ladders[i] = {'start':bridges[i].res_num,
- 'end':bridges[i+k-1].res_num,
- 'bridges':temp_bridges,
- 'i':i,
- 'j':i+k-1}
- i+=k-1
- else:
- i+=1
- return ladders
-
- def connected_ladders(ladd_1, ladd_2):
- """Check if two ladders, ladd_1 and ladd_2,
- are linked by one shared bridge.
- """
- links = []
- for bridge in ladd_1['bridges']:
- if bridge.res_partner in res_list(ladd_2):
- return ladd_2
- return False
-
- def get_sheets(ladders):
- """
- Bridges between ladders.
- Check if 1 bridge between one ladder and one or more other ladders.
- Iterate over all residues of one ladder and check if bridge with other residues
- of the other ladders.
- """
- ladds = [ elem for elem in ladders.values() ]
- sheets = {}
- corresp = {}
- for ladd1 in ladds:
- for ladd2 in ladds:
- if connected_ladders(ladd1, ladd2)!=False:
- corresp_list = [ elem for elem in corresp.keys() ]
- if ladd1['i'] not in corresp_list and ladd2['i'] not in corresp_list:
- ind = len(sheets.keys())
- sheets[ind] = []
- sheets[ind].append(ladd1)
- sheets[ind].append(ladd2)
- corresp[ladd1['i']] = ind
- corresp[ladd2['i']] = ind
- elif ladd2 not in corresp_list and ladd1 in corresp_list:
- sheets[corresp[ladd1['i']]].append(ladd2)
- corresp[ladd2['i']] = corresp[ladd1['i']]
- elif ladd1 not in corresp_list and ladd2 in corresp_list:
- sheets[corresp[ladd2['i']]].append(ladd1)
- corresp[ladd1['i']] = corresp[ladd2['i']]
- return sheets
-
- def res_list(ladder):
- """Iterate over ladders, from start to end,
- to get the list of residues inside it.
- """
- l=[]
- for i in range(ladder['i'], ladder['j']):
- l.append(i)
- return(l)
-
- def build_turns_patterns(residues):
- turns_3 = {}
- turns_4 = {}
- turns_5 = {}
- for i,res in enumerate(residues):
- turn = residues[i].get_turns(residues)
- if(turn):
- for k in range(turn.turn_type):
- if turn.turn_type == 3:
- turns_3[i+1+k] = turn.turn_type
- if turn.turn_type == 4:
- turns_4[i+1+k] = turn.turn_type
- if turn.turn_type == 5:
- turns_5[i+1+k] = turn.turn_type
- return[turns_3, turns_4, turns_5]
-
- def build_helix_patterns(residues):
- """Builds series of letters (G, H or I) corresponding
- to helix types (3, 4 or 5). Used for the final output.
- """
- helix_3 = {}
- helix_4 = {}
- helix_5 = {}
- for i,res in enumerate(residues):
- helix = residues[i].get_helix(residues)
- if(helix):
- helix_type = residues[i].get_helix(residues)[0]
- helix_pos = residues[i].get_helix(residues)[1]
- for k in range(helix_type):
- if helix_type == 3:
- helix_3[i+1+k] = "G"
- if helix_type == 4:
- helix_4[i+1+k] = "H"
- if helix_type == 5:
- helix_5[i+1+k] = "I"
- return[helix_3, helix_4, helix_5]
-
- def print_helix_pattern(residues, res, helix):
- """Used to print the Helix type at a specific
- residue position if there is one. Used for the final
- output.
- """
- i = residues.index(res)+1
- if i in helix.keys():
- return (helix[i])
- else:
- return(' ')
-
- def print_turn_pattern(residues, res, turns):
- """Builds series of symbols (>, < or n) corresponding
- to turn types (n=3, 4 or 5). Used for the final output.
- '>' represent start of the turn series.
- '<' represent end of the turn.
- """
- i = residues.index(res)+1
- if i in turns.keys() and not i-1 in turns.keys():
- return(">")
- elif i in turns.keys() and i-1 in turns.keys():
- return(turns[i])
- elif i not in turns.keys() and i-1 in turns.keys():
- return("<")
- else:
- return(' ')
-
- def raw_ladders_print(ladders):
- """Function to print roughly the connected ladders by bridges in
- the final output.
- This has been use to illustrate sheets principles.
- Only shown if the -v or --verbose flag is used when running
- dssp.py.
-
- """
- print("### CONNECTED LADDERS ###")
- for ladd1 in ladders:
- ladd_1 = ladders[ladd1]
- for ladd2 in ladders:
- ladd_2 = ladders[ladd2]
- for bridge in ladd_1['bridges']:
- if bridge.j in res_list(ladd_2):
- print("ladder", ladd_1['i'],"-", ladd_1['j'], "|",
- bridge.i, "...", bridge.j, "| ladder",
- ladd_2['i'], ladd_2['j'])
-
|