Réimplémentation du programme DSSP en Python

chem.py 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673
  1. import math
  2. from geom import *
  3. class Atom:
  4. """Defines Atom objects and their associated methods.
  5. This function is used to instenciate Atom objects with
  6. convenient fields, and a method used to compute
  7. distance between atoms.
  8. """
  9. def dist_atoms(self, atom2):
  10. """Compute distance between two atoms.
  11. Parameters
  12. ----------
  13. self : Atom object
  14. First atom involved to compute the distance.
  15. atom2 : Atom object
  16. Second atom involved to compute the distance.
  17. This function is used to get the distance between
  18. two atoms in 3D space.
  19. Returns
  20. -------
  21. float
  22. Distance, in angströms.
  23. """
  24. return(math.sqrt((self.coord_x-atom2.coord_x)**2 +
  25. (self.coord_y-atom2.coord_y)**2 +
  26. (self.coord_z-atom2.coord_z)**2))
  27. def __init__(self, atom_id, atom_name, res_name, chain_id,
  28. res_seq_nb, insertion_code, coordinates):
  29. """Constructor of Atom class object.
  30. Parameters
  31. ----------
  32. self : Atom object
  33. The new atom being created.
  34. atom_id : int
  35. Sequential PDB atom number.
  36. res_name : str
  37. 3-letter code of residue.
  38. chain_id : str
  39. Unique chain ID.
  40. res_seq_nb : int
  41. Actual amino acid sequence position.
  42. insertion_code : str
  43. Insertion letter when added in case of ambiguity.
  44. coordinates : list[float, ...]
  45. X,Y,Z coordinates of atom.
  46. """
  47. self.atom_id = atom_id
  48. self.atom_name = atom_name
  49. self.res_name = res_name
  50. self.chain_id = chain_id
  51. self.res_seq_nb = res_seq_nb
  52. self.insertion_code = insertion_code
  53. self.coord_x = coordinates[0]
  54. self.coord_y = coordinates[1]
  55. self.coord_z = coordinates[2]
  56. self.coords = coordinates
  57. class Structure:
  58. def __init__(self, res):
  59. self.res = res
  60. class Turn(Structure):
  61. def __init__(self, turn_type, res):
  62. self.turn_type = turn_type
  63. Structure.res = res
  64. class Bridge(Structure):
  65. def __init__(self, bridge_type, res_num, res_partner, indices):
  66. self.bridge_type = bridge_type
  67. self.res_num = res_num
  68. self.res_partner = res_partner
  69. Structure.res = res_num
  70. self.i = indices[0]
  71. self.j = indices[1]
  72. class Helix(Structure):
  73. def __init__(self, residues, res_num, helix_type):
  74. self.residues = residues
  75. self.res_num = res_num
  76. Structure.res = res_num
  77. self.helix_type = helix_type
  78. class Residue:
  79. def __init__(self, atoms_list, indice):
  80. """Constructor of Residue class object.
  81. Parameters
  82. ----------
  83. self : Residue object
  84. The new residue being created.
  85. atoms_list : list[Atom, ...]
  86. List of Atom objects contained in this residue.
  87. indice : int
  88. Sequential Residue number.
  89. """
  90. self.atoms = {}
  91. for atom in atoms_list:
  92. self.atoms[atom.atom_name] = atom
  93. self.resid = atom.res_seq_nb
  94. self.res_name = atom.res_name
  95. self.res_letter = self.get_amino_letter(atom.res_name)
  96. self.chain_id = atom.chain_id
  97. self.insertion_code = atom.insertion_code
  98. self.indice = indice
  99. def get_amino_letter(self, res_name):
  100. """Get 1-letter code of amino acid 3-letter code.
  101. Parameters
  102. ----------
  103. self : Residue object
  104. Reference to Residue instance.
  105. res_name : str
  106. 3-letter code of the residue.
  107. This function is used to convert the 3-letter code
  108. format of the residue name to its 1-letter code
  109. correspondance.
  110. Returns
  111. -------
  112. str
  113. 3-letter amino acid code.
  114. """
  115. code3 = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY',
  116. 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
  117. 'THR', 'TRP', 'TYR', 'VAL']
  118. code1 = ['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P',
  119. 'S','T','W','Y','V']
  120. return code1[code3.index(res_name)]
  121. def h_bond(self, res2):
  122. """Compute the H-Bond interaction between
  123. two residues, in kcal/mole.
  124. Parameters
  125. ----------
  126. self : Residue object
  127. Reference to Residue instance.
  128. res2 : Residue object
  129. Second residue to evaluate the bond
  130. energy.
  131. This function computes the bond energy between
  132. two residues based on N,H,C and O atoms of each
  133. residue.
  134. Returns
  135. -------
  136. str
  137. 3-letter amino acid code.
  138. """
  139. if("H" not in res2.atoms.keys()):
  140. return(False)
  141. # dimensionnal factor, in kcal/mole
  142. f = 332
  143. # partial charges
  144. q1 = 0.42
  145. q2 = 0.20
  146. # distance between O-N atoms, in angströms
  147. r_ON = self.atoms["O"].dist_atoms(res2.atoms["N"])
  148. # distance between C-H atoms, in angströms
  149. r_CH = self.atoms["C"].dist_atoms(res2.atoms["H"])
  150. # distance between O-H atoms, in angströms
  151. r_OH = self.atoms["O"].dist_atoms(res2.atoms["H"])
  152. # distance between C-N atoms, in angströms
  153. r_CN = self.atoms["C"].dist_atoms(res2.atoms["N"])
  154. # electrostatic interaction energy, in kcal/mole
  155. E = q1*q2*(1/r_ON + 1/r_CH - 1/r_OH - 1/r_CN)*f
  156. return(E)
  157. def get_turns(self, residues):
  158. """
  159. Get all the turns from a specific residue.
  160. Parameters
  161. ----------
  162. self : Residue object
  163. Reference to Residue instance.
  164. residues : list[Residue, ...]
  165. List of all the Residues in the protein.
  166. This function determines if there is a n-turn
  167. at a specific residue, and its type. 3,4,5-turn...
  168. Returns
  169. -------
  170. Turn object | Boolean
  171. If there is a n-turn, a Turn object is returned,
  172. if not, False is returned.
  173. """
  174. turns = {}
  175. i = residues.index(self)
  176. k = 0
  177. for j in range(3,6):
  178. if(i+j<len(residues)):
  179. if(self.h_bond(residues[i+j])<-0.5):
  180. k = j
  181. if k != 0:
  182. return Turn(k,residues[i].resid)
  183. return False
  184. def get_bends(self, residues):
  185. """Get the bend from a specific residue.
  186. Parameters
  187. ----------
  188. self : Residue object
  189. Reference to Residue instance.
  190. residues : list[Residue, ...]
  191. List of all the Residues in the protein.
  192. This function determines if there is a bend
  193. at a specific residue. Used to determine Kappa
  194. angle.
  195. Returns
  196. -------
  197. list[float, str]
  198. Kappa angle value, and S to signify bend presence,
  199. if Kappa>70°.
  200. """
  201. i = residues.index(self)
  202. if i >=2 and i <len(residues)-2:
  203. angle = math.degrees(vector_angles(
  204. vectors_substr(position_vector(residues[i].atoms["CA"].coords),
  205. position_vector(residues[i-2].atoms["CA"].coords)),
  206. vectors_substr(position_vector(residues[i+2].atoms["CA"].coords),
  207. position_vector(residues[i].atoms["CA"].coords))))
  208. if(angle>70):
  209. return [angle, 'S']
  210. return [angle, '']
  211. else:
  212. return [360.0, '']
  213. def get_helix(self, residues):
  214. """Return if there is an helix at a given residue,
  215. as well as its type.
  216. Parameters
  217. ----------
  218. self : Residue object
  219. Reference to Residue instance.
  220. residues : list[Residue, ...]
  221. List of all the Residues in the protein.
  222. This function determines if there is an helix
  223. at a specific residue.
  224. Returns
  225. -------
  226. list[str, int]
  227. The type of helix G(=3),H(=4),I(=5), and the sequential
  228. position of residue where the helix starts.
  229. """
  230. i = residues.index(self)
  231. # if there are no turns or it is the first residue, skip
  232. if i == 0:
  233. return False
  234. if(self.get_turns(residues) and residues[i-1].get_turns(residues)):
  235. return(self.get_turns(residues).turn_type, residues[i].indice)
  236. return(False)
  237. def get_ladder(self, residues):
  238. """Return if there is a ladder at a given residue.
  239. Parameters
  240. ----------
  241. self : Residue object
  242. Reference to Residue instance.
  243. residues : list[Residue, ...]
  244. List of all the Residues in the protein.
  245. Returns
  246. -------
  247. dict | Boolean
  248. If there is a ladder, return it as a dictionnary, else returns False.
  249. """
  250. i = residues.index(self)
  251. if i != 0:
  252. if self.get_bridges(residues):
  253. if (residues[i-1].get_bridges(residues)):
  254. local_bridge = self.get_bridges(residues)
  255. consec_bridge = residues[i-1].get_bridges(residues)
  256. if local_bridge.bridge_type == consec_bridge.bridge_type:
  257. ladder = {'start':consec_bridge.res_num,
  258. 'end':local_bridge.res_num,
  259. 'bridges':[consec_bridge, local_bridge]}
  260. return ladder
  261. return False
  262. def get_tco(self, residues):
  263. """Cosine of angle between C=O of residue I and C=O of residue I-1.
  264. Parameters
  265. ----------
  266. self : Residue object
  267. Reference to Residue instance.
  268. residues : list[Residue, ...]
  269. List of all the Residues in the protein.
  270. For α-helices, TCO is near +1, for β-sheets TCO is near -1.
  271. Not used for structure definition.
  272. Returns
  273. -------
  274. float
  275. Cosine of angle.
  276. """
  277. i = residues.index(self)
  278. if(i!=0):
  279. res2 = residues[i-1]
  280. CO_res1 = vector_from_pos(self.atoms["C"].coords,
  281. self.atoms["O"].coords)
  282. CO_res2 = vector_from_pos(res2.atoms["C"].coords,
  283. res2.atoms["O"].coords)
  284. angle = vector_angles(CO_res1, CO_res2)
  285. else:
  286. angle = math.pi/2
  287. return(math.cos(angle))
  288. def get_chirality(self, residues):
  289. """Return the chirality at a given residue.
  290. Parameters
  291. ----------
  292. self : Residue object
  293. Reference to Residue instance.
  294. residues : list[Residue, ...]
  295. List of all the Residues in the protein.
  296. Virtual torsion angle (dihedral angle) defined by the four
  297. Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality
  298. (structure code '+' or '-').
  299. Returns
  300. -------
  301. list[float, str]
  302. Dihedral angle and chirality sign.
  303. """
  304. i = residues.index(self)
  305. if (i >=1 and i < len(residues)-2):
  306. chirality = {}
  307. angle = calc_dihedral(residues[i-1].atoms["CA"].coords,
  308. residues[i].atoms["CA"].coords,
  309. residues[i+1].atoms["CA"].coords,
  310. residues[i+2].atoms["CA"].coords)
  311. if(angle>0 and angle<=180):
  312. sign="+"
  313. if(angle<=0 and angle > -180):
  314. sign="-"
  315. else:
  316. angle = 360.0
  317. sign = ''
  318. return [angle, sign]
  319. def get_phi_psi(self, residues):
  320. """Compute Phi and Psi angles of protein
  321. backbone around the peptidic bound at each
  322. residue.
  323. Parameters
  324. ----------
  325. self : Residue object
  326. Reference to Residue instance.
  327. residues : list[Residue, ...]
  328. List of all the Residues in the protein.
  329. Returns
  330. -------
  331. tuple(float, float)
  332. Phi and Psi angles.
  333. """
  334. i = residues.index(self)
  335. if(i==0):
  336. phi = 360.0
  337. else:
  338. phi = calc_dihedral(residues[i-1].atoms["C"].coords,
  339. residues[i].atoms["N"].coords,
  340. residues[i].atoms["CA"].coords,
  341. residues[i].atoms["C"].coords)
  342. if(i==len(residues)-1):
  343. psi = 360.0
  344. else:
  345. psi = calc_dihedral(residues[i].atoms["N"].coords,
  346. residues[i].atoms["CA"].coords,
  347. residues[i].atoms["C"].coords,
  348. residues[i+1].atoms["N"].coords)
  349. return((phi, psi))
  350. def get_bridges(residues):
  351. """Construct all bridges between residues in the
  352. protein.
  353. Parameters
  354. ----------
  355. residues : list[Residue, ...]
  356. List of all the Residues in the protein.
  357. Constructs a list of Bridges objects, characterized by
  358. their involved residues positions, sequential positions,
  359. type (parallel or antiparallel).
  360. Returns
  361. -------
  362. list(Bridge)
  363. Returns the list of bridges.
  364. """
  365. bridges = {}
  366. bridge = {}
  367. strongest_bridge = {}
  368. for i in range(1,len(residues)-4):
  369. E_min = 0
  370. for j in range(i+2,len(residues)-1):
  371. # select triplet with the minimal energy
  372. if(residues[i-1].h_bond(residues[j])<-0.5
  373. and residues[j].h_bond(residues[i+1])<-0.5):
  374. bridge = {'res1':residues[i-1].h_bond(residues[j]),
  375. 'res2':residues[j].h_bond(residues[i+1]),
  376. 'ipos':residues[i].resid,
  377. 'jpos':residues[j].resid,
  378. 'i':residues[i].indice,
  379. 'j':residues[j].indice,
  380. 'btype':"para"}
  381. if(residues[j-1].h_bond(residues[i])<-0.5
  382. and residues[i].h_bond(residues[j+1])<-0.5):
  383. bridge = {'res1':residues[j-1].h_bond(residues[i]),
  384. 'res2':residues[i].h_bond(residues[j+1]),
  385. 'ipos':residues[i].resid,
  386. 'jpos':residues[j].resid,
  387. 'i':residues[i].indice,
  388. 'j':residues[j].indice,
  389. 'btype':"para"}
  390. if(residues[i].h_bond(residues[j])<-0.5
  391. and residues[j].h_bond(residues[i])<-0.5):
  392. bridge = {'res1':residues[i].h_bond(residues[j]),
  393. 'res2':residues[j].h_bond(residues[i]),
  394. 'ipos':residues[i].resid,
  395. 'jpos':residues[j].resid,
  396. 'i':residues[i].indice,
  397. 'j':residues[j].indice,
  398. 'btype':"anti"}
  399. if(residues[i-1].h_bond(residues[j+1])<-0.5
  400. and residues[j-1].h_bond(residues[i+1])<-0.5):
  401. bridge = {'res1':residues[i-1].h_bond(residues[j+1]),
  402. 'res2':residues[j-1].h_bond(residues[i+1]),
  403. 'ipos':residues[i].resid,
  404. 'jpos':residues[j].resid,
  405. 'i':residues[i].indice,
  406. 'j':residues[j].indice,
  407. 'btype':"anti"}
  408. if(bridge):
  409. if(bridge['res1']+bridge['res2']<E_min):
  410. E_min = bridge['res1']+bridge['res2']
  411. strongest_bridge = bridge
  412. coord_bridge = [i,j]
  413. bridge = {}
  414. # finally add the strongest bridge at i and j pos
  415. if(strongest_bridge):
  416. bridges[strongest_bridge['i']] = (Bridge(strongest_bridge['btype'],
  417. strongest_bridge['ipos'],
  418. strongest_bridge['jpos'],
  419. [strongest_bridge['i'],
  420. strongest_bridge['j']]))
  421. bridges[strongest_bridge['j']] = (Bridge(strongest_bridge['btype'],
  422. strongest_bridge['ipos'],
  423. strongest_bridge['jpos'],
  424. [strongest_bridge['i'],
  425. strongest_bridge['j']]))
  426. if(len(bridges)>0):
  427. return(bridges)
  428. else:
  429. return(False)
  430. def get_ladders(bridges, residues):
  431. """Construct all ladders in the protein.
  432. Parameters
  433. ----------
  434. residues : list[Residue, ...]
  435. List of all the Residues in the protein.
  436. Constructs a dictionnary of ladders, characterized by
  437. their involved residues positions, sequential positions,
  438. involved bridges. Ladders are made of consecutive bridges
  439. of the same type.
  440. Returns
  441. -------
  442. dict
  443. Returns the dictionnary of ladders.
  444. """
  445. ladders = {}
  446. i = 1
  447. while i < len(residues):
  448. k = 1
  449. if i in bridges.keys():
  450. temp_bridges = [bridges[i]]
  451. while ((i+k in bridges.keys()) and
  452. (bridges[i].bridge_type == bridges[i+k].bridge_type)):
  453. temp_bridges.append(bridges[i+k])
  454. k+=1
  455. if k>1:
  456. ladders[i] = {'start':bridges[i].res_num,
  457. 'end':bridges[i+k-1].res_num,
  458. 'bridges':temp_bridges,
  459. 'i':i,
  460. 'j':i+k-1}
  461. i+=k-1
  462. else:
  463. i+=1
  464. return ladders
  465. def connected_ladders(ladd_1, ladd_2):
  466. """Check if two ladders, ladd_1 and ladd_2,
  467. are linked by one shared bridge.
  468. """
  469. links = []
  470. for bridge in ladd_1['bridges']:
  471. if bridge.res_partner in res_list(ladd_2):
  472. return ladd_2
  473. return False
  474. def get_sheets(ladders):
  475. """
  476. Bridges between ladders.
  477. Check if 1 bridge between one ladder and one or more other ladders.
  478. Iterate over all residues of one ladder and check if bridge with other residues
  479. of the other ladders.
  480. """
  481. ladds = [ elem for elem in ladders.values() ]
  482. sheets = {}
  483. corresp = {}
  484. for ladd1 in ladds:
  485. for ladd2 in ladds:
  486. if connected_ladders(ladd1, ladd2)!=False:
  487. corresp_list = [ elem for elem in corresp.keys() ]
  488. if ladd1['i'] not in corresp_list and ladd2['i'] not in corresp_list:
  489. ind = len(sheets.keys())
  490. sheets[ind] = []
  491. sheets[ind].append(ladd1)
  492. sheets[ind].append(ladd2)
  493. corresp[ladd1['i']] = ind
  494. corresp[ladd2['i']] = ind
  495. elif ladd2 not in corresp_list and ladd1 in corresp_list:
  496. sheets[corresp[ladd1['i']]].append(ladd2)
  497. corresp[ladd2['i']] = corresp[ladd1['i']]
  498. elif ladd1 not in corresp_list and ladd2 in corresp_list:
  499. sheets[corresp[ladd2['i']]].append(ladd1)
  500. corresp[ladd1['i']] = corresp[ladd2['i']]
  501. return sheets
  502. def res_list(ladder):
  503. """Iterate over ladders, from start to end,
  504. to get the list of residues inside it.
  505. """
  506. l=[]
  507. for i in range(ladder['i'], ladder['j']):
  508. l.append(i)
  509. return(l)
  510. def build_turns_patterns(residues):
  511. turns_3 = {}
  512. turns_4 = {}
  513. turns_5 = {}
  514. for i,res in enumerate(residues):
  515. turn = residues[i].get_turns(residues)
  516. if(turn):
  517. for k in range(turn.turn_type):
  518. if turn.turn_type == 3:
  519. turns_3[i+1+k] = turn.turn_type
  520. if turn.turn_type == 4:
  521. turns_4[i+1+k] = turn.turn_type
  522. if turn.turn_type == 5:
  523. turns_5[i+1+k] = turn.turn_type
  524. return[turns_3, turns_4, turns_5]
  525. def build_helix_patterns(residues):
  526. """Builds series of letters (G, H or I) corresponding
  527. to helix types (3, 4 or 5). Used for the final output.
  528. """
  529. helix_3 = {}
  530. helix_4 = {}
  531. helix_5 = {}
  532. for i,res in enumerate(residues):
  533. helix = residues[i].get_helix(residues)
  534. if(helix):
  535. helix_type = residues[i].get_helix(residues)[0]
  536. helix_pos = residues[i].get_helix(residues)[1]
  537. for k in range(helix_type):
  538. if helix_type == 3:
  539. helix_3[i+1+k] = "G"
  540. if helix_type == 4:
  541. helix_4[i+1+k] = "H"
  542. if helix_type == 5:
  543. helix_5[i+1+k] = "I"
  544. return[helix_3, helix_4, helix_5]
  545. def print_helix_pattern(residues, res, helix):
  546. """Used to print the Helix type at a specific
  547. residue position if there is one. Used for the final
  548. output.
  549. """
  550. i = residues.index(res)+1
  551. if i in helix.keys():
  552. return (helix[i])
  553. else:
  554. return(' ')
  555. def print_turn_pattern(residues, res, turns):
  556. """Builds series of symbols (>, < or n) corresponding
  557. to turn types (n=3, 4 or 5). Used for the final output.
  558. '>' represent start of the turn series.
  559. '<' represent end of the turn.
  560. """
  561. i = residues.index(res)+1
  562. if i in turns.keys() and not i-1 in turns.keys():
  563. return(">")
  564. elif i in turns.keys() and i-1 in turns.keys():
  565. return(turns[i])
  566. elif i not in turns.keys() and i-1 in turns.keys():
  567. return("<")
  568. else:
  569. return(' ')
  570. def raw_ladders_print(ladders):
  571. """Function to print roughly the connected ladders by bridges in
  572. the final output.
  573. This has been use to illustrate sheets principles.
  574. Only shown if the -v or --verbose flag is used when running
  575. dssp.py.
  576. """
  577. print("### CONNECTED LADDERS ###")
  578. for ladd1 in ladders:
  579. ladd_1 = ladders[ladd1]
  580. for ladd2 in ladders:
  581. ladd_2 = ladders[ladd2]
  582. for bridge in ladd_1['bridges']:
  583. if bridge.j in res_list(ladd_2):
  584. print("ladder", ladd_1['i'],"-", ladd_1['j'], "|",
  585. bridge.i, "...", bridge.j, "| ladder",
  586. ladd_2['i'], ladd_2['j'])