Réimplémentation du programme DSSP en Python

chem.py~ 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. import math
  2. #from structure import *
  3. from geom import *
  4. class Atom:
  5. def dist_atoms(self, atom2):
  6. return(math.sqrt((self.coord_x-atom2.coord_x)**2 +
  7. (self.coord_y-atom2.coord_y)**2 +
  8. (self.coord_z-atom2.coord_z)**2))
  9. def __init__(self, atom_id, atom_name, res_name, chain_id,
  10. res_seq_nb, insertion_code, coordinates):
  11. self.atom_id = atom_id
  12. self.atom_name = atom_name
  13. self.res_name = res_name
  14. self.chain_id = chain_id
  15. self.res_seq_nb = res_seq_nb
  16. self.insertion_code = insertion_code
  17. self.coord_x = coordinates[0]
  18. self.coord_y = coordinates[1]
  19. self.coord_z = coordinates[2]
  20. self.coords = coordinates
  21. class Structure:
  22. def __init__(self, res):
  23. self.res = res
  24. class Turn(Structure):
  25. def __init__(self, turn_type, res):
  26. self.turn_type = turn_type
  27. Structure.res = res
  28. class Bridge(Structure):
  29. def __init__(self, bridge_type, res_num, res_partner, indices):
  30. self.bridge_type = bridge_type
  31. self.res_num = res_num
  32. self.res_partner = res_partner
  33. Structure.res = res_num
  34. self.i = indices[0]
  35. self.j = indices[1]
  36. class Helix(Structure):
  37. def __init__(self, residues, res_num, helix_type):
  38. self.residues = residues
  39. self.res_num = res_num
  40. Structure.res = res_num
  41. self.helix_type = helix_type
  42. class Residue:
  43. def __init__(self, atoms_list, indice):
  44. self.atoms = {}
  45. for atom in atoms_list:
  46. self.atoms[atom.atom_name] = atom
  47. self.resid = atom.res_seq_nb
  48. self.res_name = atom.res_name
  49. self.res_letter = self.get_amino_letter(atom.res_name)
  50. self.chain_id = atom.chain_id
  51. self.insertion_code = atom.insertion_code
  52. self.indice = indice
  53. def get_amino_letter(self, res_name):
  54. code3 = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY',
  55. 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
  56. 'THR', 'TRP', 'TYR', 'VAL']
  57. code1 = ['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P',
  58. 'S','T','W','Y','V']
  59. return code1[code3.index(res_name)]
  60. def h_bond(self, res2):
  61. if("H" not in res2.atoms.keys()):
  62. return(False)
  63. # dimensionnal factor, in kcal/mole
  64. f = 332
  65. # partial charges
  66. q1 = 0.42
  67. q2 = 0.20
  68. # distance between O-N atoms, in angströms
  69. r_ON = self.atoms["O"].dist_atoms(res2.atoms["N"])
  70. # distance between C-H atoms, in angströms
  71. r_CH = self.atoms["C"].dist_atoms(res2.atoms["H"])
  72. # distance between O-H atoms, in angströms
  73. r_OH = self.atoms["O"].dist_atoms(res2.atoms["H"])
  74. # distance between C-N atoms, in angströms
  75. r_CN = self.atoms["C"].dist_atoms(res2.atoms["N"])
  76. # electrostatic interaction energy, in kcal/mole
  77. E = q1*q2*(1/r_ON + 1/r_CH - 1/r_OH - 1/r_CN)*f
  78. return(E)
  79. def get_turns(self, residues):
  80. """
  81. Get all the turns from a specific residue.
  82. """
  83. turns = {}
  84. i = residues.index(self)
  85. k = 0
  86. for j in range(3,6):
  87. if(i+j<len(residues)):
  88. if(self.h_bond(residues[i+j])<-0.5):
  89. k = j
  90. if k != 0:
  91. #print(k,"TURN", residues[i].resid, residues[i+k].resid)
  92. return Turn(k,residues[i].resid)
  93. return False
  94. def get_bends(self, residues):
  95. i = residues.index(self)
  96. if i >=2 and i <len(residues)-2:
  97. angle = math.degrees(vector_angles(vectors_substr(position_vector(residues[i].atoms["CA"].coords),
  98. position_vector(residues[i-2].atoms["CA"].coords)),
  99. vectors_substr(position_vector(residues[i+2].atoms["CA"].coords),
  100. position_vector(residues[i].atoms["CA"].coords))))
  101. if(angle>70):
  102. return [angle, 'S']
  103. return [angle, '']
  104. else:
  105. return [360.0, '']
  106. def get_bridges(self, residues):
  107. bridges = {}
  108. bridge = {}
  109. strongest_bridge = {}
  110. i = residues.index(self)
  111. if(i >= 1 and i < len(residues)-4):
  112. E_min = 0
  113. for j in range(i+2,len(residues)-1):
  114. # select triplet with the minimal energy
  115. if(residues[i-1].h_bond(residues[j])<-0.5
  116. and residues[j].h_bond(residues[i+1])<-0.5):
  117. bridge = {'res1':residues[i-1].h_bond(residues[j]),
  118. 'res2':residues[j].h_bond(residues[i+1]),
  119. 'ipos':residues[i].resid,
  120. 'jpos':residues[j].resid,
  121. 'btype':"para"}
  122. if(residues[j-1].h_bond(residues[i])<-0.5
  123. and residues[i].h_bond(residues[j+1])<-0.5):
  124. bridge = {'res1':residues[j-1].h_bond(residues[i]),
  125. 'res2':residues[i].h_bond(residues[j+1]),
  126. 'ipos':residues[i].resid,
  127. 'jpos':residues[j].resid,
  128. 'btype':"para"}
  129. if(residues[i].h_bond(residues[j])<-0.5
  130. and residues[j].h_bond(residues[i])<-0.5):
  131. bridge = {'res1':residues[i].h_bond(residues[j]),
  132. 'res2':residues[j].h_bond(residues[i]),
  133. 'ipos':residues[i].resid,
  134. 'jpos':residues[j].resid,
  135. 'btype':"anti"}
  136. if(residues[i-1].h_bond(residues[j+1])<-0.5
  137. and residues[j-1].h_bond(residues[i+1])<-0.5):
  138. bridge = {'res1':residues[i-1].h_bond(residues[j+1]),
  139. 'res2':residues[j-1].h_bond(residues[i+1]),
  140. 'ipos':residues[i].resid,
  141. 'jpos':residues[j].resid,
  142. 'btype':"anti"}
  143. if(bridge):
  144. if(bridge['res1']+bridge['res2']<E_min):
  145. E_min = bridge['res1']+bridge['res2']
  146. strongest_bridge = bridge
  147. bridge = {}
  148. coord_bridge = [i,j]
  149. # finally add the strongest bridge at i and j pos
  150. if(strongest_bridge):
  151. bridges[coord_bridge[0]] = (Bridge(strongest_bridge['btype'],
  152. strongest_bridge['ipos'],
  153. strongest_bridge['jpos']))
  154. bridges[coord_bridge[1]] = (Bridge(strongest_bridge['btype'],
  155. strongest_bridge['jpos'],
  156. strongest_bridge['ipos']))
  157. if(len(bridges)>0):
  158. return(bridges[coord_bridge[0]])
  159. else:
  160. return(False)
  161. def get_helix(self, residues):
  162. """
  163. Return if there is an helix at a given residue,
  164. as well as its type.
  165. """
  166. i = residues.index(self)
  167. # if there are no turns or it is the first residue, skip
  168. if i == 0:
  169. return False
  170. if(self.get_turns(residues) and residues[i-1].get_turns(residues)):
  171. #print(self.get_turns(residues).turn_type,"- HELIX at", residues[i].indice)
  172. return(self.get_turns(residues).turn_type, residues[i].indice)
  173. return(False)
  174. def get_ladder(self, residues):
  175. #ladders = {}
  176. i = residues.index(self)
  177. if i != 0:
  178. if self.get_bridges(residues):
  179. if (residues[i-1].get_bridges(residues)):
  180. local_bridge = self.get_bridges(residues)
  181. consec_bridge = residues[i-1].get_bridges(residues)
  182. if local_bridge.bridge_type == consec_bridge.bridge_type:
  183. #print("ladder", consec_bridge.res_num, local_bridge.res_num)
  184. ladder = {'start':consec_bridge.res_num,
  185. 'end':local_bridge.res_num,
  186. 'bridges':[consec_bridge, local_bridge]}
  187. return ladder
  188. return False
  189. def get_tco(self, residues):
  190. i = residues.index(self)
  191. if(i!=0):
  192. res2 = residues[i-1]
  193. CO_res1 = vector_from_pos(self.atoms["C"].coords,
  194. self.atoms["O"].coords)
  195. CO_res2 = vector_from_pos(res2.atoms["C"].coords,
  196. res2.atoms["O"].coords)
  197. angle = vector_angles(CO_res1, CO_res2)
  198. else:
  199. angle = math.pi/2
  200. return(math.cos(angle))
  201. def get_chirality(self, residues):
  202. i = residues.index(self)
  203. if (i >=1 and i < len(residues)-2):
  204. chirality = {}
  205. angle = calc_dihedral(residues[i-1].atoms["CA"].coords,
  206. residues[i].atoms["CA"].coords,
  207. residues[i+1].atoms["CA"].coords,
  208. residues[i+2].atoms["CA"].coords)
  209. if(angle>0 and angle<=180):
  210. sign="+"
  211. if(angle<=0 and angle > -180):
  212. sign="-"
  213. else:
  214. angle = 360.0
  215. sign = ''
  216. return [angle, sign]
  217. def get_phi_psi(self, residues):
  218. i = residues.index(self)
  219. if(i==0):
  220. phi = 360.0
  221. else:
  222. phi = calc_dihedral(residues[i-1].atoms["C"].coords,
  223. residues[i].atoms["N"].coords,
  224. residues[i].atoms["CA"].coords,
  225. residues[i].atoms["C"].coords)
  226. if(i==len(residues)-1):
  227. psi = 360.0
  228. else:
  229. psi = calc_dihedral(residues[i].atoms["N"].coords,
  230. residues[i].atoms["CA"].coords,
  231. residues[i].atoms["C"].coords,
  232. residues[i+1].atoms["N"].coords)
  233. return((phi, psi))
  234. def get_bridges(residues):
  235. bridges = {}
  236. bridge = {}
  237. strongest_bridge = {}
  238. for i in range(1,len(residues)-4):
  239. E_min = 0
  240. for j in range(i+2,len(residues)-1):
  241. # select triplet with the minimal energy
  242. if(residues[i-1].h_bond(residues[j])<-0.5
  243. and residues[j].h_bond(residues[i+1])<-0.5):
  244. bridge = {'res1':residues[i-1].h_bond(residues[j]),
  245. 'res2':residues[j].h_bond(residues[i+1]),
  246. 'ipos':residues[i].resid,
  247. 'jpos':residues[j].resid,
  248. 'i':residues[i].indice,
  249. 'j':residues[j].indice,
  250. 'btype':"para"}
  251. if(residues[j-1].h_bond(residues[i])<-0.5
  252. and residues[i].h_bond(residues[j+1])<-0.5):
  253. bridge = {'res1':residues[j-1].h_bond(residues[i]),
  254. 'res2':residues[i].h_bond(residues[j+1]),
  255. 'ipos':residues[i].resid,
  256. 'jpos':residues[j].resid,
  257. 'i':residues[i].indice,
  258. 'j':residues[j].indice,
  259. 'btype':"para"}
  260. if(residues[i].h_bond(residues[j])<-0.5
  261. and residues[j].h_bond(residues[i])<-0.5):
  262. bridge = {'res1':residues[i].h_bond(residues[j]),
  263. 'res2':residues[j].h_bond(residues[i]),
  264. 'ipos':residues[i].resid,
  265. 'jpos':residues[j].resid,
  266. 'i':residues[i].indice,
  267. 'j':residues[j].indice,
  268. 'btype':"anti"}
  269. if(residues[i-1].h_bond(residues[j+1])<-0.5
  270. and residues[j-1].h_bond(residues[i+1])<-0.5):
  271. bridge = {'res1':residues[i-1].h_bond(residues[j+1]),
  272. 'res2':residues[j-1].h_bond(residues[i+1]),
  273. 'ipos':residues[i].resid,
  274. 'jpos':residues[j].resid,
  275. 'i':residues[i].indice,
  276. 'j':residues[j].indice,
  277. 'btype':"anti"}
  278. if(bridge):
  279. if(bridge['res1']+bridge['res2']<E_min):
  280. E_min = bridge['res1']+bridge['res2']
  281. strongest_bridge = bridge
  282. coord_bridge = [i,j]
  283. bridge = {}
  284. # finally add the strongest bridge at i and j pos
  285. if(strongest_bridge):
  286. bridges[strongest_bridge['i']] = (Bridge(strongest_bridge['btype'],
  287. strongest_bridge['ipos'],
  288. strongest_bridge['jpos'],
  289. [strongest_bridge['i'],
  290. strongest_bridge['j']]))
  291. bridges[strongest_bridge['j']] = (Bridge(strongest_bridge['btype'],
  292. strongest_bridge['ipos'],
  293. strongest_bridge['jpos'],
  294. [strongest_bridge['i'],
  295. strongest_bridge['j']]))
  296. if(len(bridges)>0):
  297. return(bridges)
  298. else:
  299. return(False)
  300. def get_ladders(bridges, residues):
  301. ladders = {}
  302. i = 1
  303. while i < len(residues):
  304. k = 1
  305. if i in bridges.keys():
  306. temp_bridges = [bridges[i]]
  307. while ((i+k in bridges.keys()) and
  308. (bridges[i].bridge_type == bridges[i+k].bridge_type)):
  309. temp_bridges.append(bridges[i+k])
  310. k+=1
  311. if k>1:
  312. #print("ladder", bridges[i].res_num, bridges[i+k-1].res_num)
  313. ladders[i] = {'start':bridges[i].res_num,
  314. 'end':bridges[i+k-1].res_num,
  315. 'bridges':temp_bridges,
  316. 'i':i,
  317. 'j':i+k-1}
  318. i+=k-1
  319. else:
  320. i+=1
  321. return ladders
  322. def connected_ladders(ladd_1, ladd_2):
  323. links = []
  324. for bridge in ladd_1['bridges']:
  325. if bridge.res_partner in res_list(ladd_2):
  326. return ladd_2
  327. return False
  328. def connected_ladders2(ladd_1, ladd_2):
  329. links = []
  330. for bridge in ladd_1['bridges']:
  331. if bridge.res_partner in res_list(ladd_2):
  332. return([ladd_1['i'], ladd_1['j'], bridge.i, bridge.j,
  333. ladd_2['i'], ladd_2['j']])
  334. #return ladd_2
  335. return False
  336. def get_sheets(ladders):
  337. """
  338. Bridges between ladders.
  339. Check if 1 bridge between one ladder and one or more other ladders.
  340. Iterate over all residues of one ladder and check if bridge with other residues
  341. of the other ladders.
  342. """
  343. ladds = [ elem for elem in ladders.values() ]
  344. sheets = {}
  345. corresp = {}
  346. for ladd1 in ladds:
  347. for ladd2 in ladds:
  348. if connected_ladders(ladd1, ladd2)!=False:
  349. corresp_list = [ elem for elem in corresp.keys() ]
  350. if ladd1['i'] not in corresp_list and ladd2['i'] not in corresp_list:
  351. ind = len(sheets.keys())
  352. sheets[ind] = []
  353. sheets[ind].append(ladd1)
  354. sheets[ind].append(ladd2)
  355. corresp[ladd1['i']] = ind
  356. corresp[ladd2['i']] = ind
  357. elif ladd2 not in corresp_list and ladd1 in corresp_list:
  358. sheets[corresp[ladd1['i']]].append(ladd2)
  359. corresp[ladd2['i']] = corresp[ladd1['i']]
  360. elif ladd1 not in corresp_list and ladd2 in corresp_list:
  361. sheets[corresp[ladd2['i']]].append(ladd1)
  362. corresp[ladd1['i']] = corresp[ladd2['i']]
  363. return sheets
  364. def get_sheets2(ladders):
  365. """
  366. Bridges between ladders.
  367. Check if 1 bridge between one ladder and one or more other ladders.
  368. Iterate over all residues of one ladder and check if bridge with other residues
  369. of the other ladders.
  370. """
  371. sheets = {}
  372. for ladder in ladders:
  373. for ladd2 in ladders:
  374. if connected_ladders(ladders[ladder], ladders[ladd2]):
  375. bridge_i = connected_ladders2(ladders[ladder], ladders[ladd2])[2]
  376. bridge_j = connected_ladders2(ladders[ladder], ladders[ladd2])[3]
  377. print("ladder",ladders[ladder]['i'], ladders[ladder]['j'],"bridge",bridge_i, bridge_j,
  378. "ladder 2",ladders[ladd2]['i'], ladders[ladd2]['j'])
  379. def res_list(ladder):
  380. # TODO : method in ladder class
  381. l=[]
  382. for i in range(ladder['i'], ladder['j']):
  383. l.append(i)
  384. return(l)
  385. def build_turns_patterns(residues):
  386. turns_3 = {}
  387. turns_4 = {}
  388. turns_5 = {}
  389. for i,res in enumerate(residues):
  390. turn = residues[i].get_turns(residues)
  391. if(turn):
  392. for k in range(turn.turn_type):
  393. if turn.turn_type == 3:
  394. turns_3[i+1+k] = turn.turn_type
  395. if turn.turn_type == 4:
  396. turns_4[i+1+k] = turn.turn_type
  397. if turn.turn_type == 5:
  398. turns_5[i+1+k] = turn.turn_type
  399. return[turns_3, turns_4, turns_5]
  400. def build_helix_patterns(residues):
  401. helix_3 = {}
  402. helix_4 = {}
  403. helix_5 = {}
  404. for i,res in enumerate(residues):
  405. helix = residues[i].get_helix(residues)
  406. if(helix):
  407. helix_type = residues[i].get_helix(residues)[0]
  408. helix_pos = residues[i].get_helix(residues)[1]
  409. #print("TYPE", helix_type)
  410. for k in range(helix_type):
  411. if helix_type == 3:
  412. helix_3[i+1+k] = "G"
  413. if helix_type == 4:
  414. helix_4[i+1+k] = "H"
  415. if helix_type == 5:
  416. helix_5[i+1+k] = "I"
  417. #print(helix_3)
  418. return[helix_3, helix_4, helix_5]
  419. def print_helix_pattern(residues, res, helix):
  420. i = residues.index(res)+1
  421. if i in helix.keys():
  422. return (helix[i])
  423. else:
  424. return(' ')
  425. def print_turn_pattern(residues, res, turns):
  426. i = residues.index(res)+1
  427. if i in turns.keys() and not i-1 in turns.keys():
  428. return(">")
  429. elif i in turns.keys() and i-1 in turns.keys():
  430. return(turns[i])
  431. elif i not in turns.keys() and i-1 in turns.keys():
  432. return("<")
  433. else:
  434. return(' ')