Browse Source

Finish docstrings, update readme

Thomas Forest 4 years ago
parent
commit
8adcb869c1
8 changed files with 348 additions and 45 deletions
  1. 49 0
      README.md
  2. BIN
      doc/Notice.pdf
  3. BIN
      doc/Sujets_Projets_Courts_2019_2020.pdf
  4. 0 37
      doc/notes.org
  5. BIN
      doc/pdb_format_33_rot90.png
  6. 270 3
      src/chem.py
  7. 5 0
      src/geom.py
  8. 24 5
      src/pdb.py

+ 49 - 0
README.md View File

@@ -24,11 +24,60 @@ Load your environnement
24 24
 conda activate dssp
25 25
 ```
26 26
 
27
+### Running
28
+
27 29
 You should be ready to run the program, by calling, for example
28 30
 ```
29 31
 python3 src/dssp.py data/1est.pdb
30 32
 ```
31 33
 
34
+And if you want the extended output, use the -v flag, or --verbose
35
+```
36
+python3 src/dssp.py data/1est.pdb -v
37
+```
38
+
39
+To get usage help, run
40
+```
41
+python3 src/dssp.py data/1est.pdb -h
42
+```
43
+
44
+## Output
45
+
46
+To understand the output, here is a short description of the DSSP output, as followed from the official website : https://swift.cmbi.umcn.nl/gv/dssp/
47
+
48
+*   RESIDUE
49
+
50
+two columns of residue numbers. First column is DSSP's sequential residue number, starting at the first residue actually in the data set and including chain breaks; this number is used to refer to residues throughout. Second column gives crystallographers' 'residue sequence number','insertion code' and 'chain identifier' (see protein data bank file record format manual), given for reference only. 
51
+
52
+*   AA
53
+
54
+one letter amino acid code
55
+
56
+
57
+*   BP1 
58
+
59
+residue number of first bridge partner
60
+
61
+*   TCO
62
+
63
+cosine of angle between C=O of residue I and C=O of residue I-1. For α-helices, TCO is near +1, for β-sheets TCO is near -1. Not used for structure definition.
64
+
65
+*   KAPPA
66
+
67
+virtual bond angle (bend angle) defined by the three Cα atoms of residues I-2,I,I+2. Used to define bend (structure code 'S').
68
+
69
+*   ALPHA
70
+
71
+virtual torsion angle (dihedral angle) defined by the four Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality (structure code '+' or '-').
72
+
73
+*   PHI PSI
74
+
75
+IUPAC peptide backbone torsion angles
76
+
77
+*   X-CA Y-CA Z-CA
78
+
79
+echo of Cα atom coordinates 
80
+
32 81
 ## Author
33 82
 
34 83
 * **FOREST Thomas** - *M2BI* - [Univ-Paris-Diderot.fr](https://www.univ-paris-diderot.fr/) - thomas.forest@etu.univ-paris-diderot.fr

BIN
doc/Notice.pdf View File


BIN
doc/Sujets_Projets_Courts_2019_2020.pdf View File


+ 0 - 37
doc/notes.org View File

@@ -1,37 +0,0 @@
1
-Dépôt output DSSP:
2
-http://ftp.cbi.pku.edu.cn/pub/database/DSSP/latest_release/
3
-
4
-
5
-structure basée sur liaisons H
6
-
7
-- n-turns : liaison H entre CO et NH du res i+n, ou n = [3,4,5]
8
-- bridges : LH avec res non proches
9
-
10
-* hélices : 4-turns répétés
11
-* brins beta : repetition bridges
12
-
13
-common imperfections : helical kinks + beta-bulges
14
-
15
-propriétés déduites geométriquement : bends, chirality, SS bonds, solvent exposure
16
-
17
-
18
-Hbond(i,j) = [E<-0.5kcal/mole]
19
-
20
-electrostatic interaction energy between twho H-bonding groups by placing partial charges on the C,O(+q1,-q1) and N,H(-q2,+q2) atoms 
21
-
22
-E = q1q2(1/r(ON) + 1/r(CH) - 1/r(OH) - 1/r(CN))*f
23
-avec
24
-q1 = 0.42e
25
-q2 = 0.20e
26
-e la charge élémentaire d'un electro (e = 1,602 176 634 ×10−19 C)
27
-r en angström
28
-f (facteur de dimmensionnalité) = 332
29
-E en kcal/mol
30
-
31
-Bonne LH si E <= -3 kcal/mol
32
-
33
-Cutoff choisi permet une distance N-O jusqu'à 2.2 Angström
34
-N-H bound distance : 1.030 A
35
-https://pubs.acs.org/doi/pdf/10.1021/ja00313a047
36
-
37
-Angles : https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/peptide-bond

BIN
doc/pdb_format_33_rot90.png View File


+ 270 - 3
src/chem.py View File

@@ -2,8 +2,30 @@ import math
2 2
 from geom import *
3 3
 
4 4
 class Atom:
5
+    """Defines Atom objects and their associated methods.
5 6
 
7
+    This function is used to instenciate Atom objects with
8
+    convenient fields, and a method used to compute
9
+    distance between atoms.
10
+    """
6 11
     def dist_atoms(self, atom2):
12
+        """Compute distance between two atoms.
13
+
14
+        Parameters
15
+        ----------
16
+        self : Atom object
17
+            First atom involved to compute the distance.
18
+        atom2 : Atom object
19
+            Second atom involved to compute the distance.
20
+
21
+        This function is used to get the distance between
22
+        two atoms in 3D space.
23
+
24
+        Returns
25
+        -------
26
+        float
27
+            Distance, in angströms.
28
+        """
7 29
         return(math.sqrt((self.coord_x-atom2.coord_x)**2 +
8 30
                          (self.coord_y-atom2.coord_y)**2 +
9 31
                          (self.coord_z-atom2.coord_z)**2))
@@ -11,6 +33,25 @@ class Atom:
11 33
 
12 34
     def __init__(self, atom_id, atom_name, res_name, chain_id,
13 35
                  res_seq_nb, insertion_code, coordinates):
36
+        """Constructor of Atom class object.
37
+
38
+        Parameters
39
+        ----------
40
+        self : Atom object
41
+            The new atom being created.
42
+        atom_id : int
43
+            Sequential PDB atom number.
44
+        res_name : str
45
+            3-letter code of residue.
46
+        chain_id : str
47
+            Unique chain ID.
48
+        res_seq_nb : int
49
+            Actual amino acid sequence position.
50
+        insertion_code : str
51
+            Insertion letter when added in case of ambiguity.
52
+        coordinates : list[float, ...]
53
+            X,Y,Z coordinates of atom.
54
+        """
14 55
         self.atom_id = atom_id
15 56
         self.atom_name = atom_name
16 57
         self.res_name = res_name
@@ -53,6 +94,17 @@ class Helix(Structure):
53 94
         
54 95
 class Residue:
55 96
     def __init__(self, atoms_list, indice):
97
+        """Constructor of Residue class object.
98
+
99
+        Parameters
100
+        ----------
101
+        self : Residue object
102
+            The new residue being created.
103
+        atoms_list : list[Atom, ...]
104
+            List of Atom objects contained in this residue.
105
+        indice : int
106
+            Sequential Residue number.
107
+        """
56 108
         self.atoms = {}
57 109
         for atom in atoms_list:
58 110
             self.atoms[atom.atom_name] = atom
@@ -64,6 +116,24 @@ class Residue:
64 116
             self.indice = indice
65 117
             
66 118
     def get_amino_letter(self, res_name):
119
+        """Get 1-letter code of amino acid 3-letter code.
120
+
121
+        Parameters
122
+        ----------
123
+        self : Residue object
124
+            Reference to Residue instance.
125
+        res_name : str
126
+            3-letter code of the residue.
127
+
128
+        This function is used to convert the 3-letter code
129
+        format of the residue name to its 1-letter code
130
+        correspondance.
131
+
132
+        Returns
133
+        -------
134
+        str
135
+            3-letter amino acid code.
136
+        """
67 137
         code3 = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY',
68 138
                  'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
69 139
                  'THR', 'TRP', 'TYR', 'VAL']
@@ -75,6 +145,26 @@ class Residue:
75 145
         return code1[code3.index(res_name)]
76 146
 
77 147
     def h_bond(self, res2):
148
+        """Compute the H-Bond interaction between
149
+        two residues, in kcal/mole.
150
+
151
+        Parameters
152
+        ----------
153
+        self : Residue object
154
+            Reference to Residue instance.
155
+        res2 : Residue object
156
+            Second residue to evaluate the bond
157
+            energy.
158
+
159
+        This function computes the bond energy between 
160
+        two residues based on N,H,C and O atoms of each
161
+        residue.
162
+
163
+        Returns
164
+        -------
165
+        str
166
+            3-letter amino acid code.
167
+        """
78 168
         if("H" not in res2.atoms.keys()):
79 169
             return(False)
80 170
         # dimensionnal factor, in kcal/mole
@@ -97,6 +187,23 @@ class Residue:
97 187
     def get_turns(self, residues):
98 188
         """
99 189
         Get all the turns from a specific residue.
190
+
191
+        Parameters
192
+        ----------
193
+        self : Residue object
194
+            Reference to Residue instance.
195
+        residues : list[Residue, ...]
196
+            List of all the Residues in the protein.
197
+
198
+        This function determines if there is a n-turn
199
+        at a specific residue, and its type. 3,4,5-turn...
200
+        
201
+        Returns
202
+        -------
203
+        Turn object | Boolean 
204
+            If there is a n-turn, a Turn object is returned, 
205
+            if not, False is returned.
206
+        
100 207
         """
101 208
         turns = {}
102 209
         i = residues.index(self)
@@ -111,6 +218,25 @@ class Residue:
111 218
         return False
112 219
 
113 220
     def get_bends(self, residues):
221
+        """Get the bend from a specific residue.
222
+
223
+        Parameters
224
+        ----------
225
+        self : Residue object
226
+            Reference to Residue instance.
227
+        residues : list[Residue, ...]
228
+            List of all the Residues in the protein.
229
+
230
+        This function determines if there is a bend
231
+        at a specific residue. Used to determine Kappa
232
+        angle.
233
+        
234
+        Returns
235
+        -------
236
+        list[float, str] 
237
+            Kappa angle value, and S to signify bend presence,
238
+            if Kappa>70°.
239
+        """
114 240
         i = residues.index(self)
115 241
         if i >=2 and i <len(residues)-2:
116 242
             angle = math.degrees(vector_angles(
@@ -125,9 +251,24 @@ class Residue:
125 251
             return [360.0, '']
126 252
 
127 253
     def get_helix(self, residues):
128
-        """
129
-        Return if there is an helix at a given residue,
254
+        """Return if there is an helix at a given residue,
130 255
         as well as its type.
256
+
257
+        Parameters
258
+        ----------
259
+        self : Residue object
260
+            Reference to Residue instance.
261
+        residues : list[Residue, ...]
262
+            List of all the Residues in the protein.
263
+
264
+        This function determines if there is an helix
265
+        at a specific residue. 
266
+
267
+        Returns
268
+        -------
269
+        list[str, int]
270
+            The type of helix G(=3),H(=4),I(=5), and the sequential
271
+            position of residue where the helix starts.
131 272
         """
132 273
         i = residues.index(self)
133 274
         # if there are no turns or it is the first residue, skip
@@ -139,6 +280,21 @@ class Residue:
139 280
         return(False)
140 281
 
141 282
     def get_ladder(self, residues):
283
+        """Return if there is a ladder at a given residue.
284
+
285
+        Parameters
286
+        ----------
287
+        self : Residue object
288
+            Reference to Residue instance.
289
+        residues : list[Residue, ...]
290
+            List of all the Residues in the protein.
291
+
292
+        Returns
293
+        -------
294
+        dict | Boolean
295
+            If there is a ladder, return it as a dictionnary, else returns False.
296
+        
297
+        """
142 298
         i = residues.index(self)
143 299
         if i != 0:
144 300
             if self.get_bridges(residues):
@@ -153,6 +309,23 @@ class Residue:
153 309
         return False   
154 310
 
155 311
     def get_tco(self, residues):
312
+        """Cosine of angle between C=O of residue I and C=O of residue I-1. 
313
+
314
+        Parameters
315
+        ----------
316
+        self : Residue object
317
+            Reference to Residue instance.
318
+        residues : list[Residue, ...]
319
+            List of all the Residues in the protein.
320
+
321
+        For α-helices, TCO is near +1, for β-sheets TCO is near -1. 
322
+        Not used for structure definition. 
323
+
324
+        Returns
325
+        -------
326
+        float
327
+            Cosine of angle.
328
+        """
156 329
         i = residues.index(self)
157 330
         if(i!=0):
158 331
             res2 = residues[i-1]
@@ -166,6 +339,24 @@ class Residue:
166 339
         return(math.cos(angle))
167 340
 
168 341
     def get_chirality(self, residues):
342
+        """Return the chirality at a given residue.
343
+
344
+        Parameters
345
+        ----------
346
+        self : Residue object
347
+            Reference to Residue instance.
348
+        residues : list[Residue, ...]
349
+            List of all the Residues in the protein.
350
+
351
+        Virtual torsion angle (dihedral angle) defined by the four 
352
+        Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality 
353
+        (structure code '+' or '-'). 
354
+
355
+        Returns
356
+        -------
357
+        list[float, str]
358
+            Dihedral angle and chirality sign. 
359
+        """
169 360
         i = residues.index(self)
170 361
         if (i >=1 and i < len(residues)-2):
171 362
             chirality = {}
@@ -187,6 +378,22 @@ class Residue:
187 378
         return [angle, sign]
188 379
     
189 380
     def get_phi_psi(self, residues):
381
+        """Compute Phi and Psi angles of protein
382
+        backbone around the peptidic bound at each
383
+        residue.
384
+
385
+        Parameters
386
+        ----------
387
+        self : Residue object
388
+            Reference to Residue instance.
389
+        residues : list[Residue, ...]
390
+            List of all the Residues in the protein.
391
+
392
+        Returns
393
+        -------
394
+        tuple(float, float)
395
+        Phi and Psi angles.
396
+        """
190 397
         i = residues.index(self)  
191 398
         if(i==0):
192 399
             phi = 360.0
@@ -205,6 +412,23 @@ class Residue:
205 412
 
206 413
         return((phi, psi))
207 414
 def get_bridges(residues):
415
+    """Construct all bridges between residues in the 
416
+    protein.
417
+    
418
+    Parameters
419
+    ----------
420
+    residues : list[Residue, ...]
421
+        List of all the Residues in the protein.
422
+    
423
+    Constructs a list of Bridges objects, characterized by
424
+    their involved residues positions, sequential positions,
425
+    type (parallel or antiparallel).
426
+    
427
+    Returns
428
+    -------
429
+    list(Bridge)
430
+    Returns the list of bridges.
431
+    """
208 432
     bridges = {}
209 433
     bridge = {}
210 434
     strongest_bridge = {}
@@ -277,6 +501,23 @@ def get_bridges(residues):
277 501
         return(False)
278 502
 
279 503
 def get_ladders(bridges, residues):
504
+    """Construct all ladders in the protein.
505
+    
506
+    Parameters
507
+    ----------
508
+    residues : list[Residue, ...]
509
+        List of all the Residues in the protein.
510
+    
511
+    Constructs a dictionnary of ladders, characterized by
512
+    their involved residues positions, sequential positions,
513
+    involved bridges. Ladders are made of consecutive bridges
514
+    of the same type.
515
+    
516
+    Returns
517
+    -------
518
+    dict
519
+        Returns the dictionnary of ladders.
520
+    """
280 521
     ladders = {}
281 522
     i = 1
282 523
     while i < len(residues):
@@ -299,6 +540,9 @@ def get_ladders(bridges, residues):
299 540
     return ladders
300 541
 
301 542
 def connected_ladders(ladd_1, ladd_2):
543
+    """Check if two ladders, ladd_1 and ladd_2, 
544
+    are linked by one shared bridge.
545
+    """
302 546
     links = []
303 547
     for bridge in ladd_1['bridges']:
304 548
         if bridge.res_partner in res_list(ladd_2):
@@ -335,6 +579,9 @@ def get_sheets(ladders):
335 579
     return sheets
336 580
    
337 581
 def res_list(ladder):
582
+    """Iterate over ladders, from start to end,
583
+    to get the list of residues inside it.
584
+    """
338 585
     l=[]
339 586
     for i in range(ladder['i'], ladder['j']):
340 587
         l.append(i)
@@ -357,6 +604,9 @@ def build_turns_patterns(residues):
357 604
     return[turns_3, turns_4, turns_5]
358 605
 
359 606
 def build_helix_patterns(residues):
607
+    """Builds series of letters (G, H  or I) corresponding
608
+    to helix types (3, 4 or 5). Used for the final output.
609
+    """
360 610
     helix_3 = {}
361 611
     helix_4 = {}
362 612
     helix_5 = {}
@@ -375,6 +625,10 @@ def build_helix_patterns(residues):
375 625
     return[helix_3, helix_4, helix_5]
376 626
 
377 627
 def print_helix_pattern(residues, res, helix):
628
+    """Used to print the Helix type at a specific
629
+    residue position if there is one. Used for the final
630
+    output.
631
+    """
378 632
     i = residues.index(res)+1
379 633
     if i in helix.keys():
380 634
         return (helix[i])
@@ -382,6 +636,11 @@ def print_helix_pattern(residues, res, helix):
382 636
         return(' ')
383 637
 
384 638
 def print_turn_pattern(residues, res, turns):
639
+    """Builds series of symbols (>, <  or n) corresponding
640
+    to turn types (n=3, 4 or 5). Used for the final output.
641
+    '>' represent start of the turn series.
642
+    '<' represent end of the turn.
643
+    """
385 644
     i = residues.index(res)+1
386 645
     if i in turns.keys() and not i-1 in turns.keys():
387 646
         return(">")
@@ -392,7 +651,15 @@ def print_turn_pattern(residues, res, turns):
392 651
     else:
393 652
         return(' ')
394 653
     
395
-def raw_ladders_print(ladders, ):
654
+def raw_ladders_print(ladders):
655
+    """Function to print roughly the connected ladders by bridges in 
656
+    the final output.
657
+    This has been use to illustrate sheets principles.
658
+    Only shown if the -v or --verbose flag is used when running
659
+    dssp.py.
660
+
661
+    """
662
+    print("### CONNECTED LADDERS ###")
396 663
     for ladd1 in ladders:
397 664
         ladd_1 = ladders[ladd1]
398 665
         for ladd2 in ladders:

+ 5 - 0
src/geom.py View File

@@ -1,5 +1,10 @@
1 1
 import math
2 2
 import numpy as np
3
+
4
+# A couple of obvious functions for vector manipulation.
5
+# Lack of time caused this not to be OOP-implemented, or furthermore
6
+# documented.
7
+
3 8
 def vector_from_pos(a, b):
4 9
     xAB = b[0]-a[0]
5 10
     yAB = b[1]-a[1]

+ 24 - 5
src/pdb.py View File

@@ -112,11 +112,30 @@ class PDBFile:
112 112
             #Need to add hydrogens 
113 113
             self.add_hydrogens(filename)
114 114
 
115
-    def check_hydrogens(self, atoms):
116
-        print("ENTER CHECK HYDROGEN")
117
-        return True
118
-
119 115
     def add_hydrogens(self, filename, output_pdb=None):
116
+        """Add hydrogens to N atoms of the protein backbone.
117
+
118
+        Parameters
119
+        ----------
120
+        self : PDBFile
121
+            The instance of PDB File to get the header from.
122
+
123
+        filename : str
124
+            The name of the file to be exploited by PyMOL lib.
125
+        output_pdb : str
126
+            If defined, the name of the file to be saved py PyMOL,
127
+            with hydrogens added.
128
+
129
+        This function is used to add the missing H atoms on the
130
+        Nitrogens atoms of the protein backbone, when H are missing
131
+        from PDB file. H are needed to perform all the bond interactions-
132
+        based calculations.
133
+
134
+        Returns
135
+        -------
136
+        dict
137
+            A dictionnary of all Hydrogens added to the backbone.
138
+        """
120 139
         pymol.finish_launching(['pymol', '-qc'])
121 140
         pymol.cmd.load(filename)
122 141
         pymol.cmd.select("nitrogens",'name n')  
@@ -129,7 +148,7 @@ class PDBFile:
129 148
                                 'else ' \
130 149
                                 'stored.pos[resi].append([name,resi,x,y,z])')
131 150
         if(output_pdb!=None):
132
-            pymol.cmd.save(output_file)
151
+            pymol.cmd.save(output_pdb)
133 152
         for resi in self.residues:
134 153
             if(str(resi.resid) in pymol.stored.pos.keys()):
135 154
                 # iterate over residue H atoms