Browse Source

Finish docstrings, update readme

Thomas Forest 5 years ago
parent
commit
8adcb869c1
8 changed files with 348 additions and 45 deletions
  1. 49 0
      README.md
  2. BIN
      doc/Notice.pdf
  3. BIN
      doc/Sujets_Projets_Courts_2019_2020.pdf
  4. 0 37
      doc/notes.org
  5. BIN
      doc/pdb_format_33_rot90.png
  6. 270 3
      src/chem.py
  7. 5 0
      src/geom.py
  8. 24 5
      src/pdb.py

+ 49 - 0
README.md View File

24
 conda activate dssp
24
 conda activate dssp
25
 ```
25
 ```
26
 
26
 
27
+### Running
28
+
27
 You should be ready to run the program, by calling, for example
29
 You should be ready to run the program, by calling, for example
28
 ```
30
 ```
29
 python3 src/dssp.py data/1est.pdb
31
 python3 src/dssp.py data/1est.pdb
30
 ```
32
 ```
31
 
33
 
34
+And if you want the extended output, use the -v flag, or --verbose
35
+```
36
+python3 src/dssp.py data/1est.pdb -v
37
+```
38
+
39
+To get usage help, run
40
+```
41
+python3 src/dssp.py data/1est.pdb -h
42
+```
43
+
44
+## Output
45
+
46
+To understand the output, here is a short description of the DSSP output, as followed from the official website : https://swift.cmbi.umcn.nl/gv/dssp/
47
+
48
+*   RESIDUE
49
+
50
+two columns of residue numbers. First column is DSSP's sequential residue number, starting at the first residue actually in the data set and including chain breaks; this number is used to refer to residues throughout. Second column gives crystallographers' 'residue sequence number','insertion code' and 'chain identifier' (see protein data bank file record format manual), given for reference only. 
51
+
52
+*   AA
53
+
54
+one letter amino acid code
55
+
56
+
57
+*   BP1 
58
+
59
+residue number of first bridge partner
60
+
61
+*   TCO
62
+
63
+cosine of angle between C=O of residue I and C=O of residue I-1. For α-helices, TCO is near +1, for β-sheets TCO is near -1. Not used for structure definition.
64
+
65
+*   KAPPA
66
+
67
+virtual bond angle (bend angle) defined by the three Cα atoms of residues I-2,I,I+2. Used to define bend (structure code 'S').
68
+
69
+*   ALPHA
70
+
71
+virtual torsion angle (dihedral angle) defined by the four Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality (structure code '+' or '-').
72
+
73
+*   PHI PSI
74
+
75
+IUPAC peptide backbone torsion angles
76
+
77
+*   X-CA Y-CA Z-CA
78
+
79
+echo of Cα atom coordinates 
80
+
32
 ## Author
81
 ## Author
33
 
82
 
34
 * **FOREST Thomas** - *M2BI* - [Univ-Paris-Diderot.fr](https://www.univ-paris-diderot.fr/) - thomas.forest@etu.univ-paris-diderot.fr
83
 * **FOREST Thomas** - *M2BI* - [Univ-Paris-Diderot.fr](https://www.univ-paris-diderot.fr/) - thomas.forest@etu.univ-paris-diderot.fr

BIN
doc/Notice.pdf View File


BIN
doc/Sujets_Projets_Courts_2019_2020.pdf View File


+ 0 - 37
doc/notes.org View File

1
-Dépôt output DSSP:
2
-http://ftp.cbi.pku.edu.cn/pub/database/DSSP/latest_release/
3
-
4
-
5
-structure basée sur liaisons H
6
-
7
-- n-turns : liaison H entre CO et NH du res i+n, ou n = [3,4,5]
8
-- bridges : LH avec res non proches
9
-
10
-* hélices : 4-turns répétés
11
-* brins beta : repetition bridges
12
-
13
-common imperfections : helical kinks + beta-bulges
14
-
15
-propriétés déduites geométriquement : bends, chirality, SS bonds, solvent exposure
16
-
17
-
18
-Hbond(i,j) = [E<-0.5kcal/mole]
19
-
20
-electrostatic interaction energy between twho H-bonding groups by placing partial charges on the C,O(+q1,-q1) and N,H(-q2,+q2) atoms 
21
-
22
-E = q1q2(1/r(ON) + 1/r(CH) - 1/r(OH) - 1/r(CN))*f
23
-avec
24
-q1 = 0.42e
25
-q2 = 0.20e
26
-e la charge élémentaire d'un electro (e = 1,602 176 634 ×10−19 C)
27
-r en angström
28
-f (facteur de dimmensionnalité) = 332
29
-E en kcal/mol
30
-
31
-Bonne LH si E <= -3 kcal/mol
32
-
33
-Cutoff choisi permet une distance N-O jusqu'à 2.2 Angström
34
-N-H bound distance : 1.030 A
35
-https://pubs.acs.org/doi/pdf/10.1021/ja00313a047
36
-
37
-Angles : https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/peptide-bond

BIN
doc/pdb_format_33_rot90.png View File


+ 270 - 3
src/chem.py View File

2
 from geom import *
2
 from geom import *
3
 
3
 
4
 class Atom:
4
 class Atom:
5
+    """Defines Atom objects and their associated methods.
5
 
6
 
7
+    This function is used to instenciate Atom objects with
8
+    convenient fields, and a method used to compute
9
+    distance between atoms.
10
+    """
6
     def dist_atoms(self, atom2):
11
     def dist_atoms(self, atom2):
12
+        """Compute distance between two atoms.
13
+
14
+        Parameters
15
+        ----------
16
+        self : Atom object
17
+            First atom involved to compute the distance.
18
+        atom2 : Atom object
19
+            Second atom involved to compute the distance.
20
+
21
+        This function is used to get the distance between
22
+        two atoms in 3D space.
23
+
24
+        Returns
25
+        -------
26
+        float
27
+            Distance, in angströms.
28
+        """
7
         return(math.sqrt((self.coord_x-atom2.coord_x)**2 +
29
         return(math.sqrt((self.coord_x-atom2.coord_x)**2 +
8
                          (self.coord_y-atom2.coord_y)**2 +
30
                          (self.coord_y-atom2.coord_y)**2 +
9
                          (self.coord_z-atom2.coord_z)**2))
31
                          (self.coord_z-atom2.coord_z)**2))
11
 
33
 
12
     def __init__(self, atom_id, atom_name, res_name, chain_id,
34
     def __init__(self, atom_id, atom_name, res_name, chain_id,
13
                  res_seq_nb, insertion_code, coordinates):
35
                  res_seq_nb, insertion_code, coordinates):
36
+        """Constructor of Atom class object.
37
+
38
+        Parameters
39
+        ----------
40
+        self : Atom object
41
+            The new atom being created.
42
+        atom_id : int
43
+            Sequential PDB atom number.
44
+        res_name : str
45
+            3-letter code of residue.
46
+        chain_id : str
47
+            Unique chain ID.
48
+        res_seq_nb : int
49
+            Actual amino acid sequence position.
50
+        insertion_code : str
51
+            Insertion letter when added in case of ambiguity.
52
+        coordinates : list[float, ...]
53
+            X,Y,Z coordinates of atom.
54
+        """
14
         self.atom_id = atom_id
55
         self.atom_id = atom_id
15
         self.atom_name = atom_name
56
         self.atom_name = atom_name
16
         self.res_name = res_name
57
         self.res_name = res_name
53
         
94
         
54
 class Residue:
95
 class Residue:
55
     def __init__(self, atoms_list, indice):
96
     def __init__(self, atoms_list, indice):
97
+        """Constructor of Residue class object.
98
+
99
+        Parameters
100
+        ----------
101
+        self : Residue object
102
+            The new residue being created.
103
+        atoms_list : list[Atom, ...]
104
+            List of Atom objects contained in this residue.
105
+        indice : int
106
+            Sequential Residue number.
107
+        """
56
         self.atoms = {}
108
         self.atoms = {}
57
         for atom in atoms_list:
109
         for atom in atoms_list:
58
             self.atoms[atom.atom_name] = atom
110
             self.atoms[atom.atom_name] = atom
64
             self.indice = indice
116
             self.indice = indice
65
             
117
             
66
     def get_amino_letter(self, res_name):
118
     def get_amino_letter(self, res_name):
119
+        """Get 1-letter code of amino acid 3-letter code.
120
+
121
+        Parameters
122
+        ----------
123
+        self : Residue object
124
+            Reference to Residue instance.
125
+        res_name : str
126
+            3-letter code of the residue.
127
+
128
+        This function is used to convert the 3-letter code
129
+        format of the residue name to its 1-letter code
130
+        correspondance.
131
+
132
+        Returns
133
+        -------
134
+        str
135
+            3-letter amino acid code.
136
+        """
67
         code3 = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY',
137
         code3 = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY',
68
                  'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
138
                  'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER',
69
                  'THR', 'TRP', 'TYR', 'VAL']
139
                  'THR', 'TRP', 'TYR', 'VAL']
75
         return code1[code3.index(res_name)]
145
         return code1[code3.index(res_name)]
76
 
146
 
77
     def h_bond(self, res2):
147
     def h_bond(self, res2):
148
+        """Compute the H-Bond interaction between
149
+        two residues, in kcal/mole.
150
+
151
+        Parameters
152
+        ----------
153
+        self : Residue object
154
+            Reference to Residue instance.
155
+        res2 : Residue object
156
+            Second residue to evaluate the bond
157
+            energy.
158
+
159
+        This function computes the bond energy between 
160
+        two residues based on N,H,C and O atoms of each
161
+        residue.
162
+
163
+        Returns
164
+        -------
165
+        str
166
+            3-letter amino acid code.
167
+        """
78
         if("H" not in res2.atoms.keys()):
168
         if("H" not in res2.atoms.keys()):
79
             return(False)
169
             return(False)
80
         # dimensionnal factor, in kcal/mole
170
         # dimensionnal factor, in kcal/mole
97
     def get_turns(self, residues):
187
     def get_turns(self, residues):
98
         """
188
         """
99
         Get all the turns from a specific residue.
189
         Get all the turns from a specific residue.
190
+
191
+        Parameters
192
+        ----------
193
+        self : Residue object
194
+            Reference to Residue instance.
195
+        residues : list[Residue, ...]
196
+            List of all the Residues in the protein.
197
+
198
+        This function determines if there is a n-turn
199
+        at a specific residue, and its type. 3,4,5-turn...
200
+        
201
+        Returns
202
+        -------
203
+        Turn object | Boolean 
204
+            If there is a n-turn, a Turn object is returned, 
205
+            if not, False is returned.
206
+        
100
         """
207
         """
101
         turns = {}
208
         turns = {}
102
         i = residues.index(self)
209
         i = residues.index(self)
111
         return False
218
         return False
112
 
219
 
113
     def get_bends(self, residues):
220
     def get_bends(self, residues):
221
+        """Get the bend from a specific residue.
222
+
223
+        Parameters
224
+        ----------
225
+        self : Residue object
226
+            Reference to Residue instance.
227
+        residues : list[Residue, ...]
228
+            List of all the Residues in the protein.
229
+
230
+        This function determines if there is a bend
231
+        at a specific residue. Used to determine Kappa
232
+        angle.
233
+        
234
+        Returns
235
+        -------
236
+        list[float, str] 
237
+            Kappa angle value, and S to signify bend presence,
238
+            if Kappa>70°.
239
+        """
114
         i = residues.index(self)
240
         i = residues.index(self)
115
         if i >=2 and i <len(residues)-2:
241
         if i >=2 and i <len(residues)-2:
116
             angle = math.degrees(vector_angles(
242
             angle = math.degrees(vector_angles(
125
             return [360.0, '']
251
             return [360.0, '']
126
 
252
 
127
     def get_helix(self, residues):
253
     def get_helix(self, residues):
128
-        """
129
-        Return if there is an helix at a given residue,
254
+        """Return if there is an helix at a given residue,
130
         as well as its type.
255
         as well as its type.
256
+
257
+        Parameters
258
+        ----------
259
+        self : Residue object
260
+            Reference to Residue instance.
261
+        residues : list[Residue, ...]
262
+            List of all the Residues in the protein.
263
+
264
+        This function determines if there is an helix
265
+        at a specific residue. 
266
+
267
+        Returns
268
+        -------
269
+        list[str, int]
270
+            The type of helix G(=3),H(=4),I(=5), and the sequential
271
+            position of residue where the helix starts.
131
         """
272
         """
132
         i = residues.index(self)
273
         i = residues.index(self)
133
         # if there are no turns or it is the first residue, skip
274
         # if there are no turns or it is the first residue, skip
139
         return(False)
280
         return(False)
140
 
281
 
141
     def get_ladder(self, residues):
282
     def get_ladder(self, residues):
283
+        """Return if there is a ladder at a given residue.
284
+
285
+        Parameters
286
+        ----------
287
+        self : Residue object
288
+            Reference to Residue instance.
289
+        residues : list[Residue, ...]
290
+            List of all the Residues in the protein.
291
+
292
+        Returns
293
+        -------
294
+        dict | Boolean
295
+            If there is a ladder, return it as a dictionnary, else returns False.
296
+        
297
+        """
142
         i = residues.index(self)
298
         i = residues.index(self)
143
         if i != 0:
299
         if i != 0:
144
             if self.get_bridges(residues):
300
             if self.get_bridges(residues):
153
         return False   
309
         return False   
154
 
310
 
155
     def get_tco(self, residues):
311
     def get_tco(self, residues):
312
+        """Cosine of angle between C=O of residue I and C=O of residue I-1. 
313
+
314
+        Parameters
315
+        ----------
316
+        self : Residue object
317
+            Reference to Residue instance.
318
+        residues : list[Residue, ...]
319
+            List of all the Residues in the protein.
320
+
321
+        For α-helices, TCO is near +1, for β-sheets TCO is near -1. 
322
+        Not used for structure definition. 
323
+
324
+        Returns
325
+        -------
326
+        float
327
+            Cosine of angle.
328
+        """
156
         i = residues.index(self)
329
         i = residues.index(self)
157
         if(i!=0):
330
         if(i!=0):
158
             res2 = residues[i-1]
331
             res2 = residues[i-1]
166
         return(math.cos(angle))
339
         return(math.cos(angle))
167
 
340
 
168
     def get_chirality(self, residues):
341
     def get_chirality(self, residues):
342
+        """Return the chirality at a given residue.
343
+
344
+        Parameters
345
+        ----------
346
+        self : Residue object
347
+            Reference to Residue instance.
348
+        residues : list[Residue, ...]
349
+            List of all the Residues in the protein.
350
+
351
+        Virtual torsion angle (dihedral angle) defined by the four 
352
+        Cα atoms of residues I-1,I,I+1,I+2.Used to define chirality 
353
+        (structure code '+' or '-'). 
354
+
355
+        Returns
356
+        -------
357
+        list[float, str]
358
+            Dihedral angle and chirality sign. 
359
+        """
169
         i = residues.index(self)
360
         i = residues.index(self)
170
         if (i >=1 and i < len(residues)-2):
361
         if (i >=1 and i < len(residues)-2):
171
             chirality = {}
362
             chirality = {}
187
         return [angle, sign]
378
         return [angle, sign]
188
     
379
     
189
     def get_phi_psi(self, residues):
380
     def get_phi_psi(self, residues):
381
+        """Compute Phi and Psi angles of protein
382
+        backbone around the peptidic bound at each
383
+        residue.
384
+
385
+        Parameters
386
+        ----------
387
+        self : Residue object
388
+            Reference to Residue instance.
389
+        residues : list[Residue, ...]
390
+            List of all the Residues in the protein.
391
+
392
+        Returns
393
+        -------
394
+        tuple(float, float)
395
+        Phi and Psi angles.
396
+        """
190
         i = residues.index(self)  
397
         i = residues.index(self)  
191
         if(i==0):
398
         if(i==0):
192
             phi = 360.0
399
             phi = 360.0
205
 
412
 
206
         return((phi, psi))
413
         return((phi, psi))
207
 def get_bridges(residues):
414
 def get_bridges(residues):
415
+    """Construct all bridges between residues in the 
416
+    protein.
417
+    
418
+    Parameters
419
+    ----------
420
+    residues : list[Residue, ...]
421
+        List of all the Residues in the protein.
422
+    
423
+    Constructs a list of Bridges objects, characterized by
424
+    their involved residues positions, sequential positions,
425
+    type (parallel or antiparallel).
426
+    
427
+    Returns
428
+    -------
429
+    list(Bridge)
430
+    Returns the list of bridges.
431
+    """
208
     bridges = {}
432
     bridges = {}
209
     bridge = {}
433
     bridge = {}
210
     strongest_bridge = {}
434
     strongest_bridge = {}
277
         return(False)
501
         return(False)
278
 
502
 
279
 def get_ladders(bridges, residues):
503
 def get_ladders(bridges, residues):
504
+    """Construct all ladders in the protein.
505
+    
506
+    Parameters
507
+    ----------
508
+    residues : list[Residue, ...]
509
+        List of all the Residues in the protein.
510
+    
511
+    Constructs a dictionnary of ladders, characterized by
512
+    their involved residues positions, sequential positions,
513
+    involved bridges. Ladders are made of consecutive bridges
514
+    of the same type.
515
+    
516
+    Returns
517
+    -------
518
+    dict
519
+        Returns the dictionnary of ladders.
520
+    """
280
     ladders = {}
521
     ladders = {}
281
     i = 1
522
     i = 1
282
     while i < len(residues):
523
     while i < len(residues):
299
     return ladders
540
     return ladders
300
 
541
 
301
 def connected_ladders(ladd_1, ladd_2):
542
 def connected_ladders(ladd_1, ladd_2):
543
+    """Check if two ladders, ladd_1 and ladd_2, 
544
+    are linked by one shared bridge.
545
+    """
302
     links = []
546
     links = []
303
     for bridge in ladd_1['bridges']:
547
     for bridge in ladd_1['bridges']:
304
         if bridge.res_partner in res_list(ladd_2):
548
         if bridge.res_partner in res_list(ladd_2):
335
     return sheets
579
     return sheets
336
    
580
    
337
 def res_list(ladder):
581
 def res_list(ladder):
582
+    """Iterate over ladders, from start to end,
583
+    to get the list of residues inside it.
584
+    """
338
     l=[]
585
     l=[]
339
     for i in range(ladder['i'], ladder['j']):
586
     for i in range(ladder['i'], ladder['j']):
340
         l.append(i)
587
         l.append(i)
357
     return[turns_3, turns_4, turns_5]
604
     return[turns_3, turns_4, turns_5]
358
 
605
 
359
 def build_helix_patterns(residues):
606
 def build_helix_patterns(residues):
607
+    """Builds series of letters (G, H  or I) corresponding
608
+    to helix types (3, 4 or 5). Used for the final output.
609
+    """
360
     helix_3 = {}
610
     helix_3 = {}
361
     helix_4 = {}
611
     helix_4 = {}
362
     helix_5 = {}
612
     helix_5 = {}
375
     return[helix_3, helix_4, helix_5]
625
     return[helix_3, helix_4, helix_5]
376
 
626
 
377
 def print_helix_pattern(residues, res, helix):
627
 def print_helix_pattern(residues, res, helix):
628
+    """Used to print the Helix type at a specific
629
+    residue position if there is one. Used for the final
630
+    output.
631
+    """
378
     i = residues.index(res)+1
632
     i = residues.index(res)+1
379
     if i in helix.keys():
633
     if i in helix.keys():
380
         return (helix[i])
634
         return (helix[i])
382
         return(' ')
636
         return(' ')
383
 
637
 
384
 def print_turn_pattern(residues, res, turns):
638
 def print_turn_pattern(residues, res, turns):
639
+    """Builds series of symbols (>, <  or n) corresponding
640
+    to turn types (n=3, 4 or 5). Used for the final output.
641
+    '>' represent start of the turn series.
642
+    '<' represent end of the turn.
643
+    """
385
     i = residues.index(res)+1
644
     i = residues.index(res)+1
386
     if i in turns.keys() and not i-1 in turns.keys():
645
     if i in turns.keys() and not i-1 in turns.keys():
387
         return(">")
646
         return(">")
392
     else:
651
     else:
393
         return(' ')
652
         return(' ')
394
     
653
     
395
-def raw_ladders_print(ladders, ):
654
+def raw_ladders_print(ladders):
655
+    """Function to print roughly the connected ladders by bridges in 
656
+    the final output.
657
+    This has been use to illustrate sheets principles.
658
+    Only shown if the -v or --verbose flag is used when running
659
+    dssp.py.
660
+
661
+    """
662
+    print("### CONNECTED LADDERS ###")
396
     for ladd1 in ladders:
663
     for ladd1 in ladders:
397
         ladd_1 = ladders[ladd1]
664
         ladd_1 = ladders[ladd1]
398
         for ladd2 in ladders:
665
         for ladd2 in ladders:

+ 5 - 0
src/geom.py View File

1
 import math
1
 import math
2
 import numpy as np
2
 import numpy as np
3
+
4
+# A couple of obvious functions for vector manipulation.
5
+# Lack of time caused this not to be OOP-implemented, or furthermore
6
+# documented.
7
+
3
 def vector_from_pos(a, b):
8
 def vector_from_pos(a, b):
4
     xAB = b[0]-a[0]
9
     xAB = b[0]-a[0]
5
     yAB = b[1]-a[1]
10
     yAB = b[1]-a[1]

+ 24 - 5
src/pdb.py View File

112
             #Need to add hydrogens 
112
             #Need to add hydrogens 
113
             self.add_hydrogens(filename)
113
             self.add_hydrogens(filename)
114
 
114
 
115
-    def check_hydrogens(self, atoms):
116
-        print("ENTER CHECK HYDROGEN")
117
-        return True
118
-
119
     def add_hydrogens(self, filename, output_pdb=None):
115
     def add_hydrogens(self, filename, output_pdb=None):
116
+        """Add hydrogens to N atoms of the protein backbone.
117
+
118
+        Parameters
119
+        ----------
120
+        self : PDBFile
121
+            The instance of PDB File to get the header from.
122
+
123
+        filename : str
124
+            The name of the file to be exploited by PyMOL lib.
125
+        output_pdb : str
126
+            If defined, the name of the file to be saved py PyMOL,
127
+            with hydrogens added.
128
+
129
+        This function is used to add the missing H atoms on the
130
+        Nitrogens atoms of the protein backbone, when H are missing
131
+        from PDB file. H are needed to perform all the bond interactions-
132
+        based calculations.
133
+
134
+        Returns
135
+        -------
136
+        dict
137
+            A dictionnary of all Hydrogens added to the backbone.
138
+        """
120
         pymol.finish_launching(['pymol', '-qc'])
139
         pymol.finish_launching(['pymol', '-qc'])
121
         pymol.cmd.load(filename)
140
         pymol.cmd.load(filename)
122
         pymol.cmd.select("nitrogens",'name n')  
141
         pymol.cmd.select("nitrogens",'name n')  
129
                                 'else ' \
148
                                 'else ' \
130
                                 'stored.pos[resi].append([name,resi,x,y,z])')
149
                                 'stored.pos[resi].append([name,resi,x,y,z])')
131
         if(output_pdb!=None):
150
         if(output_pdb!=None):
132
-            pymol.cmd.save(output_file)
151
+            pymol.cmd.save(output_pdb)
133
         for resi in self.residues:
152
         for resi in self.residues:
134
             if(str(resi.resid) in pymol.stored.pos.keys()):
153
             if(str(resi.resid) in pymol.stored.pos.keys()):
135
                 # iterate over residue H atoms
154
                 # iterate over residue H atoms