Bladeren bron

update plot lib for multiple chrom coverage

tforest 2 jaren geleden
bovenliggende
commit
257c713458
4 gewijzigde bestanden met toevoegingen van 59 en 13 verwijderingen
  1. 1 3
      __init__.py
  2. 48 8
      customgraphics.py
  3. 1 2
      sfs_tools.py
  4. 9 0
      vcf_utils.py

+ 1 - 3
__init__.py Bestand weergeven

@@ -1,3 +1 @@
1
-from frst import sfs_tools
2
-from frst import customgraphics
3
-from frst import vcf_utils
1
+from frst import sfs_tools, customgraphics, vcf_utils, sfs_tools

+ 48 - 8
customgraphics.py Bestand weergeven

@@ -9,8 +9,14 @@ FOREST Thomas (thomas.forest@college-de-france.fr)
9 9
 import matplotlib.pyplot as plt
10 10
 import matplotlib.ticker as ticker
11 11
 import numpy as np
12
+import gc
13
+import time
14
+import datetime
15
+import pandas as pd
16
+# custom libs
12 17
 from frst import vcf_utils
13 18
 
19
+
14 20
 def heatmap(data, row_labels=None, col_labels=None, ax=None,
15 21
             cbar_kw={}, cbarlabel="", **kwargs):
16 22
     """
@@ -145,8 +151,17 @@ def plot_matrix(mat, legend=None, color_scale_type="YlGn", cbarlabel = "qt", tit
145 151
     fig.tight_layout()
146 152
     plt.show()
147 153
 
148
-def plot(x, y, outfile = None, outfolder = None, ylab=None, xlab=None, title=None):
149
-    plt.plot(x, y)
154
+def plot(x, y, outfile = None, outfolder = None, ylab=None, xlab=None,
155
+         title=None, label = None, show=True):
156
+    if x:
157
+        fig, = plt.plot(x, y)
158
+    else:
159
+        # x is optional
160
+        fig, = plt.plot(y)
161
+    if label:
162
+        # if legend
163
+        fig.set_label(label)
164
+        plt.legend()
150 165
     if ylab:
151 166
         plt.ylabel(ylab)
152 167
     if xlab:
@@ -156,7 +171,8 @@ def plot(x, y, outfile = None, outfolder = None, ylab=None, xlab=None, title=Non
156 171
     if outfile:
157 172
         plt.savefig(outfile)
158 173
     else:
159
-        plt.show()
174
+        if show == True:
175
+            plt.show()
160 176
 
161 177
 def scatter(x, y, ylab=None, xlab=None, title=None):
162 178
     plt.scatter(x, y)
@@ -178,14 +194,38 @@ def barplot(x, y, ylab=None, xlab=None, title=None):
178 194
         plt.title(title)
179 195
     plt.show()
180 196
 
181
-def plot_chrom_continuity(vcf_entries, chr_id, outfile = None, outfolder = None):
197
+def plot_chrom_continuity(vcf_entries, chr_id, x=None, y=None, outfile = None,
198
+                          outfolder = None, returned=False, show=True, label=True):
182 199
     chr_name = list(vcf_entries.keys())[chr_id]
200
+    if label:
201
+        label = chr_name
183 202
     chr_entries = vcf_entries[chr_name]
184 203
     genotyped_pos = vcf_utils.genotyping_continuity_plot(chr_entries)
185
-    plot(genotyped_pos[0], genotyped_pos[1], ylab = "genotyped pos.",
186
-         xlab = "pos. in ref.",
187
-         title = "Genotyped pos in chr "+str(chr_id+1)+":'"+chr_name+"'",
188
-         outfile = outfile, outfolder = outfolder)
204
+    if returned:
205
+        # if we do not want to plot while executing
206
+        # useful for storing the x,y coords in a variable for ex.
207
+        return genotyped_pos
208
+    else:
209
+        # to plot on the fly
210
+        plot(x, y=genotyped_pos[1], ylab = "genotyped pos.",
211
+             xlab = "pos. in ref.",
212
+             title = "Genotyped pos in chr "+str(chr_id+1)+":'"+chr_name+"'",
213
+             outfile = outfile, outfolder = outfolder, show=show, label=label)
214
+
215
+def plot_whole_karyotype(recent_variants, mem_clean = False):
216
+    coords = []
217
+    for chr in range(len(recent_variants)):
218
+        x, y = vcf_utils.customgraphics.plot_chrom_continuity(recent_variants, chr_id = chr, show = False, returned = True)
219
+        coords.append([x, y])
220
+        if mem_clean:
221
+            start = time.time()
222
+            del x
223
+            del y
224
+            gc.collect()
225
+            end = time.time()
226
+            print("Cleaned mem. in", str(datetime.timedelta(seconds=end - start)))
227
+    # maybe add a clean of recent_variants in extreme cases, before building the plots
228
+    return coords
189 229
 
190 230
 def plot_chrom_coverage(vcf_entries, chr_id):
191 231
     chr_name = list(vcf_entries.keys())[chr_id]

+ 1 - 2
sfs_tools.py Bestand weergeven

@@ -19,8 +19,7 @@ import matplotlib.pyplot as plt
19 19
 def sfs_from_vcf(n, vcf_file, folded = True, diploid = True, phased = False, verbose = False):
20 20
 
21 21
     """
22
-    Multiplication de deux nombres entiers.
23
-    Cette fonction ne sert pas à grand chose.
22
+    Generates a Site Frequency Spectrum from a gzipped VCF file format.
24 23
 
25 24
     Parameters
26 25
     ----------

+ 9 - 0
vcf_utils.py Bestand weergeven

@@ -18,6 +18,9 @@ from frst import customgraphics
18 18
 import json 
19 19
 import time
20 20
 import datetime
21
+import gc
22
+import pandas as pd
23
+
21 24
 
22 25
 def parse_vcf(vcf_file, phased=False, stop_at=None, chr_starts_with="*"):
23 26
     start = time.time()
@@ -199,6 +202,12 @@ def compute_coverage(vcf_entries, verbose=False):
199 202
         coords[1].append(y)
200 203
     return coords
201 204
 
205
+def free(obj):
206
+    """ Free the object and call the garbage collector explicitly
207
+    """
208
+    del obj
209
+    gc.collect()
210
+
202 211
 if __name__ == "__main__":
203 212
     # check args
204 213
     if len(sys.argv) !=2: