Browse Source

update plot lib for multiple chrom coverage

tforest 2 years ago
parent
commit
257c713458
4 changed files with 59 additions and 13 deletions
  1. 1 3
      __init__.py
  2. 48 8
      customgraphics.py
  3. 1 2
      sfs_tools.py
  4. 9 0
      vcf_utils.py

+ 1 - 3
__init__.py View File

1
-from frst import sfs_tools
2
-from frst import customgraphics
3
-from frst import vcf_utils
1
+from frst import sfs_tools, customgraphics, vcf_utils, sfs_tools

+ 48 - 8
customgraphics.py View File

9
 import matplotlib.pyplot as plt
9
 import matplotlib.pyplot as plt
10
 import matplotlib.ticker as ticker
10
 import matplotlib.ticker as ticker
11
 import numpy as np
11
 import numpy as np
12
+import gc
13
+import time
14
+import datetime
15
+import pandas as pd
16
+# custom libs
12
 from frst import vcf_utils
17
 from frst import vcf_utils
13
 
18
 
19
+
14
 def heatmap(data, row_labels=None, col_labels=None, ax=None,
20
 def heatmap(data, row_labels=None, col_labels=None, ax=None,
15
             cbar_kw={}, cbarlabel="", **kwargs):
21
             cbar_kw={}, cbarlabel="", **kwargs):
16
     """
22
     """
145
     fig.tight_layout()
151
     fig.tight_layout()
146
     plt.show()
152
     plt.show()
147
 
153
 
148
-def plot(x, y, outfile = None, outfolder = None, ylab=None, xlab=None, title=None):
149
-    plt.plot(x, y)
154
+def plot(x, y, outfile = None, outfolder = None, ylab=None, xlab=None,
155
+         title=None, label = None, show=True):
156
+    if x:
157
+        fig, = plt.plot(x, y)
158
+    else:
159
+        # x is optional
160
+        fig, = plt.plot(y)
161
+    if label:
162
+        # if legend
163
+        fig.set_label(label)
164
+        plt.legend()
150
     if ylab:
165
     if ylab:
151
         plt.ylabel(ylab)
166
         plt.ylabel(ylab)
152
     if xlab:
167
     if xlab:
156
     if outfile:
171
     if outfile:
157
         plt.savefig(outfile)
172
         plt.savefig(outfile)
158
     else:
173
     else:
159
-        plt.show()
174
+        if show == True:
175
+            plt.show()
160
 
176
 
161
 def scatter(x, y, ylab=None, xlab=None, title=None):
177
 def scatter(x, y, ylab=None, xlab=None, title=None):
162
     plt.scatter(x, y)
178
     plt.scatter(x, y)
178
         plt.title(title)
194
         plt.title(title)
179
     plt.show()
195
     plt.show()
180
 
196
 
181
-def plot_chrom_continuity(vcf_entries, chr_id, outfile = None, outfolder = None):
197
+def plot_chrom_continuity(vcf_entries, chr_id, x=None, y=None, outfile = None,
198
+                          outfolder = None, returned=False, show=True, label=True):
182
     chr_name = list(vcf_entries.keys())[chr_id]
199
     chr_name = list(vcf_entries.keys())[chr_id]
200
+    if label:
201
+        label = chr_name
183
     chr_entries = vcf_entries[chr_name]
202
     chr_entries = vcf_entries[chr_name]
184
     genotyped_pos = vcf_utils.genotyping_continuity_plot(chr_entries)
203
     genotyped_pos = vcf_utils.genotyping_continuity_plot(chr_entries)
185
-    plot(genotyped_pos[0], genotyped_pos[1], ylab = "genotyped pos.",
186
-         xlab = "pos. in ref.",
187
-         title = "Genotyped pos in chr "+str(chr_id+1)+":'"+chr_name+"'",
188
-         outfile = outfile, outfolder = outfolder)
204
+    if returned:
205
+        # if we do not want to plot while executing
206
+        # useful for storing the x,y coords in a variable for ex.
207
+        return genotyped_pos
208
+    else:
209
+        # to plot on the fly
210
+        plot(x, y=genotyped_pos[1], ylab = "genotyped pos.",
211
+             xlab = "pos. in ref.",
212
+             title = "Genotyped pos in chr "+str(chr_id+1)+":'"+chr_name+"'",
213
+             outfile = outfile, outfolder = outfolder, show=show, label=label)
214
+
215
+def plot_whole_karyotype(recent_variants, mem_clean = False):
216
+    coords = []
217
+    for chr in range(len(recent_variants)):
218
+        x, y = vcf_utils.customgraphics.plot_chrom_continuity(recent_variants, chr_id = chr, show = False, returned = True)
219
+        coords.append([x, y])
220
+        if mem_clean:
221
+            start = time.time()
222
+            del x
223
+            del y
224
+            gc.collect()
225
+            end = time.time()
226
+            print("Cleaned mem. in", str(datetime.timedelta(seconds=end - start)))
227
+    # maybe add a clean of recent_variants in extreme cases, before building the plots
228
+    return coords
189
 
229
 
190
 def plot_chrom_coverage(vcf_entries, chr_id):
230
 def plot_chrom_coverage(vcf_entries, chr_id):
191
     chr_name = list(vcf_entries.keys())[chr_id]
231
     chr_name = list(vcf_entries.keys())[chr_id]

+ 1 - 2
sfs_tools.py View File

19
 def sfs_from_vcf(n, vcf_file, folded = True, diploid = True, phased = False, verbose = False):
19
 def sfs_from_vcf(n, vcf_file, folded = True, diploid = True, phased = False, verbose = False):
20
 
20
 
21
     """
21
     """
22
-    Multiplication de deux nombres entiers.
23
-    Cette fonction ne sert pas à grand chose.
22
+    Generates a Site Frequency Spectrum from a gzipped VCF file format.
24
 
23
 
25
     Parameters
24
     Parameters
26
     ----------
25
     ----------

+ 9 - 0
vcf_utils.py View File

18
 import json 
18
 import json 
19
 import time
19
 import time
20
 import datetime
20
 import datetime
21
+import gc
22
+import pandas as pd
23
+
21
 
24
 
22
 def parse_vcf(vcf_file, phased=False, stop_at=None, chr_starts_with="*"):
25
 def parse_vcf(vcf_file, phased=False, stop_at=None, chr_starts_with="*"):
23
     start = time.time()
26
     start = time.time()
199
         coords[1].append(y)
202
         coords[1].append(y)
200
     return coords
203
     return coords
201
 
204
 
205
+def free(obj):
206
+    """ Free the object and call the garbage collector explicitly
207
+    """
208
+    del obj
209
+    gc.collect()
210
+
202
 if __name__ == "__main__":
211
 if __name__ == "__main__":
203
     # check args
212
     # check args
204
     if len(sys.argv) !=2:
213
     if len(sys.argv) !=2: