Browse Source

fix karyotype plot

tforest 2 years ago
parent
commit
0425fcfa03
2 changed files with 70 additions and 24 deletions
  1. 64 20
      customgraphics.py
  2. 6 4
      vcf_utils.py

+ 64 - 20
customgraphics.py View File

@@ -152,12 +152,24 @@ def plot_matrix(mat, legend=None, color_scale_type="YlGn", cbarlabel = "qt", tit
152 152
     plt.show()
153 153
 
154 154
 def plot(x, y, outfile = None, outfolder = None, ylab=None, xlab=None,
155
-         title=None, label = None, show=True):
155
+         title=None, label = None, show=True, nb_subplots = None, subplot_init = False,
156
+         subplot_id = None):
157
+    if subplot_init:
158
+        # define a certain amount of subplots
159
+        fig, axs = plt.subplots(nb_subplots)
160
+    
156 161
     if x:
157
-        fig, = plt.plot(x, y)
162
+        if nb_subplots:
163
+            axs[subplot_id].plot(x, y)
164
+        else:
165
+            fig, = plt.plot(x, y)
158 166
     else:
159 167
         # x is optional
160
-        fig, = plt.plot(y)
168
+        if nb_subplots:
169
+            # define a certain amount of subplots
170
+            axs[subplot_id].plot(y)
171
+        else:
172
+            fig, = plt.plot(y)
161 173
     if label:
162 174
         # if legend
163 175
         fig.set_label(label)
@@ -195,37 +207,69 @@ def barplot(x, y, ylab=None, xlab=None, title=None):
195 207
     plt.show()
196 208
 
197 209
 def plot_chrom_continuity(vcf_entries, chr_id, x=None, y=None, outfile = None,
198
-                          outfolder = None, returned=False, show=True, label=True):
210
+                          outfolder = None, returned=False, show=True, label=True, step=1, nb_subplots = None,
211
+                          subplot_init = False, subplot_id = None, title = None):
199 212
     chr_name = list(vcf_entries.keys())[chr_id]
200 213
     if label:
201 214
         label = chr_name
215
+    if not title:
216
+        title = "Genotyped pos in chr "+str(chr_id+1)+":'"+chr_name+"'"
202 217
     chr_entries = vcf_entries[chr_name]
203
-    genotyped_pos = vcf_utils.genotyping_continuity_plot(chr_entries)
218
+    genotyped_pos = vcf_utils.genotyping_continuity_plot(chr_entries, step=step)
204 219
     if returned:
205 220
         # if we do not want to plot while executing
206 221
         # useful for storing the x,y coords in a variable for ex.
207 222
         return genotyped_pos
208 223
     else:
209 224
         # to plot on the fly
210
-        plot(x, y=genotyped_pos[1], ylab = "genotyped pos.",
225
+        plot(x=genotyped_pos[0], y=genotyped_pos[1], ylab = "genotyped pos.",
211 226
              xlab = "pos. in ref.",
212
-             title = "Genotyped pos in chr "+str(chr_id+1)+":'"+chr_name+"'",
213
-             outfile = outfile, outfolder = outfolder, show=show, label=label)
227
+             title = title,
228
+             outfile = outfile, outfolder = outfolder, show=show, label=label,
229
+             nb_subplots = nb_subplots, subplot_init = subplot_init, subplot_id = subplot_id)
214 230
 
215
-def plot_whole_karyotype(recent_variants, mem_clean = False):
231
+def plot_whole_karyotype(recent_variants, mem_clean = False, step = 1, show = True, min_chr_id = 0,
232
+                         max_chr_id = None, stacked = False, title = None):
216 233
     coords = []
217
-    for chr in range(len(recent_variants)):
218
-        x, y = vcf_utils.customgraphics.plot_chrom_continuity(recent_variants, chr_id = chr, show = False, returned = True)
219
-        coords.append([x, y])
220
-        if mem_clean:
221
-            start = time.time()
222
-            del x
223
-            del y
224
-            gc.collect()
225
-            end = time.time()
226
-            print("Cleaned mem. in", str(datetime.timedelta(seconds=end - start)))
234
+    if max_chr_id :
235
+        nb_iter = max_chr_id
236
+    else:
237
+        nb_iter =  len(recent_variants)
238
+    if show :
239
+        iter_start = min_chr_id + 1
240
+        if not step :
241
+            step = round(len(recent_variants[list(recent_variants.keys())[min_chr_id]]) / step)
242
+        if stacked:
243
+            nb_subplots = nb_iter - min_chr_id
244
+            subplot_init = True
245
+        else:
246
+            nb_subplots = None
247
+            subplot_init = False
248
+        vcf_utils.customgraphics.plot_chrom_continuity(recent_variants, chr_id = min_chr_id, show = False, returned = False, step = step,
249
+                                                       nb_subplots = nb_subplots, subplot_init = subplot_init, subplot_id = min_chr_id)
250
+    else :
251
+        iter_start = 0
252
+    for chr in range(iter_start, nb_iter):
253
+        if show == False:
254
+            x, y = vcf_utils.customgraphics.plot_chrom_continuity(recent_variants, chr_id = chr, show = False, returned = True, step = step)
255
+            coords.append([x, y])
256
+            if mem_clean:
257
+                start = time.time()
258
+                del x
259
+                del y
260
+                gc.collect()
261
+                end = time.time()
262
+                print("Cleaned mem. in", str(datetime.timedelta(seconds=end - start)))
263
+        else:
264
+            # if show is enable, use a step
265
+            step = round(len(recent_variants[list(recent_variants.keys())[chr]]) / 1000)
266
+            vcf_utils.customgraphics.plot_chrom_continuity(recent_variants, chr_id = chr, show = False, returned = False, step = step, subplot_id = chr)
267
+        # last case
268
+    if show == True:
269
+        vcf_utils.customgraphics.plot_chrom_continuity(recent_variants, chr_id = nb_iter, show = True, returned = False, step = step, subplot_id = nb_iter, title = title)
227 270
     # maybe add a clean of recent_variants in extreme cases, before building the plots
228
-    return coords
271
+    if show == False:
272
+        return coords
229 273
 
230 274
 def plot_chrom_coverage(vcf_entries, chr_id):
231 275
     chr_name = list(vcf_entries.keys())[chr_id]

+ 6 - 4
vcf_utils.py View File

@@ -170,21 +170,23 @@ def build_polymorph_coverage_matrix(entries, noGenotype, diploid=True, na_omit =
170 170
         mat = mat / row_sums[:, np.newaxis]
171 171
     return mat
172 172
 
173
-def genotyping_continuity_plot(vcf_entries, verbose=False):
173
+def genotyping_continuity_plot(vcf_entries,
174
+                                                            verbose=False,
175
+                                                            step = 1):
174 176
     last_pos = int(sorted(list(vcf_entries.keys()))[-1])
175 177
     x = 0
176 178
     y = 1
177 179
     coords = [[], []]
178 180
     print(last_pos, "sites to scan")
179
-    for k, pos in enumerate(range(last_pos)):
181
+    for k, pos in enumerate(range(0, last_pos, step)):
180 182
         if verbose:
181 183
             progress = round(k/int(last_pos))*100
182 184
             if progress % 10 == 0:
183 185
                 print(progress, "%")
184 186
         # if pos is genotyped
185 187
         if k in vcf_entries:
186
-            y+=1
187
-        x+=1
188
+            y+=1*step
189
+        x+=1*step
188 190
         coords[0].append(x)
189 191
         coords[1].append(y)
190 192
     return coords