Browse Source

Previous plotting system (before the February version)

tforest 2 months ago
parent
commit
8c8545a9ff
1 changed file with 26 additions and 176 deletions
  1. 26 176
      swp2.py

+ 26 - 176
swp2.py View File

@@ -124,158 +124,6 @@ def plot_straight_x_y(x,y):
124 124
     x_1.append(x[-1])
125 125
     return x_1, y_1
126 126
 
127
-def plot_all_epochs_thetafolder_old(folder_path, mu, tgen, title = "Title",
128
-    theta_scale = True, ax = None, input = None, output = None):
129
-    #scenari = {}
130
-    cpt = 0
131
-    epochs = {}
132
-    for file_name in os.listdir(folder_path):
133
-        breaks = 0
134
-        cpt +=1
135
-        if os.path.isfile(os.path.join(folder_path, file_name)):
136
-            x, y, likelihood, theta, sfs, L = parse_stwp_theta_file(folder_path+file_name, breaks = breaks,
137
-                                                             tgen = tgen,
138
-                                                             mu = mu, relative_theta_scale = theta_scale)
139
-            SFS_stored = sfs
140
-            L_stored = L
141
-            while not (x == 0 and y == 0):
142
-                if breaks not in epochs.keys():
143
-                    epochs[breaks] = {}
144
-                epochs[breaks][likelihood] = x,y
145
-                breaks += 1
146
-                x,y,likelihood,theta,sfs,L = parse_stwp_theta_file(folder_path+file_name, breaks = breaks,
147
-                                                                 tgen = tgen,
148
-                                                                  mu = mu, relative_theta_scale = theta_scale)
149
-            if x == 0:
150
-                # last break did not work, then breaks = breaks-1
151
-                breaks -= 1
152
-    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
153
-    print(cpt, "theta file(s) have been scanned.")
154
-    my_dpi = 300
155
-    if ax is None:
156
-        # initialize figure
157
-        my_dpi = 300
158
-        fnt_size = 18
159
-        # plt.rcParams['font.size'] = fnt_size
160
-        fig, ax1 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
161
-    else:
162
-        fnt_size = 12
163
-        # plt.rcParams['font.size'] = fnt_size
164
-        ax1 = ax[1][0,0]
165
-    ax1.set_yscale('log')
166
-    ax1.set_xscale('log')
167
-    ax1.grid(True,which="both", linestyle='--', alpha = 0.3)
168
-    brkpt_lik = []
169
-    top_plots = {}
170
-    for epoch, scenari in epochs.items():
171
-        # sort starting by the smallest -log(Likelihood)
172
-        best10_scenari = (sorted(list(scenari.keys())))[:10]
173
-        greatest_likelihood = best10_scenari[0]
174
-        # store the tuple breakpoints and likelihood for later plot
175
-        brkpt_lik.append((epoch, greatest_likelihood))
176
-        x, y = scenari[greatest_likelihood]
177
-        #without breakpoint
178
-        if epoch == 0:
179
-            # do something with the theta without bp and skip the plotting
180
-            N0 = y[0]
181
-            #continue
182
-        for i in range(len(y)):
183
-            # divide by N0
184
-            y[i] = y[i]/N0
185
-            x[i] = x[i]/N0
186
-        top_plots[greatest_likelihood] = x,y,epoch
187
-    plots_likelihoods = list(top_plots.keys())
188
-    for i in range(len(plots_likelihoods)):
189
-        plots_likelihoods[i] = float(plots_likelihoods[i])
190
-    best10_plots = sorted(plots_likelihoods)[:10]
191
-    top_plot_lik = str(best10_plots[0])
192
-    plot_handles = []
193
-    # plt.rcParams['font.size'] = fnt_size
194
-    p0, = ax1.plot(top_plots[top_plot_lik][0], top_plots[top_plot_lik][1], 'o', linestyle = "-",
195
-    alpha=1, lw=2, label = str(top_plots[top_plot_lik][2])+' brks | Lik='+top_plot_lik)
196
-    plot_handles.append(p0)
197
-    for k, plot_Lk in enumerate(best10_plots[1:]):
198
-        plot_Lk = str(plot_Lk)
199
-        # plt.rcParams['font.size'] = fnt_size
200
-        p, = ax1.plot(top_plots[plot_Lk][0], top_plots[plot_Lk][1], 'o', linestyle = "--",
201
-        alpha=1/(k+1), lw=1.5, label = str(top_plots[plot_Lk][2])+' brks | Lik='+plot_Lk)
202
-        plot_handles.append(p)
203
-    if theta_scale:
204
-        ax1.set_xlabel("Coal. time", fontsize=fnt_size)
205
-        ax1.set_ylabel("Pop. size scaled by N0", fontsize=fnt_size)
206
-        # recent_scale_lower_bound = 0.01
207
-        # recent_scale_upper_bound = 0.1
208
-        # ax1.axvline(x=recent_scale_lower_bound)
209
-        # ax1.axvline(x=recent_scale_upper_bound)
210
-    else:
211
-        # years
212
-        plt.set_xlabel("Time (years)", fontsize=fnt_size)
213
-        plt.set_ylabel("Individuals (N)", fontsize=fnt_size)
214
-    # plt.rcParams['font.size'] = fnt_size
215
-    # print(fnt_size, "rcParam font.size=", plt.rcParams['font.size'])
216
-    ax1.legend(handles = plot_handles, loc='best', fontsize = fnt_size*0.5)
217
-    ax1.set_title(title)
218
-    if ax is None:
219
-        plt.savefig(title+'_b'+str(breaks)+'.pdf')
220
-    # plot likelihood against nb of breakpoints
221
-    # best possible likelihood from SFS
222
-    # Segregating sites
223
-    S = sum(SFS_stored)
224
-    # Number of kept sites from which the SFS is computed
225
-    L = L_stored
226
-    # number of monomorphic sites
227
-    S0 = L-S
228
-    # print("SFS", SFS_stored)
229
-    # print("S", S, "L", L, "S0=", S0)
230
-    # compute Ln
231
-    Ln = log_facto(S+S0) - log_facto(S0) + np.log(float(S0)/(S+S0)) * S0
232
-    for xi in range(0, len(SFS_stored)):
233
-        p_i = SFS_stored[xi] / float(S+S0)
234
-        Ln += np.log(p_i) * SFS_stored[xi] - log_facto(SFS_stored[xi])
235
-    # basic plot likelihood
236
-    if ax is None:
237
-        fig, ax2 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
238
-        # plt.rcParams['font.size'] = fnt_size
239
-    else:
240
-        #plt.rcParams['font.size'] = fnt_size
241
-        ax2 = ax[0][0,1]
242
-    ax2.plot(np.array(brkpt_lik)[:, 0], np.array(brkpt_lik)[:, 1].astype(float), 'o', linestyle = "dotted", lw=2)
243
-    ax2.axhline(y=-Ln, linestyle = "-.", color = "red", label = "$-\log\mathcal{L}$ = "+str(round(-Ln, 2)))
244
-    ax2.set_yscale('log')
245
-    ax2.set_xlabel("# breakpoints", fontsize=fnt_size)
246
-    ax2.set_ylabel("$-\log\mathcal{L}$", fontsize=fnt_size)
247
-    ax2.legend(loc='best', fontsize = fnt_size*0.5)
248
-    ax2.set_title(title+" Likelihood gain from # breakpoints")
249
-    if ax is None:
250
-        plt.savefig(title+'_Breakpts_Likelihood.pdf')
251
-    # AIC
252
-    if ax is None:
253
-        fig, ax3 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
254
-        # plt.rcParams['font.size'] = '18'
255
-    else:
256
-        #plt.rcParams['font.size'] = fnt_size
257
-        ax3 = ax[1][0,1]
258
-    AIC = []
259
-    for brk in np.array(brkpt_lik)[:, 0]:
260
-        brk = int(brk)
261
-        AIC.append((2*brk+1)+2*np.array(brkpt_lik)[brk, 1].astype(float))
262
-    ax3.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
263
-    # AIC = 2*k - 2ln(L) ; where k is the number of parameters, here brks+1
264
-    AIC_ln = 2*(len(brkpt_lik)+1) - 2*Ln
265
-    ax3.axhline(y=AIC_ln, linestyle = "-.", color = "red",
266
-    label = "Min. AIC = "+str(round(AIC_ln, 2)))
267
-    selected_brks_nb = AIC.index(min(AIC))
268
-    ax3.set_yscale('log')
269
-    ax3.set_xlabel("# breakpoints", fontsize=fnt_size)
270
-    ax3.set_ylabel("AIC")
271
-    ax3.legend(loc='best', fontsize = fnt_size*0.5)
272
-    ax3.set_title(title+" AIC")
273
-    if ax is None:
274
-        plt.savefig(title+'_Breakpts_Likelihood_AIC.pdf')
275
-    print("S", S)
276
-    # return plots
277
-    return ax[0], ax[1]
278
-
279 127
 def plot_all_epochs_thetafolder(full_dict, mu, tgen, title = "Title",
280 128
     theta_scale = True, ax = None, input = None, output = None):
281 129
     my_dpi = 300
@@ -442,21 +290,26 @@ def save_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
442 290
     # AIC = 2*k - 2ln(L) ; where k is the number of parameters, here brks+1
443 291
     AIC_ln = 2*(len(brkpt_lik)+1) - 2*Ln
444 292
     best_AIC = AIC_ln
293
+    selected_brks_nb = AIC.index(min(AIC))
445 294
     # to return : plots ; Ln_Brks ; AIC_Brks ; best_Ln ; best_AIC
446 295
     # 'plots' dict keys: 'best', {epochs}('0', '1',...)
447 296
     if input == None:
448
-        saved_plots = {"all_epochs":plots, "Ln_Brks":Ln_Brks,
297
+        saved_plots = {"S":S, "S0":S0, "L":L, "all_epochs":plots, "Ln_Brks":Ln_Brks,
449 298
                         "AIC_Brks":AIC_Brks, "best_Ln":best_Ln,
450
-                        "best_AIC":best_AIC}
299
+                        "best_AIC":best_AIC, "best_epoch_by_AIC":selected_brks_nb}
451 300
     else:
452 301
         # if the dict has to be loaded from input
453 302
         with open(input, 'r') as json_file:
454 303
             saved_plots = json.load(json_file)
304
+            saved_plots["S"] = S
305
+            saved_plots["S0"] = S0
306
+            saved_plots["L"] = L
455 307
             saved_plots["all_epochs"] = plots
456 308
             saved_plots["Ln_Brks"] = Ln_Brks
457 309
             saved_plots["AIC_Brks"] = AIC_Brks
458 310
             saved_plots["best_Ln"] = best_Ln
459 311
             saved_plots["best_AIC"] = best_AIC
312
+            saved_plots["best_epoch_by_AIC"] = selected_brks_nb
460 313
     if output == None:
461 314
         output = title+"_plotdata.json"
462 315
     with open(output, 'w') as json_file:
@@ -573,7 +426,7 @@ def save_k_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
573 426
         json.dump(saved_plots, json_file)
574 427
     return saved_plots
575 428
 
576
-def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10):
429
+def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10, subset = None):
577 430
     # fig 2 & 3
578 431
     if ax is None:
579 432
         my_dpi = 300
@@ -589,13 +442,21 @@ def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10):
589 442
     lines_fig2 = []
590 443
     lines_fig3 = []
591 444
     #plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
592
-    for epoch, plot in enumerate(plot_lines):
445
+    nb_breaks = len(plot_lines)
446
+    for breaks, plot in enumerate(plot_lines):
447
+        if subset is not None:
448
+            if breaks not in subset :
449
+                # skip if not in subset
450
+                if max(subset) > nb_breaks and breaks == nb_breaks-1:
451
+                    pass
452
+                else:
453
+                    continue
593 454
         x,y=plot
594 455
         x2_plot, y2_plot = plot_straight_x_y(x,y)
595
-        p2, = ax2.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
456
+        p2, = ax2.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(breaks)+' brks')
596 457
         lines_fig2.append(p2)
597 458
         # Plotting (fig 3) which is the same but log scale for x
598
-        p3, = ax3.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
459
+        p3, = ax3.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(breaks)+' brks')
599 460
         lines_fig3.append(p3)
600 461
     ax2.set_xlabel("Relative scale", fontsize=fnt_size)
601 462
     ax2.set_ylabel("theta", fontsize=fnt_size)
@@ -666,24 +527,9 @@ def plot_raw_stairs(plot_lines, prop, title, ax = None, n_ticks = 10):
666 527
     # return plots
667 528
     return ax
668 529
 
669
-def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True):
530
+def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True, selected_breaks = []):
670 531
     my_dpi = 300
671
-    # # Add some extra space for the second axis at the bottom
672
-    # #plt.rcParams['font.size'] = 18
673
-    # fig, axs = plt.subplots(2, 2, figsize=(5000/my_dpi, 2970/my_dpi), dpi=my_dpi)
674
-    # #plt.rcParams['font.size'] = 12
675
-    # ax = plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = axs)
676
-    # ax = plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = axs)
677
-    # # Adjust layout to prevent clipping of titles
678
-    #
679 532
 
680
-    # # Save the entire grid as a single figure
681
-    # plt.savefig(title+'_combined.pdf')
682
-    # plt.clf()
683
-    # # # second call for individual plots
684
-    # # plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
685
-    # # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
686
-    # # plt.clf()
687 533
     save_k_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, output = title+"_plotdata.json")
688 534
     save_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, input = title+"_plotdata.json", output = title+"_plotdata.json")
689 535
 
@@ -699,12 +545,15 @@ def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale =
699 545
     # fig2.tight_layout()
700 546
     ax1 = plot_raw_stairs(plot_lines = loaded_data['raw_stairs'],
701 547
                             prop = loaded_data['prop'], title = title, ax = ax1)
702
-
703 548
     ax1 = plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
704
-                            prop = loaded_data['prop'], title = title, ax = ax1)
549
+                                prop = loaded_data['prop'], title = title, ax = ax1, subset=[loaded_data['best_epoch_by_AIC']]+selected_breaks)
550
+    ax2 = plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
551
+                            prop = loaded_data['prop'], title = title, ax = ax2)
705 552
     ax1, ax2 = plot_all_epochs_thetafolder(loaded_data, mu, tgen, title, theta_scale, ax = [ax1, ax2])
706 553
     fig1.savefig(title+'_combined_p1.pdf')
554
+    print("Wrote", title+'_combined_p1.pdf')
707 555
     fig2.savefig(title+'_combined_p2.pdf')
556
+    print("Wrote", title+'_combined_p2.pdf')
708 557
     plot_raw_stairs(plot_lines = loaded_data['raw_stairs'],
709 558
                             prop = loaded_data['prop'], title = title, ax = None)
710 559
     plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
@@ -712,6 +561,7 @@ def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale =
712 561
 
713 562
     plt.close(fig1)
714 563
     plt.close(fig2)
564
+
715 565
 if __name__ == "__main__":
716 566
 
717 567
     if len(sys.argv) != 4: