1 year ago · 8c8545a9ff
--- a/swp2.py
+++ b/swp2.py
 
															     x_1.append(x[-1])
														
 
															     return x_1, y_1
														
 
															-def plot_all_epochs_thetafolder_old(folder_path, mu, tgen, title = "Title",
														
 
															-    theta_scale = True, ax = None, input = None, output = None):
														
 
															-    #scenari = {}
														
 
															-    cpt = 0
														
 
															-    epochs = {}
														
 
															-    for file_name in os.listdir(folder_path):
														
 
															-        breaks = 0
														
 
															-        cpt +=1
														
 
															-        if os.path.isfile(os.path.join(folder_path, file_name)):
														
 
															-            x, y, likelihood, theta, sfs, L = parse_stwp_theta_file(folder_path+file_name, breaks = breaks,
														
 
															-                                                             tgen = tgen,
														
 
															-                                                             mu = mu, relative_theta_scale = theta_scale)
														
 
															-            SFS_stored = sfs
														
 
															-            L_stored = L
														
 
															-            while not (x == 0 and y == 0):
														
 
															-                if breaks not in epochs.keys():
														
 
															-                    epochs[breaks] = {}
														
 
															-                epochs[breaks][likelihood] = x,y
														
 
															-                breaks += 1
														
 
															-                x,y,likelihood,theta,sfs,L = parse_stwp_theta_file(folder_path+file_name, breaks = breaks,
														
 
															-                                                                 tgen = tgen,
														
 
															-                                                                  mu = mu, relative_theta_scale = theta_scale)
														
 
															-            if x == 0:
														
 
															-                # last break did not work, then breaks = breaks-1
														
 
															-                breaks -= 1
														
 
															-    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
														
 
															-    print(cpt, "theta file(s) have been scanned.")
														
 
															-    my_dpi = 300
														
 
															-    if ax is None:
														
 
															-        # intialize figure
														
 
															-        my_dpi = 300
														
 
															-        fnt_size = 18
														
 
															-        # plt.rcParams['font.size'] = fnt_size
														
 
															-        fig, ax1 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
														
 
															-    else:
														
 
															-        fnt_size = 12
														
 
															-        # plt.rcParams['font.size'] = fnt_size
														
 
															-        ax1 = ax[1][0,0]
														
 
															-    ax1.set_yscale('log')
														
 
															-    ax1.set_xscale('log')
														
 
															-    ax1.grid(True,which="both", linestyle='--', alpha = 0.3)
														
 
															-    brkpt_lik = []
														
 
															-    top_plots = {}
														
 
															-    for epoch, scenari in epochs.items():
														
 
															-        # sort starting by the smallest -log(Likelihood)
														
 
															-        best10_scenari = (sorted(list(scenari.keys())))[:10]
														
 
															-        greatest_likelihood = best10_scenari[0]
														
 
															-        # store the tuple breakpoints and likelihood for later plot
														
 
															-        brkpt_lik.append((epoch, greatest_likelihood))
														
 
															-        x, y = scenari[greatest_likelihood]
														
 
															-        #without breakpoint
														
 
															-        if epoch == 0:
														
 
															-            # do something with the theta without bp and skip the plotting
														
 
															-            N0 = y[0]
														
 
															-            #continue
														
 
															-        for i in range(len(y)):
														
 
															-            # divide by N0
														
 
															-            y[i] = y[i]/N0
														
 
															-            x[i] = x[i]/N0
														
 
															-        top_plots[greatest_likelihood] = x,y,epoch
														
 
															-    plots_likelihoods = list(top_plots.keys())
														
 
															-    for i in range(len(plots_likelihoods)):
														
 
															-        plots_likelihoods[i] = float(plots_likelihoods[i])
														
 
															-    best10_plots = sorted(plots_likelihoods)[:10]
														
 
															-    top_plot_lik = str(best10_plots[0])
														
 
															-    plot_handles = []
														
 
															-    # plt.rcParams['font.size'] = fnt_size
														
 
															-    p0, = ax1.plot(top_plots[top_plot_lik][0], top_plots[top_plot_lik][1], 'o', linestyle = "-",
														
 
															-    alpha=1, lw=2, label = str(top_plots[top_plot_lik][2])+' brks | Lik='+top_plot_lik)
														
 
															-    plot_handles.append(p0)
														
 
															-    for k, plot_Lk in enumerate(best10_plots[1:]):
														
 
															-        plot_Lk = str(plot_Lk)
														
 
															-        # plt.rcParams['font.size'] = fnt_size
														
 
															-        p, = ax1.plot(top_plots[plot_Lk][0], top_plots[plot_Lk][1], 'o', linestyle = "--",
														
 
															-        alpha=1/(k+1), lw=1.5, label = str(top_plots[plot_Lk][2])+' brks | Lik='+plot_Lk)
														
 
															-        plot_handles.append(p)
														
 
															-    if theta_scale:
														
 
															-        ax1.set_xlabel("Coal. time", fontsize=fnt_size)
														
 
															-        ax1.set_ylabel("Pop. size scaled by N0", fontsize=fnt_size)
														
 
															-        # recent_scale_lower_bound = 0.01
														
 
															-        # recent_scale_upper_bound = 0.1
														
 
															-        # ax1.axvline(x=recent_scale_lower_bound)
														
 
															-        # ax1.axvline(x=recent_scale_upper_bound)
														
 
															-    else:
														
 
															-        # years
														
 
															-        plt.set_xlabel("Time (years)", fontsize=fnt_size)
														
 
															-        plt.set_ylabel("Individuals (N)", fontsize=fnt_size)
														
 
															-    # plt.rcParams['font.size'] = fnt_size
														
 
															-    # print(fnt_size, "rcParam font.size=", plt.rcParams['font.size'])
														
 
															-    ax1.legend(handles = plot_handles, loc='best', fontsize = fnt_size*0.5)
														
 
															-    ax1.set_title(title)
														
 
															-    if ax is None:
														
 
															-        plt.savefig(title+'_b'+str(breaks)+'.pdf')
														
 
															-    # plot likelihood against nb of breakpoints
														
 
															-    # best possible likelihood from SFS
														
 
															-    # Segregating sites
														
 
															-    S = sum(SFS_stored)
														
 
															-    # Number of kept sites from which the SFS is computed
														
 
															-    L = L_stored
														
 
															-    # number of monomorphic sites
														
 
															-    S0 = L-S
														
 
															-    # print("SFS", SFS_stored)
														
 
															-    # print("S", S, "L", L, "S0=", S0)
														
 
															-    # compute Ln
														
 
															-    Ln = log_facto(S+S0) - log_facto(S0) + np.log(float(S0)/(S+S0)) * S0
														
 
															-    for xi in range(0, len(SFS_stored)):
														
 
															-        p_i = SFS_stored[xi] / float(S+S0)
														
 
															-        Ln += np.log(p_i) * SFS_stored[xi] - log_facto(SFS_stored[xi])
														
 
															-    # basic plot likelihood
														
 
															-    if ax is None:
														
 
															-        fig, ax2 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
														
 
															-        # plt.rcParams['font.size'] = fnt_size
														
 
															-    else:
														
 
															-        #plt.rcParams['font.size'] = fnt_size
														
 
															-        ax2 = ax[0][0,1]
														
 
															-    ax2.plot(np.array(brkpt_lik)[:, 0], np.array(brkpt_lik)[:, 1].astype(float), 'o', linestyle = "dotted", lw=2)
														
 
															-    ax2.axhline(y=-Ln, linestyle = "-.", color = "red", label = "$-\log\mathcal{L}$ = "+str(round(-Ln, 2)))
														
 
															-    ax2.set_yscale('log')
														
 
															-    ax2.set_xlabel("# breakpoints", fontsize=fnt_size)
														
 
															-    ax2.set_ylabel("$-\log\mathcal{L}$", fontsize=fnt_size)
														
 
															-    ax2.legend(loc='best', fontsize = fnt_size*0.5)
														
 
															-    ax2.set_title(title+" Likelihood gain from # breakpoints")
														
 
															-    if ax is None:
														
 
															-        plt.savefig(title+'_Breakpts_Likelihood.pdf')
														
 
															-    # AIC
														
 
															-    if ax is None:
														
 
															-        fig, ax3 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
														
 
															-        # plt.rcParams['font.size'] = '18'
														
 
															-    else:
														
 
															-        #plt.rcParams['font.size'] = fnt_size
														
 
															-        ax3 = ax[1][0,1]
														
 
															-    AIC = []
														
 
															-    for brk in np.array(brkpt_lik)[:, 0]:
														
 
															-        brk = int(brk)
														
 
															-        AIC.append((2*brk+1)+2*np.array(brkpt_lik)[brk, 1].astype(float))
														
 
															-    ax3.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
														
 
															-    # AIC = 2*k - 2ln(L) ; where k is the number of parameters, here brks+1
														
 
															-    AIC_ln = 2*(len(brkpt_lik)+1) - 2*Ln
														
 
															-    ax3.axhline(y=AIC_ln, linestyle = "-.", color = "red",
														
 
															-    label = "Min. AIC = "+str(round(AIC_ln, 2)))
														
 
															-    selected_brks_nb = AIC.index(min(AIC))
														
 
															-    ax3.set_yscale('log')
														
 
															-    ax3.set_xlabel("# breakpoints", fontsize=fnt_size)
														
 
															-    ax3.set_ylabel("AIC")
														
 
															-    ax3.legend(loc='best', fontsize = fnt_size*0.5)
														
 
															-    ax3.set_title(title+" AIC")
														
 
															-    if ax is None:
														
 
															-        plt.savefig(title+'_Breakpts_Likelihood_AIC.pdf')
														
 
															-    print("S", S)
														
 
															-    # return plots
														
 
															-    return ax[0], ax[1]
														
 
															-
														
 
															 def plot_all_epochs_thetafolder(full_dict, mu, tgen, title = "Title",
														
 
															     theta_scale = True, ax = None, input = None, output = None):
														
 
															     my_dpi = 300
														
 
															     # AIC = 2*k - 2ln(L) ; where k is the number of parameters, here brks+1
														
 
															     AIC_ln = 2*(len(brkpt_lik)+1) - 2*Ln
														
 
															     best_AIC = AIC_ln
														
 
															+    selected_brks_nb = AIC.index(min(AIC))
														
 
															     # to return : plots ; Ln_Brks ; AIC_Brks ; best_Ln ; best_AIC
														
 
															     # 'plots' dict keys: 'best', {epochs}('0', '1',...)
														
 
															     if input == None:
														
 
															-        saved_plots = {"all_epochs":plots, "Ln_Brks":Ln_Brks,
														
 
															+        saved_plots = {"S":S, "S0":S0, "L":L, "all_epochs":plots, "Ln_Brks":Ln_Brks,
														
 
															                         "AIC_Brks":AIC_Brks, "best_Ln":best_Ln,
														
 
															-                        "best_AIC":best_AIC}
														
 
															+                        "best_AIC":best_AIC, "best_epoch_by_AIC":selected_brks_nb}
														
 
															     else:
														
 
															         # if the dict has to be loaded from input
														
 
															         with open(input, 'r') as json_file:
														
 
															             saved_plots = json.load(json_file)
														
 
															+            saved_plots["S"] = S
														
 
															+            saved_plots["S0"] = S0
														
 
															+            saved_plots["L"] = L
														
 
															             saved_plots["all_epochs"] = plots
														
 
															             saved_plots["Ln_Brks"] = Ln_Brks
														
 
															             saved_plots["AIC_Brks"] = AIC_Brks
														
 
															             saved_plots["best_Ln"] = best_Ln
														
 
															             saved_plots["best_AIC"] = best_AIC
														
 
															+            saved_plots["best_epoch_by_AIC"] = selected_brks_nb
														
 
															     if output == None:
														
 
															         output = title+"_plotdata.json"
														
 
															     with open(output, 'w') as json_file:
														
 
															         json.dump(saved_plots, json_file)
														
 
															     return saved_plots
														
 
															-def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10):
														
 
															+def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10, subset = None):
														
 
															     # fig 2 & 3
														
 
															     if ax is None:
														
 
															         my_dpi = 300
														
 
															     lines_fig2 = []
														
 
															     lines_fig3 = []
														
 
															     #plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
														
 
															-    for epoch, plot in enumerate(plot_lines):
														
 
															+    nb_breaks = len(plot_lines)
														
 
															+    for breaks, plot in enumerate(plot_lines):
														
 
															+        if subset is not None:
														
 
															+            if breaks not in subset :
														
 
															+                # skip if not in subset
														
 
															+                if max(subset) > nb_breaks and breaks == nb_breaks-1:
														
 
															+                    pass
														
 
															+                else:
														
 
															+                    continue
														
 
															         x,y=plot
														
 
															         x2_plot, y2_plot = plot_straight_x_y(x,y)
														
 
															-        p2, = ax2.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
														
 
															+        p2, = ax2.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(breaks)+' brks')
														
 
															         lines_fig2.append(p2)
														
 
															         # Plotting (fig 3) which is the same but log scale for x
														
 
															-        p3, = ax3.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
														
 
															+        p3, = ax3.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(breaks)+' brks')
														
 
															         lines_fig3.append(p3)
														
 
															     ax2.set_xlabel("Relative scale", fontsize=fnt_size)
														
 
															     ax2.set_ylabel("theta", fontsize=fnt_size)
														
 
															     # return plots
														
 
															     return ax
														
 
															-def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True):
														
 
															+def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True, selected_breaks = []):
														
 
															     my_dpi = 300
														
 
															-    # # Add some extra space for the second axis at the bottom
														
 
															-    # #plt.rcParams['font.size'] = 18
														
 
															-    # fig, axs = plt.subplots(2, 2, figsize=(5000/my_dpi, 2970/my_dpi), dpi=my_dpi)
														
 
															-    # #plt.rcParams['font.size'] = 12
														
 
															-    # ax = plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = axs)
														
 
															-    # ax = plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = axs)
														
 
															-    # # Adjust layout to prevent clipping of titles
														
 
															-    #
														
 
															-    # # Save the entire grid as a single figure
														
 
															-    # plt.savefig(title+'_combined.pdf')
														
 
															-    # plt.clf()
														
 
															-    # # # second call for individual plots
														
 
															-    # # plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
														
 
															-    # # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
														
 
															-    # # plt.clf()
														
 
															     save_k_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, output = title+"_plotdata.json")
														
 
															     save_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, input = title+"_plotdata.json", output = title+"_plotdata.json")
														
 
															     # fig2.tight_layout()
														
 
															     ax1 = plot_raw_stairs(plot_lines = loaded_data['raw_stairs'],
														
 
															                             prop = loaded_data['prop'], title = title, ax = ax1)
														
 
															-
														
 
															     ax1 = plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
														
 
															-                            prop = loaded_data['prop'], title = title, ax = ax1)
														
 
															+                                prop = loaded_data['prop'], title = title, ax = ax1, subset=[loaded_data['best_epoch_by_AIC']]+selected_breaks)
														
 
															+    ax2 = plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
														
 
															+                            prop = loaded_data['prop'], title = title, ax = ax2)
														
 
															     ax1, ax2 = plot_all_epochs_thetafolder(loaded_data, mu, tgen, title, theta_scale, ax = [ax1, ax2])
														
 
															     fig1.savefig(title+'_combined_p1.pdf')
														
 
															+    print("Wrote", title+'_combined_p1.pdf')
														
 
															     fig2.savefig(title+'_combined_p2.pdf')
														
 
															+    print("Wrote", title+'_combined_p2.pdf')
														
 
															     plot_raw_stairs(plot_lines = loaded_data['raw_stairs'],
														
 
															                             prop = loaded_data['prop'], title = title, ax = None)
														
 
															     plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
														
 
															     plt.close(fig1)
														
 
															     plt.close(fig2)
														
 
															+
														
 
															 if __name__ == "__main__":
														
 
															     if len(sys.argv) != 4: