1 year ago · eb2799bc98
--- a/swp2.py
+++ b/swp2.py
@@ -2,10 +2,11 @@ import matplotlib.pyplot as plt
 
				 import os
			
 
				 import numpy as np
			
 
				 import math
			
 
				+import json
			
 
				+import io
			
 
				 from scipy.special import gammaln
			
 
				 from matplotlib.backends.backend_pdf import PdfPages
			
 
				 from matplotlib.ticker import MaxNLocator
			
 
				-import io
			
 
				 from mpl_toolkits.axes_grid1.inset_locator import inset_axes
			
 
				 from matplotlib.ticker import MultipleLocator
			
 
				 def log_facto(k):
			
@@ -197,8 +198,6 @@ def plot_k_epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"
 
				     my_dpi = 300
			
 
				     plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
			
 
				     plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
			
 
				-    plt.xlim(1e-3, 1)
			
 
				-    plt.ylim(0, 10)
			
 
				     #plt.yscale('log')
			
 
				     plt.xscale('log')
			
 
				     plt.grid(True,which="both", linestyle='--', alpha = 0.3)
			
@@ -271,7 +270,6 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
 
				         fnt_size = 12
			
 
				         # plt.rcParams['font.size'] = fnt_size
			
 
				         ax1 = ax[0,0]
			
 
				-    #ax1.set_xlim(1e-3, 1)
			
 
				     ax1.set_yscale('log')
			
 
				     ax1.set_xscale('log')
			
 
				     ax1.grid(True,which="both", linestyle='--', alpha = 0.3)
			
@@ -321,7 +319,6 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
 
				         # years
			
 
				         plt.set_xlabel("Time (years)", fontsize=fnt_size)
			
 
				         plt.set_ylabel("Individuals (N)", fontsize=fnt_size)
			
 
				-        ax1.set_xlim(1e-5, 1)
			
 
				     # plt.rcParams['font.size'] = fnt_size
			
 
				     # print(fnt_size, "rcParam font.size=", plt.rcParams['font.size'])
			
 
				     ax1.legend(handles = plot_handles, loc='best', fontsize = fnt_size*0.5)
			
@@ -382,6 +379,189 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
 
				     # return plots
			
 
				     return ax
			
 
				 
			
 
				def save_k_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
				    breaks_max = 10, output = None):
				    """
				    Collect the best theta estimates for each number of breaks and save
				    the resulting plot data as JSON.

				    Every regular file in `folder_path` is parsed once per number of
				    breaks k in range(breaks_max) with
				    return_x_y_from_stwp_theta_file_as_is. For each k, the estimate whose
				    likelihood value is numerically smallest is kept.

				    Parameters:
				        folder_path: directory holding the theta files.
				        mu, tgen, theta_scale: forwarded unchanged to the file parser.
				        title: used in the printed banner and in the default output name.
				        breaks_max: numbers of breaks 0 .. breaks_max-1 are explored.
				        output: JSON path; defaults to title + "_plotdata.json".

				    Returns:
				        dict with keys
				          "raw_stairs":    one (x, y) pair per number of breaks, in
				                           increasing order of breaks; x holds the bin
				                           labels, y the thetas divided by N0,
				          "scaled_stairs": one (times, y) pair per number of breaks,
				                           where times accumulates theta_k / (k*(k-1)),
				          "prop":          cumulative proportions computed from the
				                           0-break estimate.

				    Raises:
				        ValueError: if no 0-break estimate was found; it provides N0 and
				        "prop", so nothing can be normalised without it.
				    """
				    cpt = 0
				    epochs = {}
				    for file_name in os.listdir(folder_path):
				        # Build the path once so the isfile() check and the parser agree,
				        # whether or not folder_path ends with a separator.
				        file_path = os.path.join(folder_path, file_name)
				        if not os.path.isfile(file_path):
				            continue
				        cpt += 1
				        for k in range(breaks_max):
				            thetas, _ = return_x_y_from_stwp_theta_file_as_is(file_path, breaks = k,
				                                                              tgen = tgen,
				                                                              mu = mu, relative_theta_scale = theta_scale)
				            # The parser signals "nothing usable" with 0; a table that does
				            # not hold exactly k+1 entries is not a k-break estimate.
				            if thetas == 0 or len(thetas)-1 != k:
				                continue
				            # NOTE(review): eval() runs text read from the theta file. This is
				            # only acceptable for trusted files; if the field is always a plain
				            # number, float() or ast.literal_eval() would be safer - confirm.
				            likelihood = str(eval(thetas[k][2]))
				            epochs.setdefault(k, {})[likelihood] = thetas
				    # range(breaks_max) stops at breaks_max - 1: report that value instead of
				    # relying on a leaked loop variable (unbound when no file was scanned).
				    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks_max - 1)+"\n*******\n")
				    print(cpt, "theta file(s) have been scanned.")

				    # Keys are strings; compare them as numbers, otherwise "10" sorts
				    # before "9" and "-1.5" before "-12".
				    best_epochs = {}
				    for epoch, by_likelihood in epochs.items():
				        best_epochs[epoch] = by_likelihood[min(by_likelihood, key=float)]

				    if 0 not in best_epochs:
				        raise ValueError("no 0-break theta estimate found in " + str(folder_path)
				                         + ": it is required to normalise the stairs")

				    # N0 is the theta of the 0-break estimate; every curve is divided by it.
				    _, y_ref = _stairs_xy(best_epochs[0])
				    N0 = y_ref[0]
				    prop = _cumulative_proportions(y_ref)

				    plots = []
				    lines_fig2 = []
				    # Sorted so that position i in each list is the estimate with i breaks
				    # (as long as no k is missing); plot_raw_stairs labels lines by position.
				    for epoch in sorted(best_epochs):
				        x, y = _stairs_xy(best_epochs[epoch])
				        # normalise to N0
				        y = [value / N0 for value in y]
				        plots.append((x, y))

				        # compute the times as the running sum of theta_k / (k*(k-1))
				        elapsed = 0
				        times = [0]
				        for label, value in zip(x, y):
				            size = int(label)
				            elapsed += value / (size*(size-1))
				            times.append(elapsed)
				        # first y value is repeated so both lists have the same length
				        lines_fig2.append((times, [y[0]] + y))

				    saved_plots = {"raw_stairs":plots, "scaled_stairs":lines_fig2,
				                    "prop":prop}
				    if output is None:
				        output = title+"_plotdata.json"
				    with open(output, 'w') as json_file:
				        json.dump(saved_plots, json_file)
				    return saved_plots


				def _stairs_xy(theta):
				    """
				    Flatten one theta table into parallel lists: x receives each group of
				    bin labels in reverse order, y the group's theta repeated once per bin.
				    """
				    x = []
				    y = []
				    for row in theta.values():
				        # row[0] is the theta value, row[1] the group of bin labels
				        group = list(row[1])
				        x += group[::-1]
				        y += [row[0]] * len(group)
				    return x, y


				def _cumulative_proportions(y):
				    """
				    Return the running sum, taken from the last entry to the first, of
				    each y[j]/(j+1) divided by the total of those terms.
				    """
				    sum_theta_i = 0
				    for rank, value in enumerate(y, start=1):
				        sum_theta_i += value / rank
				    cumul = 0
				    prop_cumul = []
				    for rank in range(len(y), 0, -1):
				        cumul = y[rank-1] / rank / sum_theta_i + cumul
				        prop_cumul.append(cumul)
				    return prop_cumul
			
 
				+
			
 
				def plot_raw_stairs(plot_lines, plot_lines2, prop, title, ax = None, n_ticks = 10):
				    """
				    Draw the stair plots from previously saved plot data.

				    Parameters:
				        plot_lines: sequence of (x, y) pairs, one per number of breaks,
				            drawn on the "# bin" axis (figure 1).
				        plot_lines2: sequence of (x, y) pairs drawn twice: on a linear
				            axis (figure 2) and on a log-log axis (figure 3).
				        prop: sequence indexed with int(bin) - 2, printed under the
				            x tick labels of figure 1.
				        title: title of every axis and prefix of the PDF names.
				        ax: None to create and save three separate figures, or a 2x2 grid
				            of axes; the function then draws on ax[0, 1], ax[1, 0] and
				            ax[1, 1] and saves nothing.
				        n_ticks: approximate number of x ticks kept on figure 1.

				    Returns:
				        `ax` as received (None in standalone mode).

				    Raises:
				        ValueError: if plot_lines is empty (there would be no x ticks).
				    """
				    if not plot_lines:
				        raise ValueError("plot_lines is empty: nothing to plot")

				    standalone = ax is None
				    if standalone:
				        # initialize the three separate figures
				        my_dpi = 300
				        fnt_size = 18
				        fig, ax1 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
				        fig2, ax2 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
				        fig3, ax3 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
				    else:
				        fnt_size = 12
				        # place of plots on the grid
				        ax1 = ax[0, 1]
				        ax2 = ax[1, 0]
				        ax3 = ax[1, 1]
				        plt.subplots_adjust(wspace=0.3, hspace=0.3)

				    # Suffix of the PDF names: highest index in plot_lines. The original
				    # used an undefined `k` here, which raised NameError in standalone mode.
				    last_breaks = len(plot_lines) - 1

				    # fig 1
				    plots = []
				    for epoch, (x, y) in enumerate(plot_lines):
				        x_plot, y_plot = plot_straight_x_y(x, y)
				        p, = ax1.plot(x_plot, y_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
				        # add plot to the list of all plots to superimpose
				        plots.append(p)
				    # ticks are derived from the bins of the last line drawn
				    x_ticks = x
				    ax1.set_xlabel("# bin", fontsize=fnt_size)
				    ax1.set_ylabel("theta", fontsize=fnt_size)
				    ax1.set_title(title, fontsize=fnt_size)
				    ax1.legend(handles=plots, loc='best', fontsize = fnt_size*0.5)
				    # NOTE(review): kept from the original although the ticks are replaced
				    # just below; confirm whether this first call is needed.
				    ax1.set_xticks(x_ticks)
				    # Reduce the number of ticks to about n_ticks. The step is clamped to 1
				    # so that few bins (or n_ticks = 1) cannot produce a zero step.
				    step = max(1, len(x_ticks)//max(1, n_ticks-1))
				    values = x_ticks[::step]
				    new_prop = [prop[int(val)-2] for val in values][::-1]
				    ax1.set_xticks(values)
				    ax1.set_xticklabels([f'{values[k]}\n{val:.2f}' for k, val in enumerate(new_prop)], fontsize = fnt_size*0.8)

				    # fig 2 & 3: same data, fig 3 uses log scales
				    lines_fig2 = []
				    lines_fig3 = []
				    for epoch, (x, y) in enumerate(plot_lines2):
				        x2_plot, y2_plot = plot_straight_x_y(x, y)
				        p2, = ax2.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
				        lines_fig2.append(p2)
				        p3, = ax3.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
				        lines_fig3.append(p3)
				    ax2.set_xlabel("Relative scale", fontsize=fnt_size)
				    ax2.set_ylabel("theta", fontsize=fnt_size)
				    ax2.set_title(title, fontsize=fnt_size)
				    ax2.legend(handles=lines_fig2, loc='best', fontsize = fnt_size*0.5)

				    ax3.set_xscale('log')
				    ax3.set_yscale('log')
				    ax3.set_xlabel("log Relative scale", fontsize=fnt_size)
				    ax3.set_ylabel("theta", fontsize=fnt_size)
				    ax3.set_title(title, fontsize=fnt_size)
				    ax3.legend(handles=lines_fig3, loc='best', fontsize = fnt_size*0.5)

				    if standalone:
				        # Save through each Figure object: plt.savefig() writes the *current*
				        # figure, which is fig3 once all three figures exist.
				        fig.savefig(title+'_raw'+str(last_breaks)+'.pdf')
				        fig2.savefig(title+'_plot2_'+str(last_breaks)+'.pdf')
				        fig3.savefig(title+'_plot3_'+str(last_breaks)+'_log.pdf')
				        # release the figures created here
				        for own_fig in (fig, fig2, fig3):
				            plt.close(own_fig)
				    return ax
			
 
				+
			
 
				 def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 10, ax = None, n_ticks = 10):
			
 
				     """
			
 
				     Use theta values as is to do basic plots.
			
@@ -402,7 +582,7 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
 
				                     continue
			
 
				                 if k not in epochs.keys():
			
 
				                     epochs[k] = {}
			
 
				-                likelihood = thetas[k][2]
			
 
				+                likelihood = str(eval(thetas[k][2]))
			
 
				                 epochs[k][likelihood] = thetas
			
 
				                 #epochs[k] = thetas
			
 
				     print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
			
@@ -424,7 +604,7 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
 
				     for epoch in epochs:
			
 
				         likelihoods = []
			
 
				         for key in epochs[epoch].keys():
			
 
				-            likelihoods.append(float(key))
			
 
				+            likelihoods.append(key)
			
 
				         likelihoods.sort()
			
 
				         minLogLn = str(likelihoods[0])
			
 
				         best_epochs[epoch] = epochs[epoch][minLogLn]
			
@@ -537,24 +717,35 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
 
				 
			
 
				 def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True):
			
 
				     my_dpi = 300
			
 
				-    # Add some extra space for the second axis at the bottom
			
 
				-    #plt.rcParams['font.size'] = 18
			
 
				-    fig, axs = plt.subplots(3, 2, figsize=(5000/my_dpi, 2970/my_dpi), dpi=my_dpi)
			
 
				-    #plt.rcParams['font.size'] = 12
			
 
				-    ax = plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = axs)
			
 
				-    ax = plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = axs)
			
 
				-    # Adjust layout to prevent clipping of titles
			
 
				-    plt.tight_layout()
			
 
				-    # Adjust absolute space between the top and bottom rows
			
 
				-    #plt.subplots_adjust(hspace=0.7)  # Adjust this value based on your requirement
			
 
				-    # Save the entire grid as a single figure
			
 
				-    plt.savefig(title+'_combined.pdf')
			
 
				-    plt.clf()
			
 
				-    # # second call for individual plots
			
 
				-    # plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
			
 
				-    # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
			
 
				+    # # Add some extra space for the second axis at the bottom
			
 
				+    # #plt.rcParams['font.size'] = 18
			
 
				+    # fig, axs = plt.subplots(2, 2, figsize=(5000/my_dpi, 2970/my_dpi), dpi=my_dpi)
			
 
				+    # #plt.rcParams['font.size'] = 12
			
 
				+    # ax = plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = axs)
			
 
				+    # ax = plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = axs)
			
 
				+    # # Adjust layout to prevent clipping of titles
			
 
				+    # plt.tight_layout()
			
 
				+    # # Adjust absolute space between the top and bottom rows
			
 
				+    # #plt.subplots_adjust(hspace=0.7)  # Adjust this value based on your requirement
			
 
				+    # # Save the entire grid as a single figure
			
 
				+    # plt.savefig(title+'_combined.pdf')
			
 
				     # plt.clf()
			
 
				+    # # # second call for individual plots
			
 
				+    # # plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
			
 
				+    # # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
			
 
				+    # # plt.clf()
			
 
				+    # save_k_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, output = title+"_plotdata.json")
			
 
				+
			
 
				+    with open(title+"_plotdata.json", 'r') as json_file:
			
 
				+        loaded_data = json.load(json_file)
			
 
				+
			
 
				+    fig1, ax1 = plt.subplots(2, 2, figsize=(5000/my_dpi, 2970/my_dpi), dpi=my_dpi)
			
 
				+    # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = ax1)
			
 
				+    ax1 = plot_raw_stairs(plot_lines = loaded_data['raw_stairs'], plot_lines2 = loaded_data['scaled_stairs'],
			
 
				+                            prop = loaded_data['prop'], title = title, ax = ax1)
			
 
				 
			
 
				+    plt.savefig(title+'_raw_scaled.pdf')
			
 
				+    fig1.clf()
			
 
				 if __name__ == "__main__":
			
 
				 
			
 
				     if len(sys.argv) != 4: