1 year ago · 463b9e63f9
--- a/swp2.py
+++ b/swp2.py
@@ -1,7 +1,8 @@
 
				 import matplotlib.pyplot as plt
			
 
				 import os
			
 
				+import numpy as np
			
 
				 
			
 
				-def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
			
 
				+def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
			
 
				     with open(stwp_theta_file, "r") as swp_file:
			
 
				         # Read the first line
			
 
				         line = swp_file.readline()
			
@@ -53,18 +54,27 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
 
				         #t =
			
 
				         if len(group.split(',')) == 1:
			
 
				             k = i
			
 
				-            t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
			
 
				+            if relative_theta_scale:
			
 
				+                t[i] += ((theta_L[group_nb] ) / (k*(k-1)))
			
 
				+            else:
			
 
				+                t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
			
 
				         else:
			
 
				             for k in range(j, i-1, -1 ):
			
 
				-                t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
			
 
				+                if relative_theta_scale:
			
 
				+                    t[i] += ((theta_L[group_nb] ) / (k*(k-1)))
			
 
				+                else:
			
 
				+                    t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
			
 
				         # we add the cumulative times at the end
			
 
				         t[i] += sum_t
			
 
				         sum_t = t[i]
			
 
				     # build the y axis (sizes)
			
 
				     y = []
			
 
				     for theta in theta_L:
			
 
				-        # with size N = theta/4mu
			
 
				-        size = theta / (4*mu)
			
 
				+        if relative_theta_scale:
			
 
				+            size = theta
			
 
				+        else:
			
 
				+            # with size N = theta/4mu
			
 
				+            size = theta / (4*mu)
			
 
				         y.append(size)
			
 
				         y.append(size)
			
 
				     # build the time x axis
			
@@ -73,19 +83,64 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
 
				         x.append(list(t.values())[time])
			
 
				         x.append(list(t.values())[time])
			
 
				     x.append(list(t.values())[len(t.values())-1])
			
 
				+    # if relative_theta_scale:
			
 
				+    #     # rescale
			
 
				+    #     #N0 = y[0]
			
 
				+    #     # for i in range(len(y)):
			
 
				+    #     #     # divide by N0
			
 
				+    #     #     y[i] = y[i]/N0
			
 
				+    #     #     x[i] = x[i]/N0
			
 
				     return x,y,likelihood
			
 
				 
			
 
				-def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
			
 
				+def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
			
 
				+    with open(stwp_theta_file, "r") as swp_file:
			
 
				+        # Read the first line
			
 
				+        line = swp_file.readline()
			
 
				+        L = float(line.split()[2])
			
 
				+        # Process lines until the end of the file
			
 
				+        while line:
			
 
				+            # check at each line
			
 
				+            if line.startswith("dim") :
			
 
				+                dim = int(line.split()[1])
			
 
				+                if dim == breaks+1:
			
 
				+                    likelihood = line.split()[5]
			
 
				+                    groups = line.split()[6:6+dim]
			
 
				+                    theta_site = line.split()[6+dim:6+dim+1+dim]
			
 
				+                elif dim < breaks+1:
			
 
				+                    line = swp_file.readline()
			
 
				+                    continue
			
 
				+                elif dim > breaks+1:
			
 
				+                    break
			
 
				+                    #return 0,0,0
			
 
				+            # Read the next line
			
 
				+            line = swp_file.readline()
			
 
				+    #### END of parsing
			
 
				+    # quit this file if the number of dimensions is incorrect
			
 
				+    if dim < breaks+1:
			
 
				+        return 0,0,0
			
 
				+    # get n, the last bin of the last group
			
 
				+    # revert the list of groups as the most recent times correspond
			
 
				+    # to the closest and last leafs of the coal. tree.
			
 
				+    groups = groups[::-1]
			
 
				+    theta_site = theta_site[::-1]
			
 
				+
			
 
				+    thetas = {}
			
 
				 
			
 
				+    for i in range(len(groups)):
			
 
				+        groups[i] = groups[i].split(',')
			
 
				+        #print(groups[i], len(groups[i]))
			
 
				+        thetas[i] = [float(theta_site[i]), groups[i], likelihood]
			
 
				+    return thetas
			
 
				+
			
 
				+def plot_k_epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title", theta_scale = True):
			
 
				     scenari = {}
			
 
				     cpt = 0
			
 
				-
			
 
				     for file_name in os.listdir(folder_path):
			
 
				         if os.path.isfile(os.path.join(folder_path, file_name)):
			
 
				             # Perform actions on each file
			
 
				             x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
			
 
				                                                              tgen = tgen,
			
 
				-                                                             mu = mu)
			
 
				+                                                             mu = mu, relative_theta_scale = theta_scale)
			
 
				             if x == 0 or y == 0:
			
 
				                 continue
			
 
				             cpt +=1
			
@@ -93,6 +148,7 @@ def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title")
 
				     print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
			
 
				     print(cpt, "theta file(s) have been scanned.")
			
 
				     # sort starting by the smallest -log(Likelihood)
			
 
				+    print(scenari)
			
 
				     best10_scenari = (sorted(list(scenari.keys())))[:10]
			
 
				     print("10 greatest Likelihoods", best10_scenari)
			
 
				     greatest_likelihood = best10_scenari[0]
			
@@ -100,21 +156,181 @@ def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title")
 
				     my_dpi = 300
			
 
				     plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
			
 
				     plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
			
 
				-    plt.yscale('log')
			
 
				-    #plt.xscale('log')
			
 
				-    plt.grid(True,which="both", linestyle='--')
			
 
				+    plt.xlim(1e-3, 1)
			
 
				+    plt.ylim(0, 10)
			
 
				+    #plt.yscale('log')
			
 
				+    plt.xscale('log')
			
 
				+    plt.grid(True,which="both", linestyle='--', alpha = 0.3)
			
 
				 
			
 
				     for scenario in best10_scenari[1:]:
			
 
				         x,y = scenari[scenario]
			
 
				         #print("\n----  Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
			
 
				         plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
			
 
				-    plt.ylabel("Individuals (N)")
			
 
				-    plt.xlabel("Time (years)")
			
 
				+    if theta_scale:
			
 
				+        plt.xlabel("Coal. time")
			
 
				+        plt.ylabel("Pop. size scaled by N0")
			
 
				+        recent_scale_lower_bound = y[0] * 0.01
			
 
				+        recent_scale_upper_bound = y[0] * 0.1
			
 
				+        plt.axvline(x=recent_scale_lower_bound)
			
 
				+        plt.axvline(x=recent_scale_upper_bound)
			
 
				+    else:
			
 
				+        # years
			
 
				+        plt.xlabel("Time (years)")
			
 
				+        plt.ylabel("Individuals (N)")
			
 
				     plt.legend(loc='upper right')
			
 
				     plt.title(title)
			
 
				-    #plt.gcf().set_size(1000, 500)
			
 
				     plt.savefig(title+'_b'+str(breaks)+'.pdf')
			
 
				 
			
 
				+def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_scale = True):
			
 
				+    #scenari = {}
			
 
				+    cpt = 0
			
 
				+    epochs = {}
			
 
				+    for file_name in os.listdir(folder_path):
			
 
				+        breaks = 0
			
 
				+        cpt +=1
			
 
				+        if os.path.isfile(os.path.join(folder_path, file_name)):
			
 
				+            x, y, likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
			
 
				+                                                             tgen = tgen,
			
 
				+                                                             mu = mu, relative_theta_scale = theta_scale)
			
 
				+            while not (x == 0 and y == 0):
			
 
				+                if breaks not in epochs.keys():
			
 
				+                    epochs[breaks] = {}
			
 
				+                epochs[breaks][likelihood] = x,y
			
 
				+                breaks += 1
			
 
				+                x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
			
 
				+                                                                 tgen = tgen,
			
 
				+                                                                  mu = mu, relative_theta_scale = theta_scale)
			
 
				+    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
			
 
				+    print(cpt, "theta file(s) have been scanned.")
			
 
				+
			
 
				+    # intialize figure
			
 
				+    my_dpi = 300
			
 
				+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
			
 
				+    plt.xlim(1e-3, 1)
			
 
				+    #plt.ylim(0, 10)
			
 
				+    #plt.yscale('log')
			
 
				+    plt.xscale('log')
			
 
				+    plt.grid(True,which="both", linestyle='--', alpha = 0.3)
			
 
				+    brkpt_lik = []
			
 
				+    for epoch, scenari in epochs.items():
			
 
				+        # sort starting by the smallest -log(Likelihood)
			
 
				+        best10_scenari = (sorted(list(scenari.keys())))[:10]
			
 
				+        greatest_likelihood = best10_scenari[0]
			
 
				+        # store the tuple breakpoints and likelihood for later plot
			
 
				+        brkpt_lik.append((epoch, greatest_likelihood))
			
 
				+        x, y = scenari[greatest_likelihood]
			
 
				+        #without breakpoint
			
 
				+        if epoch == 0:
			
 
				+            # do something with the theta without bp and skip the plotting
			
 
				+            N0 = y[0]
			
 
				+            #continue
			
 
				+        for i in range(len(y)):
			
 
				+            # divide by N0
			
 
				+            y[i] = y[i]/N0
			
 
				+            x[i] = x[i]/N0
			
 
				+        plt.plot(x, y, '-', alpha=0.75, lw=2, label = str(epoch)+' BrkPt | Lik='+greatest_likelihood)
			
 
				+        if theta_scale:
			
 
				+            plt.xlabel("Coal. time")
			
 
				+            plt.ylabel("Pop. size scaled by N0")
			
 
				+            recent_scale_lower_bound = 0.01
			
 
				+            recent_scale_upper_bound = 0.1
			
 
				+            #print(recent_scale_lower_bound, recent_scale_upper_bound)
			
 
				+            plt.axvline(x=recent_scale_lower_bound)
			
 
				+            plt.axvline(x=recent_scale_upper_bound)
			
 
				+        else:
			
 
				+            # years
			
 
				+            plt.xlabel("Time (years)")
			
 
				+            plt.ylabel("Individuals (N)")
			
 
				+        plt.xlim(1e-5, 1)
			
 
				+        plt.legend(loc='upper right')
			
 
				+        plt.title(title)
			
 
				+        plt.savefig(title+'_b'+str(breaks)+'.pdf')
			
 
				+    # plot likelihood against nb of breakpoints
			
 
				+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
			
 
				+    plt.rcParams['font.size'] = '18'
			
 
				+    AIC = 2*(len(brkpt_lik)+1)+2*np.array(brkpt_lik)[:, 1].astype(float)
			
 
				+    plt.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
			
 
				+    plt.axhline(y=106)
			
 
				+    plt.yscale('log')
			
 
				+    plt.xlabel("# breakpoints", fontsize=20)
			
 
				+    plt.ylabel("$-\log\mathcal{L}$")
			
 
				+    #plt.legend(loc='upper right')
			
 
				+    plt.title(title)
			
 
				+    plt.savefig(title+'_Breakpts_Likelihood.pdf')
			
 
				+
			
 
				+def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 6):
			
 
				+    cpt = 0
			
 
				+    epochs = {}
			
 
				+    for file_name in os.listdir(folder_path):
			
 
				+        cpt +=1
			
 
				+        if os.path.isfile(os.path.join(folder_path, file_name)):
			
 
				+            for k in range(breaks_max):
			
 
				+                thetas = return_x_y_from_stwp_theta_file_as_is(folder_path+file_name, breaks = k,
			
 
				+                                                                 tgen = tgen,
			
 
				+                                                                 mu = mu, relative_theta_scale = theta_scale)
			
 
				+                if thetas[0] == 0:
			
 
				+                    continue
			
 
				+                epochs[k] = thetas
			
 
				+    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
			
 
				+    print(cpt, "theta file(s) have been scanned.")
			
 
				+
			
 
				+    # intialize figure
			
 
				+    my_dpi = 300
			
 
				+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
			
 
				+    for epoch, theta in epochs.items():
			
 
				+        groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
			
 
				+        x = []
			
 
				+        y = []
			
 
				+        thetas = np.array(list(theta.values()), dtype=object)[:, 0]
			
 
				+        for i,group in enumerate(groups):
			
 
				+            x += group[::-1]
			
 
				+            y += list(np.repeat(thetas[i], len(group)))
			
 
				+            if epoch == 0:
			
 
				+                N0 = y[0]
			
 
				+        for i in range(len(y)):
			
 
				+            y[i] = y[i]/N0
			
 
				+        plt.plot(x, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
			
 
				+        plt.xlabel("# breaks")
			
 
				+        plt.ylabel("theta")
			
 
				+        plt.legend(loc='upper right')
			
 
				+        plt.savefig(title+'_test'+str(k)+'.pdf')
			
 
				+    # fig 2
			
 
				+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
			
 
				+    for epoch, theta in epochs.items():
			
 
				+        groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
			
 
				+        x = []
			
 
				+        y = []
			
 
				+        thetas = np.array(list(theta.values()), dtype=object)[:, 0]
			
 
				+        for i,group in enumerate(groups):
			
 
				+            x += group[::-1]
			
 
				+            y += list(np.repeat(thetas[i], len(group)))
			
 
				+            if epoch == 0:
			
 
				+                N0 = y[0]
			
 
				+        for i in range(len(y)):
			
 
				+            y[i] = y[i]/N0
			
 
				+        #
			
 
				+        x_2 = []
			
 
				+        T = 0
			
 
				+        # k allant de de 14 à 2
			
 
				+        for i in range(len(x)):
			
 
				+            x[i] = int(x[i])
			
 
				+        #print(x[2])
			
 
				+        for i in range(0, len(x)):
			
 
				+            k = x[i]
			
 
				+            #print(k, y[k-2])
			
 
				+
			
 
				+            #theta_k = y[k] / (k*(k-1))
			
 
				+            T += y[i] / (x[i]*(x[i]-1))
			
 
				+            x_2.append(T)
			
 
				+
			
 
				+        plt.plot(x_2, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
			
 
				+        plt.xscale('log')
			
 
				+        plt.xlabel("# breaks")
			
 
				+        plt.ylabel("theta")
			
 
				+        plt.legend(loc='upper right')
			
 
				+        plt.savefig(title+'_test'+str(k)+'.pdf')
			
 
				+        #
			
 
				+
			
 
				 if __name__ == "__main__":
			
 
				 
			
 
				     if len(sys.argv) != 4:
			
@@ -125,4 +341,4 @@ if __name__ == "__main__":
 
				     mu = sys.argv[2]
			
 
				     tgen = sys.argv[3]
			
 
				 
			
 
				-    plot_3epochs_thetafolder(folder_path, mu, tgen)
			
 
				+    plot_all_epochs_thetafolder(folder_path, mu, tgen)