import matplotlib.pyplot as plt import os def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen): with open(stwp_theta_file, "r") as swp_file: # Read the first line line = swp_file.readline() L = float(line.split()[2]) # Process lines until the end of the file while line: # check at each line if line.startswith("dim") : dim = int(line.split()[1]) if dim == breaks+1: likelihood = line.split()[5] groups = line.split()[6:6+dim] theta_site = line.split()[6+dim:6+dim+1+dim] elif dim < breaks+1: line = swp_file.readline() continue elif dim > breaks+1: break #return 0,0,0 # Read the next line line = swp_file.readline() #### END of parsing # quit this file if the number of dimensions is incorrect if dim < breaks+1: return 0,0,0 # get n, the last bin of the last group #n = int(groups[-1].split(",")[-1]) # revert the list of groups as the most recent times correspond # to the closest and last leafs of the coal. tree. groups = groups[::-1] # initiate the dict of times t = {} # list of thetas theta_L = [] sum_t = 0 for group_nb, group in enumerate(groups): print(group_nb, group, theta_site[group_nb], len(theta_site)) # store all the thetas one by one, with one theta per group theta_L.append(float(theta_site[group_nb])) # if the group is of size 1 if len(group.split(',')) == 1: i = int(group) # if the group size is >1, take the first elem of the group # i is the first bin of each group, straight after a breakpoint else: i = int(group.split(",")[0]) j = int(group.split(",")[-1]) t[i] = 0 if len(group.split(',')) == 1: k = i t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu else: for k in range(i, j+1): t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu # we add the cumulative times at the end t[i] += sum_t sum_t = t[i] # build the y axis (sizes) y = [] #theta_L.sort(reverse=True) for theta in theta_L: # with size N = theta/4mu size = theta / (4*mu) y.append(size) y.append(size) # build the time x axis x = [0] # need to fix: t.values are sorted? and in reverse order because the groups and the times are sorted in opposite order for time in range(0, len(t.values())-1): x.append(list(t.values())[time]) x.append(list(t.values())[time]) x.append(list(t.values())[len(t.values())-1]) #x = x[::-1] #y = y[::-1] #x.sort(reverse=True) #y.sort(reverse=True) return x,y,likelihood def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"): scenari = {} cpt = 0 for file_name in os.listdir(folder_path): if os.path.isfile(os.path.join(folder_path, file_name)): # Perform actions on each file x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks, tgen = tgen, mu = mu) if x == 0 or y == 0: continue cpt +=1 scenari[likelihood] = x,y print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n") print(cpt, "theta files have been scanned.") # sort starting by the smallest -log(Likelihood) best10_scenari = (sorted(list(scenari.keys())))[:10] print("10 greatest Likelihoods", best10_scenari) greatest_likelihood = best10_scenari[0] x, y = scenari[greatest_likelihood] my_dpi = 300 plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi) plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood) for scenario in best10_scenari[1:]: x,y = scenari[scenario] print("\n---- Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n") plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario) plt.ylabel("Individuals (N)") plt.xlabel("Time (years)") plt.legend(loc='upper right') plt.title(title) #plt.gcf().set_size(1000, 500) plt.savefig(title+'_b'+str(breaks)+'.pdf') #plt.show() if __name__ == "__main__": if len(sys.argv) != 4: print("Need 3 args: ThetaFolder MutationRate GenerationTime") exit(0) folder_path = sys.argv[1] mu = sys.argv[2] tgen = sys.argv[3] plot_3epochs_thetafolder(folder_path, mu, tgen)