123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
-
- import matplotlib.pyplot as plt
- import os
-
- def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
- with open(stwp_theta_file, "r") as swp_file:
- # Read the first line
- line = swp_file.readline()
- L = float(line.split()[2])
- # Process lines until the end of the file
- while line:
- # check at each line
- if line.startswith("dim") :
- dim = int(line.split()[1])
- if dim == breaks+1:
- likelihood = line.split()[5]
- groups = line.split()[6:6+dim]
- theta_site = line.split()[6+dim:6+dim+1+dim]
- elif dim < breaks+1:
- line = swp_file.readline()
- continue
- elif dim > breaks+1:
- break
- #return 0,0,0
- # Read the next line
- line = swp_file.readline()
- #### END of parsing
- # quit this file if the number of dimensions is incorrect
- if dim < breaks+1:
- return 0,0,0
- # get n, the last bin of the last group
- #n = int(groups[-1].split(",")[-1])
- # revert the list of groups as the most recent times correspond
- # to the closest and last leafs of the coal. tree.
- groups = groups[::-1]
- # initiate the dict of times
- t = {}
- # list of thetas
- theta_L = []
- sum_t = 0
- for group_nb, group in enumerate(groups):
- print(group_nb, group, theta_site[group_nb], len(theta_site))
- # store all the thetas one by one, with one theta per group
- theta_L.append(float(theta_site[group_nb]))
- # if the group is of size 1
- if len(group.split(',')) == 1:
- i = int(group)
- # if the group size is >1, take the first elem of the group
- # i is the first bin of each group, straight after a breakpoint
- else:
- i = int(group.split(",")[0])
- j = int(group.split(",")[-1])
- t[i] = 0
- if len(group.split(',')) == 1:
- k = i
- t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
- else:
- for k in range(i, j+1):
- t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
- # we add the cumulative times at the end
- t[i] += sum_t
- sum_t = t[i]
- # build the y axis (sizes)
- y = []
- #theta_L.sort(reverse=True)
- for theta in theta_L:
- # with size N = theta/4mu
- size = theta / (4*mu)
- y.append(size)
- y.append(size)
-
- # build the time x axis
- x = [0]
- # need to fix: t.values are sorted? and in reverse order because the groups and the times are sorted in opposite order
- for time in range(0, len(t.values())-1):
- x.append(list(t.values())[time])
- x.append(list(t.values())[time])
- x.append(list(t.values())[len(t.values())-1])
-
- #x = x[::-1]
- #y = y[::-1]
- #x.sort(reverse=True)
- #y.sort(reverse=True)
-
- return x,y,likelihood
-
- def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
-
- scenari = {}
- cpt = 0
-
- for file_name in os.listdir(folder_path):
- if os.path.isfile(os.path.join(folder_path, file_name)):
- # Perform actions on each file
- x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
- tgen = tgen,
- mu = mu)
- if x == 0 or y == 0:
- continue
- cpt +=1
- scenari[likelihood] = x,y
- print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
- print(cpt, "theta files have been scanned.")
- # sort starting by the smallest -log(Likelihood)
- best10_scenari = (sorted(list(scenari.keys())))[:10]
- print("10 greatest Likelihoods", best10_scenari)
- greatest_likelihood = best10_scenari[0]
- x, y = scenari[greatest_likelihood]
- my_dpi = 300
- plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
- plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
- for scenario in best10_scenari[1:]:
- x,y = scenari[scenario]
- print("\n---- Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
- plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
- plt.ylabel("Individuals (N)")
- plt.xlabel("Time (years)")
- plt.legend(loc='upper right')
- plt.title(title)
- #plt.gcf().set_size(1000, 500)
- plt.savefig(title+'_b'+str(breaks)+'.pdf')
- #plt.show()
-
- if __name__ == "__main__":
-
- if len(sys.argv) != 4:
- print("Need 3 args: ThetaFolder MutationRate GenerationTime")
- exit(0)
-
- folder_path = sys.argv[1]
- mu = sys.argv[2]
- tgen = sys.argv[3]
-
- plot_3epochs_thetafolder(folder_path, mu, tgen)
|