swp2.py 4.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. import matplotlib.pyplot as plt
  2. import os
  3. def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
  4. with open(stwp_theta_file, "r") as swp_file:
  5. # Read the first line
  6. line = swp_file.readline()
  7. L = float(line.split()[2])
  8. # Process lines until the end of the file
  9. while line:
  10. # check at each line
  11. if line.startswith("dim") :
  12. dim = int(line.split()[1])
  13. if dim == breaks+1:
  14. likelihood = line.split()[5]
  15. groups = line.split()[6:6+dim]
  16. theta_site = line.split()[6+dim:6+dim+1+dim]
  17. elif dim < breaks+1:
  18. line = swp_file.readline()
  19. continue
  20. elif dim > breaks+1:
  21. break
  22. #return 0,0,0
  23. # Read the next line
  24. line = swp_file.readline()
  25. #### END of parsing
  26. # quit this file if the number of dimensions is incorrect
  27. if dim < breaks+1:
  28. return 0,0,0
  29. # get n, the last bin of the last group
  30. # revert the list of groups as the most recent times correspond
  31. # to the closest and last leafs of the coal. tree.
  32. groups = groups[::-1]
  33. theta_site = theta_site[::-1]
  34. # initiate the dict of times
  35. t = {}
  36. # list of thetas
  37. theta_L = []
  38. sum_t = 0
  39. for group_nb, group in enumerate(groups):
  40. ###print(group_nb, group, theta_site[group_nb], len(theta_site))
  41. # store all the thetas one by one, with one theta per group
  42. theta_L.append(float(theta_site[group_nb]))
  43. # if the group is of size 1
  44. if len(group.split(',')) == 1:
  45. i = int(group)
  46. # if the group size is >1, take the first elem of the group
  47. # i is the first bin of each group, straight after a breakpoint
  48. else:
  49. i = int(group.split(",")[0])
  50. j = int(group.split(",")[-1])
  51. t[i] = 0
  52. #t =
  53. if len(group.split(',')) == 1:
  54. k = i
  55. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  56. else:
  57. for k in range(j, i-1, -1 ):
  58. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  59. # we add the cumulative times at the end
  60. t[i] += sum_t
  61. sum_t = t[i]
  62. # build the y axis (sizes)
  63. y = []
  64. for theta in theta_L:
  65. # with size N = theta/4mu
  66. size = theta / (4*mu)
  67. y.append(size)
  68. y.append(size)
  69. # build the time x axis
  70. x = [0]
  71. for time in range(0, len(t.values())-1):
  72. x.append(list(t.values())[time])
  73. x.append(list(t.values())[time])
  74. x.append(list(t.values())[len(t.values())-1])
  75. return x,y,likelihood
  76. def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
  77. scenari = {}
  78. cpt = 0
  79. for file_name in os.listdir(folder_path):
  80. if os.path.isfile(os.path.join(folder_path, file_name)):
  81. # Perform actions on each file
  82. x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
  83. tgen = tgen,
  84. mu = mu)
  85. if x == 0 or y == 0:
  86. continue
  87. cpt +=1
  88. scenari[likelihood] = x,y
  89. print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
  90. print(cpt, "theta file(s) have been scanned.")
  91. # sort starting by the smallest -log(Likelihood)
  92. best10_scenari = (sorted(list(scenari.keys())))[:10]
  93. print("10 greatest Likelihoods", best10_scenari)
  94. greatest_likelihood = best10_scenari[0]
  95. x, y = scenari[greatest_likelihood]
  96. my_dpi = 300
  97. plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
  98. plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
  99. plt.yscale('log')
  100. #plt.xscale('log')
  101. plt.grid(True,which="both", linestyle='--')
  102. for scenario in best10_scenari[1:]:
  103. x,y = scenari[scenario]
  104. #print("\n---- Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
  105. plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
  106. plt.ylabel("Individuals (N)")
  107. plt.xlabel("Time (years)")
  108. plt.legend(loc='upper right')
  109. plt.title(title)
  110. #plt.gcf().set_size(1000, 500)
  111. plt.savefig(title+'_b'+str(breaks)+'.pdf')
  112. if __name__ == "__main__":
  113. if len(sys.argv) != 4:
  114. print("Need 3 args: ThetaFolder MutationRate GenerationTime")
  115. exit(0)
  116. folder_path = sys.argv[1]
  117. mu = sys.argv[2]
  118. tgen = sys.argv[3]
  119. plot_3epochs_thetafolder(folder_path, mu, tgen)