swp2.py 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. import matplotlib.pyplot as plt
  2. import os
  3. def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
  4. with open(stwp_theta_file, "r") as swp_file:
  5. # Read the first line
  6. line = swp_file.readline()
  7. L = float(line.split()[2])
  8. # Process lines until the end of the file
  9. while line:
  10. # check at each line
  11. if line.startswith("dim") :
  12. dim = int(line.split()[1])
  13. if dim == breaks+1:
  14. likelihood = line.split()[5]
  15. groups = line.split()[6:6+dim]
  16. theta_site = line.split()[6+dim:6+dim+1+dim]
  17. elif dim < breaks+1:
  18. line = swp_file.readline()
  19. continue
  20. elif dim > breaks+1:
  21. break
  22. #return 0,0,0
  23. # Read the next line
  24. line = swp_file.readline()
  25. #### END of parsing
  26. # quit this file if the number of dimensions is incorrect
  27. if dim < breaks+1:
  28. return 0,0,0
  29. # get n, the last bin of the last group
  30. #n = int(groups[-1].split(",")[-1])
  31. # revert the list of groups as the most recent times correspond
  32. # to the closest and last leafs of the coal. tree.
  33. groups = groups[::-1]
  34. # initiate the dict of times
  35. t = {}
  36. # list of thetas
  37. theta_L = []
  38. sum_t = 0
  39. for group_nb, group in enumerate(groups):
  40. print(group_nb, group, theta_site[group_nb], len(theta_site))
  41. # store all the thetas one by one, with one theta per group
  42. theta_L.append(float(theta_site[group_nb]))
  43. # if the group is of size 1
  44. if len(group.split(',')) == 1:
  45. i = int(group)
  46. # if the group size is >1, take the first elem of the group
  47. # i is the first bin of each group, straight after a breakpoint
  48. else:
  49. i = int(group.split(",")[0])
  50. j = int(group.split(",")[-1])
  51. t[i] = 0
  52. if len(group.split(',')) == 1:
  53. k = i
  54. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  55. else:
  56. for k in range(i, j+1):
  57. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  58. # we add the cumulative times at the end
  59. t[i] += sum_t
  60. sum_t = t[i]
  61. # build the y axis (sizes)
  62. y = []
  63. #theta_L.sort(reverse=True)
  64. for theta in theta_L:
  65. # with size N = theta/4mu
  66. size = theta / (4*mu)
  67. y.append(size)
  68. y.append(size)
  69. # build the time x axis
  70. x = [0]
  71. # need to fix: t.values are sorted? and in reverse order because the groups and the times are sorted in opposite order
  72. for time in range(0, len(t.values())-1):
  73. x.append(list(t.values())[time])
  74. x.append(list(t.values())[time])
  75. x.append(list(t.values())[len(t.values())-1])
  76. #x = x[::-1]
  77. #y = y[::-1]
  78. #x.sort(reverse=True)
  79. #y.sort(reverse=True)
  80. return x,y,likelihood
  81. def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
  82. scenari = {}
  83. cpt = 0
  84. for file_name in os.listdir(folder_path):
  85. if os.path.isfile(os.path.join(folder_path, file_name)):
  86. # Perform actions on each file
  87. x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
  88. tgen = tgen,
  89. mu = mu)
  90. if x == 0 or y == 0:
  91. continue
  92. cpt +=1
  93. scenari[likelihood] = x,y
  94. print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
  95. print(cpt, "theta files have been scanned.")
  96. # sort starting by the smallest -log(Likelihood)
  97. best10_scenari = (sorted(list(scenari.keys())))[:10]
  98. print("10 greatest Likelihoods", best10_scenari)
  99. greatest_likelihood = best10_scenari[0]
  100. x, y = scenari[greatest_likelihood]
  101. my_dpi = 300
  102. plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
  103. plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
  104. for scenario in best10_scenari[1:]:
  105. x,y = scenari[scenario]
  106. print("\n---- Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
  107. plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
  108. plt.ylabel("Individuals (N)")
  109. plt.xlabel("Time (years)")
  110. plt.legend(loc='upper right')
  111. plt.title(title)
  112. #plt.gcf().set_size(1000, 500)
  113. plt.savefig(title+'_b'+str(breaks)+'.pdf')
  114. #plt.show()
  115. if __name__ == "__main__":
  116. if len(sys.argv) != 4:
  117. print("Need 3 args: ThetaFolder MutationRate GenerationTime")
  118. exit(0)
  119. folder_path = sys.argv[1]
  120. mu = sys.argv[2]
  121. tgen = sys.argv[3]
  122. plot_3epochs_thetafolder(folder_path, mu, tgen)