swp2.py 4.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. import matplotlib.pyplot as plt
  2. import os
  3. def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
  4. with open(stwp_theta_file, "r") as swp_file:
  5. # Read the first line
  6. line = swp_file.readline()
  7. L = float(line.split()[2])
  8. # Process lines until the end of the file
  9. while line:
  10. # check at each line
  11. if line.startswith("dim") :
  12. dim = int(line.split()[1])
  13. if dim == breaks+1:
  14. likelihood = line.split()[5]
  15. groups = line.split()[6:6+dim]
  16. theta_site = line.split()[6+dim:6+dim+1+dim]
  17. elif dim < breaks+1:
  18. line = swp_file.readline()
  19. continue
  20. elif dim > breaks+1:
  21. break
  22. #return 0,0,0
  23. # Read the next line
  24. line = swp_file.readline()
  25. #### END of parsing
  26. # quit this file if the number of dimensions is incorrect
  27. if dim < breaks+1:
  28. return 0,0,0
  29. # get n, the last bin of the last group
  30. n = int(groups[-1].split(",")[-1])
  31. # initiate the dict of times
  32. t = {}
  33. # list of thetas
  34. theta_L = []
  35. sum_t = 0
  36. for group_nb, group in enumerate(groups):
  37. # store all the thetas one by one, with one theta per group
  38. theta_L.append(float(theta_site[group_nb]))
  39. # if the group is of size 1
  40. if len(group.split(',')) == 1:
  41. i = int(group)
  42. # if the group size is >1, take the first elem of the group
  43. # i is the first bin of each group, straight after a breakpoint
  44. else:
  45. i = int(group.split(",")[0])
  46. j = int(group.split(",")[-1])
  47. t[i] = 0
  48. if len(group.split(',')) == 1:
  49. k = i
  50. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  51. else:
  52. for k in range(i, j+1):
  53. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  54. # we add the cumulative times at the end
  55. t[i] += sum_t
  56. sum_t = t[i]
  57. # build the y axis (sizes)
  58. y = []
  59. for theta in theta_L:
  60. # with size N = theta/4mu
  61. size = theta / (4*mu)
  62. y.append(size)
  63. y.append(size)
  64. # build the time x axis
  65. x = [0]
  66. for time in range(0, len(t.values())-1):
  67. x.append(list(t.values())[time])
  68. x.append(list(t.values())[time])
  69. x.append(list(t.values())[len(t.values())-1])
  70. return x,y,likelihood
  71. def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
  72. scenari = {}
  73. cpt = 0
  74. for file_name in os.listdir(folder_path):
  75. if os.path.isfile(os.path.join(folder_path, file_name)):
  76. # Perform actions on each file
  77. x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
  78. tgen = tgen,
  79. mu = mu)
  80. if x == 0 or y == 0:
  81. continue
  82. cpt +=1
  83. scenari[likelihood] = x,y
  84. print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
  85. print(cpt, "theta files have been scanned.")
  86. # sort starting by the smallest -log(Likelihood)
  87. best10_scenari = (sorted(list(scenari.keys())))[:10]
  88. print("10 greatest Likelihoods", best10_scenari)
  89. greatest_likelihood = best10_scenari[0]
  90. x, y = scenari[greatest_likelihood]
  91. my_dpi = 300
  92. plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
  93. plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
  94. for scenario in best10_scenari[1:]:
  95. x,y = scenari[scenario]
  96. print("\n---- Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
  97. plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
  98. plt.ylabel("Individuals (N)")
  99. plt.xlabel("Time (years)")
  100. plt.legend(loc='upper right')
  101. plt.title(title)
  102. #plt.gcf().set_size(1000, 500)
  103. plt.savefig(title+'_b'+str(breaks)+'.pdf')
  104. #plt.show()
  105. if __name__ == "__main__":
  106. if len(sys.argv) != 4:
  107. print("Need 3 args: ThetaFolder MutationRate GenerationTime")
  108. exit(0)
  109. folder_path = sys.argv[1]
  110. mu = sys.argv[2]
  111. tgen = sys.argv[3]
  112. plot_3epochs_thetafolder(folder_path, mu, tgen)