swp2.py 4.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. import matplotlib.pyplot as plt
  2. import os
  3. def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
  4. with open(stwp_theta_file, "r") as swp_file:
  5. # Read the first line
  6. line = swp_file.readline()
  7. L = float(line.split()[2])
  8. # Process lines until the end of the file
  9. while line:
  10. # check at each line
  11. if line.startswith("dim") :
  12. dim = int(line.split()[1])
  13. if dim == breaks+1:
  14. likelihood = line.split()[5]
  15. groups = line.split()[6:6+dim]
  16. theta_site = line.split()[6+dim:6+dim+1+dim]
  17. elif dim < breaks+1:
  18. line = swp_file.readline()
  19. continue
  20. elif dim > breaks+1:
  21. break
  22. #return 0,0,0
  23. # Read the next line
  24. line = swp_file.readline()
  25. #### END of parsing
  26. # quit this file if the number of dimensions is incorrect
  27. if dim < breaks+1:
  28. return 0,0,0
  29. # get n, the last bin of the last group
  30. n = int(groups[-1].split(",")[-1])
  31. # initiate the dict of times
  32. t = {}
  33. # list of thetas
  34. theta_L = []
  35. sum_t = 0
  36. #groups.sort(reverse=True)
  37. for group_nb, group in enumerate(groups):
  38. print(group_nb, group)
  39. # store all the thetas one by one, with one theta per group
  40. theta_L.append(float(theta_site[group_nb]))
  41. # if the group is of size 1
  42. if len(group.split(',')) == 1:
  43. i = int(group)
  44. # if the group size is >1, take the first elem of the group
  45. # i is the first bin of each group, straight after a breakpoint
  46. else:
  47. i = int(group.split(",")[0])
  48. j = int(group.split(",")[-1])
  49. t[i] = 0
  50. if len(group.split(',')) == 1:
  51. k = i
  52. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  53. else:
  54. for k in range(i, j+1):
  55. t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
  56. # we add the cumulative times at the end
  57. t[i] += sum_t
  58. sum_t = t[i]
  59. # build the y axis (sizes)
  60. y = []
  61. #theta_L.sort(reverse=True)
  62. for theta in theta_L:
  63. # with size N = theta/4mu
  64. size = theta / (4*mu)
  65. y.append(size)
  66. y.append(size)
  67. # build the time x axis
  68. x = [0]
  69. for time in range(0, len(t.values())-1):
  70. x.append(list(t.values())[time])
  71. x.append(list(t.values())[time])
  72. x.append(list(t.values())[len(t.values())-1])
  73. #x.sort(reverse=True)
  74. #y.sort(reverse=True)
  75. return x,y,likelihood
  76. def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
  77. scenari = {}
  78. cpt = 0
  79. for file_name in os.listdir(folder_path):
  80. if os.path.isfile(os.path.join(folder_path, file_name)):
  81. # Perform actions on each file
  82. x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
  83. tgen = tgen,
  84. mu = mu)
  85. if x == 0 or y == 0:
  86. continue
  87. cpt +=1
  88. scenari[likelihood] = x,y
  89. print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
  90. print(cpt, "theta files have been scanned.")
  91. # sort starting by the smallest -log(Likelihood)
  92. best10_scenari = (sorted(list(scenari.keys())))[:10]
  93. print("10 greatest Likelihoods", best10_scenari)
  94. greatest_likelihood = best10_scenari[0]
  95. x, y = scenari[greatest_likelihood]
  96. my_dpi = 300
  97. plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
  98. plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
  99. for scenario in best10_scenari[1:]:
  100. x,y = scenari[scenario]
  101. print("\n---- Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
  102. plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
  103. plt.ylabel("Individuals (N)")
  104. plt.xlabel("Time (years)")
  105. plt.legend(loc='upper right')
  106. plt.title(title)
  107. #plt.gcf().set_size(1000, 500)
  108. plt.savefig(title+'_b'+str(breaks)+'.pdf')
  109. #plt.show()
  110. if __name__ == "__main__":
  111. if len(sys.argv) != 4:
  112. print("Need 3 args: ThetaFolder MutationRate GenerationTime")
  113. exit(0)
  114. folder_path = sys.argv[1]
  115. mu = sys.argv[2]
  116. tgen = sys.argv[3]
  117. plot_3epochs_thetafolder(folder_path, mu, tgen)