|
@@ -1,7 +1,8 @@
|
1
|
1
|
import matplotlib.pyplot as plt
|
2
|
2
|
import os
|
|
3
|
+import numpy as np
|
3
|
4
|
|
4
|
|
-def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
|
|
5
|
+def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
|
5
|
6
|
with open(stwp_theta_file, "r") as swp_file:
|
6
|
7
|
# Read the first line
|
7
|
8
|
line = swp_file.readline()
|
|
@@ -53,18 +54,27 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
|
53
|
54
|
#t =
|
54
|
55
|
if len(group.split(',')) == 1:
|
55
|
56
|
k = i
|
56
|
|
- t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
|
|
57
|
+ if relative_theta_scale:
|
|
58
|
+ t[i] += ((theta_L[group_nb] ) / (k*(k-1)))
|
|
59
|
+ else:
|
|
60
|
+ t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
|
57
|
61
|
else:
|
58
|
62
|
for k in range(j, i-1, -1 ):
|
59
|
|
- t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
|
|
63
|
+ if relative_theta_scale:
|
|
64
|
+ t[i] += ((theta_L[group_nb] ) / (k*(k-1)))
|
|
65
|
+ else:
|
|
66
|
+ t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
|
60
|
67
|
# we add the cumulative times at the end
|
61
|
68
|
t[i] += sum_t
|
62
|
69
|
sum_t = t[i]
|
63
|
70
|
# build the y axis (sizes)
|
64
|
71
|
y = []
|
65
|
72
|
for theta in theta_L:
|
66
|
|
- # with size N = theta/4mu
|
67
|
|
- size = theta / (4*mu)
|
|
73
|
+ if relative_theta_scale:
|
|
74
|
+ size = theta
|
|
75
|
+ else:
|
|
76
|
+ # with size N = theta/4mu
|
|
77
|
+ size = theta / (4*mu)
|
68
|
78
|
y.append(size)
|
69
|
79
|
y.append(size)
|
70
|
80
|
# build the time x axis
|
|
@@ -73,19 +83,64 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
|
73
|
83
|
x.append(list(t.values())[time])
|
74
|
84
|
x.append(list(t.values())[time])
|
75
|
85
|
x.append(list(t.values())[len(t.values())-1])
|
|
86
|
+ # if relative_theta_scale:
|
|
87
|
+ # # rescale
|
|
88
|
+ # #N0 = y[0]
|
|
89
|
+ # # for i in range(len(y)):
|
|
90
|
+ # # # divide by N0
|
|
91
|
+ # # y[i] = y[i]/N0
|
|
92
|
+ # # x[i] = x[i]/N0
|
76
|
93
|
return x,y,likelihood
|
77
|
94
|
|
78
|
|
-def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
|
|
95
|
+def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
|
|
96
|
+ with open(stwp_theta_file, "r") as swp_file:
|
|
97
|
+ # Read the first line
|
|
98
|
+ line = swp_file.readline()
|
|
99
|
+ L = float(line.split()[2])
|
|
100
|
+ # Process lines until the end of the file
|
|
101
|
+ while line:
|
|
102
|
+ # check at each line
|
|
103
|
+ if line.startswith("dim") :
|
|
104
|
+ dim = int(line.split()[1])
|
|
105
|
+ if dim == breaks+1:
|
|
106
|
+ likelihood = line.split()[5]
|
|
107
|
+ groups = line.split()[6:6+dim]
|
|
108
|
+ theta_site = line.split()[6+dim:6+dim+1+dim]
|
|
109
|
+ elif dim < breaks+1:
|
|
110
|
+ line = swp_file.readline()
|
|
111
|
+ continue
|
|
112
|
+ elif dim > breaks+1:
|
|
113
|
+ break
|
|
114
|
+ #return 0,0,0
|
|
115
|
+ # Read the next line
|
|
116
|
+ line = swp_file.readline()
|
|
117
|
+ #### END of parsing
|
|
118
|
+ # quit this file if the number of dimensions is incorrect
|
|
119
|
+ if dim < breaks+1:
|
|
120
|
+ return 0,0,0
|
|
121
|
+ # get n, the last bin of the last group
|
|
122
|
+ # revert the list of groups as the most recent times correspond
|
|
123
|
+ # to the closest and last leafs of the coal. tree.
|
|
124
|
+ groups = groups[::-1]
|
|
125
|
+ theta_site = theta_site[::-1]
|
|
126
|
+
|
|
127
|
+ thetas = {}
|
79
|
128
|
|
|
129
|
+ for i in range(len(groups)):
|
|
130
|
+ groups[i] = groups[i].split(',')
|
|
131
|
+ #print(groups[i], len(groups[i]))
|
|
132
|
+ thetas[i] = [float(theta_site[i]), groups[i], likelihood]
|
|
133
|
+ return thetas
|
|
134
|
+
|
|
135
|
+def plot_k_epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title", theta_scale = True):
|
80
|
136
|
scenari = {}
|
81
|
137
|
cpt = 0
|
82
|
|
-
|
83
|
138
|
for file_name in os.listdir(folder_path):
|
84
|
139
|
if os.path.isfile(os.path.join(folder_path, file_name)):
|
85
|
140
|
# Perform actions on each file
|
86
|
141
|
x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
87
|
142
|
tgen = tgen,
|
88
|
|
- mu = mu)
|
|
143
|
+ mu = mu, relative_theta_scale = theta_scale)
|
89
|
144
|
if x == 0 or y == 0:
|
90
|
145
|
continue
|
91
|
146
|
cpt +=1
|
|
@@ -93,6 +148,7 @@ def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title")
|
93
|
148
|
print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
|
94
|
149
|
print(cpt, "theta file(s) have been scanned.")
|
95
|
150
|
# sort starting by the smallest -log(Likelihood)
|
|
151
|
+ print(scenari)
|
96
|
152
|
best10_scenari = (sorted(list(scenari.keys())))[:10]
|
97
|
153
|
print("10 greatest Likelihoods", best10_scenari)
|
98
|
154
|
greatest_likelihood = best10_scenari[0]
|
|
@@ -100,21 +156,181 @@ def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title")
|
100
|
156
|
my_dpi = 300
|
101
|
157
|
plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
102
|
158
|
plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
|
103
|
|
- plt.yscale('log')
|
104
|
|
- #plt.xscale('log')
|
105
|
|
- plt.grid(True,which="both", linestyle='--')
|
|
159
|
+ plt.xlim(1e-3, 1)
|
|
160
|
+ plt.ylim(0, 10)
|
|
161
|
+ #plt.yscale('log')
|
|
162
|
+ plt.xscale('log')
|
|
163
|
+ plt.grid(True,which="both", linestyle='--', alpha = 0.3)
|
106
|
164
|
|
107
|
165
|
for scenario in best10_scenari[1:]:
|
108
|
166
|
x,y = scenari[scenario]
|
109
|
167
|
#print("\n---- Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
|
110
|
168
|
plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
|
111
|
|
- plt.ylabel("Individuals (N)")
|
112
|
|
- plt.xlabel("Time (years)")
|
|
169
|
+ if theta_scale:
|
|
170
|
+ plt.xlabel("Coal. time")
|
|
171
|
+ plt.ylabel("Pop. size scaled by N0")
|
|
172
|
+ recent_scale_lower_bound = y[0] * 0.01
|
|
173
|
+ recent_scale_upper_bound = y[0] * 0.1
|
|
174
|
+ plt.axvline(x=recent_scale_lower_bound)
|
|
175
|
+ plt.axvline(x=recent_scale_upper_bound)
|
|
176
|
+ else:
|
|
177
|
+ # years
|
|
178
|
+ plt.xlabel("Time (years)")
|
|
179
|
+ plt.ylabel("Individuals (N)")
|
113
|
180
|
plt.legend(loc='upper right')
|
114
|
181
|
plt.title(title)
|
115
|
|
- #plt.gcf().set_size(1000, 500)
|
116
|
182
|
plt.savefig(title+'_b'+str(breaks)+'.pdf')
|
117
|
183
|
|
|
184
|
+def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_scale = True):
|
|
185
|
+ #scenari = {}
|
|
186
|
+ cpt = 0
|
|
187
|
+ epochs = {}
|
|
188
|
+ for file_name in os.listdir(folder_path):
|
|
189
|
+ breaks = 0
|
|
190
|
+ cpt +=1
|
|
191
|
+ if os.path.isfile(os.path.join(folder_path, file_name)):
|
|
192
|
+ x, y, likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
|
193
|
+ tgen = tgen,
|
|
194
|
+ mu = mu, relative_theta_scale = theta_scale)
|
|
195
|
+ while not (x == 0 and y == 0):
|
|
196
|
+ if breaks not in epochs.keys():
|
|
197
|
+ epochs[breaks] = {}
|
|
198
|
+ epochs[breaks][likelihood] = x,y
|
|
199
|
+ breaks += 1
|
|
200
|
+ x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
|
201
|
+ tgen = tgen,
|
|
202
|
+ mu = mu, relative_theta_scale = theta_scale)
|
|
203
|
+ print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
|
|
204
|
+ print(cpt, "theta file(s) have been scanned.")
|
|
205
|
+
|
|
206
|
+ # intialize figure
|
|
207
|
+ my_dpi = 300
|
|
208
|
+ plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
|
209
|
+ plt.xlim(1e-3, 1)
|
|
210
|
+ #plt.ylim(0, 10)
|
|
211
|
+ #plt.yscale('log')
|
|
212
|
+ plt.xscale('log')
|
|
213
|
+ plt.grid(True,which="both", linestyle='--', alpha = 0.3)
|
|
214
|
+ brkpt_lik = []
|
|
215
|
+ for epoch, scenari in epochs.items():
|
|
216
|
+ # sort starting by the smallest -log(Likelihood)
|
|
217
|
+ best10_scenari = (sorted(list(scenari.keys())))[:10]
|
|
218
|
+ greatest_likelihood = best10_scenari[0]
|
|
219
|
+ # store the tuple breakpoints and likelihood for later plot
|
|
220
|
+ brkpt_lik.append((epoch, greatest_likelihood))
|
|
221
|
+ x, y = scenari[greatest_likelihood]
|
|
222
|
+ #without breakpoint
|
|
223
|
+ if epoch == 0:
|
|
224
|
+ # do something with the theta without bp and skip the plotting
|
|
225
|
+ N0 = y[0]
|
|
226
|
+ #continue
|
|
227
|
+ for i in range(len(y)):
|
|
228
|
+ # divide by N0
|
|
229
|
+ y[i] = y[i]/N0
|
|
230
|
+ x[i] = x[i]/N0
|
|
231
|
+ plt.plot(x, y, '-', alpha=0.75, lw=2, label = str(epoch)+' BrkPt | Lik='+greatest_likelihood)
|
|
232
|
+ if theta_scale:
|
|
233
|
+ plt.xlabel("Coal. time")
|
|
234
|
+ plt.ylabel("Pop. size scaled by N0")
|
|
235
|
+ recent_scale_lower_bound = 0.01
|
|
236
|
+ recent_scale_upper_bound = 0.1
|
|
237
|
+ #print(recent_scale_lower_bound, recent_scale_upper_bound)
|
|
238
|
+ plt.axvline(x=recent_scale_lower_bound)
|
|
239
|
+ plt.axvline(x=recent_scale_upper_bound)
|
|
240
|
+ else:
|
|
241
|
+ # years
|
|
242
|
+ plt.xlabel("Time (years)")
|
|
243
|
+ plt.ylabel("Individuals (N)")
|
|
244
|
+ plt.xlim(1e-5, 1)
|
|
245
|
+ plt.legend(loc='upper right')
|
|
246
|
+ plt.title(title)
|
|
247
|
+ plt.savefig(title+'_b'+str(breaks)+'.pdf')
|
|
248
|
+ # plot likelihood against nb of breakpoints
|
|
249
|
+ plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
|
250
|
+ plt.rcParams['font.size'] = '18'
|
|
251
|
+ AIC = 2*(len(brkpt_lik)+1)+2*np.array(brkpt_lik)[:, 1].astype(float)
|
|
252
|
+ plt.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
|
|
253
|
+ plt.axhline(y=106)
|
|
254
|
+ plt.yscale('log')
|
|
255
|
+ plt.xlabel("# breakpoints", fontsize=20)
|
|
256
|
+ plt.ylabel("$-\log\mathcal{L}$")
|
|
257
|
+ #plt.legend(loc='upper right')
|
|
258
|
+ plt.title(title)
|
|
259
|
+ plt.savefig(title+'_Breakpts_Likelihood.pdf')
|
|
260
|
+
|
|
261
|
+def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 6):
|
|
262
|
+ cpt = 0
|
|
263
|
+ epochs = {}
|
|
264
|
+ for file_name in os.listdir(folder_path):
|
|
265
|
+ cpt +=1
|
|
266
|
+ if os.path.isfile(os.path.join(folder_path, file_name)):
|
|
267
|
+ for k in range(breaks_max):
|
|
268
|
+ thetas = return_x_y_from_stwp_theta_file_as_is(folder_path+file_name, breaks = k,
|
|
269
|
+ tgen = tgen,
|
|
270
|
+ mu = mu, relative_theta_scale = theta_scale)
|
|
271
|
+ if thetas[0] == 0:
|
|
272
|
+ continue
|
|
273
|
+ epochs[k] = thetas
|
|
274
|
+ print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
|
|
275
|
+ print(cpt, "theta file(s) have been scanned.")
|
|
276
|
+
|
|
277
|
+ # intialize figure
|
|
278
|
+ my_dpi = 300
|
|
279
|
+ plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
|
280
|
+ for epoch, theta in epochs.items():
|
|
281
|
+ groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
|
|
282
|
+ x = []
|
|
283
|
+ y = []
|
|
284
|
+ thetas = np.array(list(theta.values()), dtype=object)[:, 0]
|
|
285
|
+ for i,group in enumerate(groups):
|
|
286
|
+ x += group[::-1]
|
|
287
|
+ y += list(np.repeat(thetas[i], len(group)))
|
|
288
|
+ if epoch == 0:
|
|
289
|
+ N0 = y[0]
|
|
290
|
+ for i in range(len(y)):
|
|
291
|
+ y[i] = y[i]/N0
|
|
292
|
+ plt.plot(x, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
|
|
293
|
+ plt.xlabel("# breaks")
|
|
294
|
+ plt.ylabel("theta")
|
|
295
|
+ plt.legend(loc='upper right')
|
|
296
|
+ plt.savefig(title+'_test'+str(k)+'.pdf')
|
|
297
|
+ # fig 2
|
|
298
|
+ plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
|
299
|
+ for epoch, theta in epochs.items():
|
|
300
|
+ groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
|
|
301
|
+ x = []
|
|
302
|
+ y = []
|
|
303
|
+ thetas = np.array(list(theta.values()), dtype=object)[:, 0]
|
|
304
|
+ for i,group in enumerate(groups):
|
|
305
|
+ x += group[::-1]
|
|
306
|
+ y += list(np.repeat(thetas[i], len(group)))
|
|
307
|
+ if epoch == 0:
|
|
308
|
+ N0 = y[0]
|
|
309
|
+ for i in range(len(y)):
|
|
310
|
+ y[i] = y[i]/N0
|
|
311
|
+ #
|
|
312
|
+ x_2 = []
|
|
313
|
+ T = 0
|
|
314
|
+ # k allant de de 14 à 2
|
|
315
|
+ for i in range(len(x)):
|
|
316
|
+ x[i] = int(x[i])
|
|
317
|
+ #print(x[2])
|
|
318
|
+ for i in range(0, len(x)):
|
|
319
|
+ k = x[i]
|
|
320
|
+ #print(k, y[k-2])
|
|
321
|
+
|
|
322
|
+ #theta_k = y[k] / (k*(k-1))
|
|
323
|
+ T += y[i] / (x[i]*(x[i]-1))
|
|
324
|
+ x_2.append(T)
|
|
325
|
+
|
|
326
|
+ plt.plot(x_2, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
|
|
327
|
+ plt.xscale('log')
|
|
328
|
+ plt.xlabel("# breaks")
|
|
329
|
+ plt.ylabel("theta")
|
|
330
|
+ plt.legend(loc='upper right')
|
|
331
|
+ plt.savefig(title+'_test'+str(k)+'.pdf')
|
|
332
|
+ #
|
|
333
|
+
|
118
|
334
|
if __name__ == "__main__":
|
119
|
335
|
|
120
|
336
|
if len(sys.argv) != 4:
|
|
@@ -125,4 +341,4 @@ if __name__ == "__main__":
|
125
|
341
|
mu = sys.argv[2]
|
126
|
342
|
tgen = sys.argv[3]
|
127
|
343
|
|
128
|
|
- plot_3epochs_thetafolder(folder_path, mu, tgen)
|
|
344
|
+ plot_all_epochs_thetafolder(folder_path, mu, tgen)
|