|
@@ -1,12 +1,38 @@
|
1
|
1
|
import matplotlib.pyplot as plt
|
2
|
2
|
import os
|
3
|
3
|
import numpy as np
|
|
4
|
+import math
|
|
5
|
+from scipy.special import gammaln
|
|
6
|
+from matplotlib.backends.backend_pdf import PdfPages
|
|
7
|
+
|
|
8
|
+def log_facto(k):
|
|
9
|
+ k = int(k)
|
|
10
|
+ if k > 1e6:
|
|
11
|
+ return k * np.log(k) - k + np.log(2*math.pi*k)/2
|
|
12
|
+ val = 0
|
|
13
|
+ for i in range(2, k+1):
|
|
14
|
+ val += np.log(i)
|
|
15
|
+ return val
|
|
16
|
+
|
|
17
|
+def log_facto_1(k):
|
|
18
|
+ startf = 1 # start of factorial sequence
|
|
19
|
+ stopf = int(k+1) # end of of factorial sequence
|
|
20
|
+
|
|
21
|
+ q = gammaln(range(startf+1, stopf+1)) # n! = G(n+1)
|
|
22
|
+
|
|
23
|
+ return q[-1]
|
4
|
24
|
|
5
|
25
|
def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
|
6
|
26
|
with open(stwp_theta_file, "r") as swp_file:
|
7
|
27
|
# Read the first line
|
8
|
28
|
line = swp_file.readline()
|
9
|
29
|
L = float(line.split()[2])
|
|
30
|
+ rands = swp_file.readline()
|
|
31
|
+ line = swp_file.readline()
|
|
32
|
+ # skip empty lines before SFS
|
|
33
|
+ while line == "\n":
|
|
34
|
+ line = swp_file.readline()
|
|
35
|
+ sfs = np.array(line.split()).astype(float)
|
10
|
36
|
# Process lines until the end of the file
|
11
|
37
|
while line:
|
12
|
38
|
# check at each line
|
|
@@ -27,7 +53,7 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_
|
27
|
53
|
#### END of parsing
|
28
|
54
|
# quit this file if the number of dimensions is incorrect
|
29
|
55
|
if dim < breaks+1:
|
30
|
|
- return 0,0,0
|
|
56
|
+ return 0,0,0,0,0
|
31
|
57
|
# get n, the last bin of the last group
|
32
|
58
|
# revert the list of groups as the most recent times correspond
|
33
|
59
|
# to the closest and last leafs of the coal. tree.
|
|
@@ -90,13 +116,19 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_
|
90
|
116
|
# # # divide by N0
|
91
|
117
|
# # y[i] = y[i]/N0
|
92
|
118
|
# # x[i] = x[i]/N0
|
93
|
|
- return x,y,likelihood
|
|
119
|
+ return x,y,likelihood,sfs,L
|
94
|
120
|
|
95
|
121
|
def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
|
96
|
122
|
with open(stwp_theta_file, "r") as swp_file:
|
97
|
123
|
# Read the first line
|
98
|
124
|
line = swp_file.readline()
|
99
|
125
|
L = float(line.split()[2])
|
|
126
|
+ rands = swp_file.readline()
|
|
127
|
+ line = swp_file.readline()
|
|
128
|
+ # skip empty lines before SFS
|
|
129
|
+ while line == "\n":
|
|
130
|
+ line = swp_file.readline()
|
|
131
|
+ sfs = np.array(line.split()).astype(float)
|
100
|
132
|
# Process lines until the end of the file
|
101
|
133
|
while line:
|
102
|
134
|
# check at each line
|
|
@@ -117,7 +149,7 @@ def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, rel
|
117
|
149
|
#### END of parsing
|
118
|
150
|
# quit this file if the number of dimensions is incorrect
|
119
|
151
|
if dim < breaks+1:
|
120
|
|
- return 0,0,0
|
|
152
|
+ return 0,0
|
121
|
153
|
# get n, the last bin of the last group
|
122
|
154
|
# revert the list of groups as the most recent times correspond
|
123
|
155
|
# to the closest and last leafs of the coal. tree.
|
|
@@ -130,7 +162,7 @@ def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, rel
|
130
|
162
|
groups[i] = groups[i].split(',')
|
131
|
163
|
#print(groups[i], len(groups[i]))
|
132
|
164
|
thetas[i] = [float(theta_site[i]), groups[i], likelihood]
|
133
|
|
- return thetas
|
|
165
|
+ return thetas, sfs
|
134
|
166
|
|
135
|
167
|
def plot_k_epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title", theta_scale = True):
|
136
|
168
|
scenari = {}
|
|
@@ -138,9 +170,9 @@ def plot_k_epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"
|
138
|
170
|
for file_name in os.listdir(folder_path):
|
139
|
171
|
if os.path.isfile(os.path.join(folder_path, file_name)):
|
140
|
172
|
# Perform actions on each file
|
141
|
|
- x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
|
173
|
+ x,y,likelihood,sfs,L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
142
|
174
|
tgen = tgen,
|
143
|
|
- mu = mu, relative_theta_scale = theta_scale)
|
|
175
|
+ mu = mu, relative_theta_scale = theta_scale)
|
144
|
176
|
if x == 0 or y == 0:
|
145
|
177
|
continue
|
146
|
178
|
cpt +=1
|
|
@@ -189,15 +221,17 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
|
189
|
221
|
breaks = 0
|
190
|
222
|
cpt +=1
|
191
|
223
|
if os.path.isfile(os.path.join(folder_path, file_name)):
|
192
|
|
- x, y, likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
|
224
|
+ x, y, likelihood, sfs, L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
193
|
225
|
tgen = tgen,
|
194
|
226
|
mu = mu, relative_theta_scale = theta_scale)
|
|
227
|
+ SFS_stored = sfs
|
|
228
|
+ L_stored = L
|
195
|
229
|
while not (x == 0 and y == 0):
|
196
|
230
|
if breaks not in epochs.keys():
|
197
|
231
|
epochs[breaks] = {}
|
198
|
232
|
epochs[breaks][likelihood] = x,y
|
199
|
233
|
breaks += 1
|
200
|
|
- x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
|
234
|
+ x,y,likelihood,sfs,L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
201
|
235
|
tgen = tgen,
|
202
|
236
|
mu = mu, relative_theta_scale = theta_scale)
|
203
|
237
|
print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
|
|
@@ -208,7 +242,7 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
|
208
|
242
|
plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
209
|
243
|
plt.xlim(1e-3, 1)
|
210
|
244
|
#plt.ylim(0, 10)
|
211
|
|
- #plt.yscale('log')
|
|
245
|
+ plt.yscale('log')
|
212
|
246
|
plt.xscale('log')
|
213
|
247
|
plt.grid(True,which="both", linestyle='--', alpha = 0.3)
|
214
|
248
|
brkpt_lik = []
|
|
@@ -228,7 +262,15 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
|
228
|
262
|
# divide by N0
|
229
|
263
|
y[i] = y[i]/N0
|
230
|
264
|
x[i] = x[i]/N0
|
231
|
|
- plt.plot(x, y, '-', alpha=0.75, lw=2, label = str(epoch)+' BrkPt | Lik='+greatest_likelihood)
|
|
265
|
+ sum_theta_i = 0
|
|
266
|
+ print(epoch, x, y)
|
|
267
|
+ for i in range(2, len(y)-1):
|
|
268
|
+ sum_theta_i=y[i] / (i-1)
|
|
269
|
+ prop = []
|
|
270
|
+ for k in range(2, len(y)-1):
|
|
271
|
+ prop.append(y[k+1] / (k - 1) / sum_theta_i)
|
|
272
|
+ #print(epoch, prop)
|
|
273
|
+ plt.plot(x, y, 'o', linestyle = "-", alpha=0.75, lw=2, label = str(epoch)+' BrkPt | Lik='+greatest_likelihood)
|
232
|
274
|
if theta_scale:
|
233
|
275
|
plt.xlabel("Coal. time")
|
234
|
276
|
plt.ylabel("Pop. size scaled by N0")
|
|
@@ -246,35 +288,67 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
|
246
|
288
|
plt.title(title)
|
247
|
289
|
plt.savefig(title+'_b'+str(breaks)+'.pdf')
|
248
|
290
|
# plot likelihood against nb of breakpoints
|
|
291
|
+
|
|
292
|
+ # best possible likelihood from SFS
|
|
293
|
+ # Segregating sites
|
|
294
|
+ S = sum(SFS_stored)
|
|
295
|
+ # number of monomorphic sites
|
|
296
|
+ L = L_stored
|
|
297
|
+ S0 = L-S
|
|
298
|
+ print("SFS", SFS_stored)
|
|
299
|
+ print("S", S, "L", L, "S0=", S0)
|
|
300
|
+ # compute Ln
|
|
301
|
+ Ln = log_facto(S+S0) - log_facto(S0) + np.log(float(S0)/(S+S0)) * S0
|
|
302
|
+ for xi in range(0, len(SFS_stored)):
|
|
303
|
+ p_i = SFS_stored[xi] / float(S+S0)
|
|
304
|
+ Ln += np.log(p_i) * SFS_stored[xi] - log_facto(SFS_stored[xi])
|
|
305
|
+ res = Ln
|
|
306
|
+ print(res)
|
|
307
|
+ # basic plot likelihood
|
249
|
308
|
plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
250
|
309
|
plt.rcParams['font.size'] = '18'
|
251
|
|
- AIC = 2*(len(brkpt_lik)+1)+2*np.array(brkpt_lik)[:, 1].astype(float)
|
252
|
|
- plt.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
|
253
|
|
- plt.axhline(y=106)
|
|
310
|
+ plt.plot(np.array(brkpt_lik)[:, 0], np.array(brkpt_lik)[:, 1].astype(float), 'o', linestyle = "dotted", lw=2)
|
|
311
|
+ # plt.ylim(0,100)
|
|
312
|
+ # plt.axhline(y=res)
|
254
|
313
|
plt.yscale('log')
|
255
|
314
|
plt.xlabel("# breakpoints", fontsize=20)
|
256
|
315
|
plt.ylabel("$-\log\mathcal{L}$")
|
257
|
316
|
#plt.legend(loc='upper right')
|
258
|
317
|
plt.title(title)
|
259
|
318
|
plt.savefig(title+'_Breakpts_Likelihood.pdf')
|
|
319
|
+ # AIC
|
|
320
|
+ plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
|
321
|
+ plt.rcParams['font.size'] = '18'
|
|
322
|
+ AIC = 2*(len(brkpt_lik)+1)+2*np.array(brkpt_lik)[:, 1].astype(float)
|
|
323
|
+ plt.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
|
|
324
|
+ # plt.axhline(y=106)
|
|
325
|
+ plt.yscale('log')
|
|
326
|
+ plt.xlabel("# breakpoints", fontsize=20)
|
|
327
|
+ plt.ylabel("AIC")
|
|
328
|
+ #plt.legend(loc='upper right')
|
|
329
|
+ plt.title(title)
|
|
330
|
+ plt.savefig(title+'_Breakpts_Likelihood_AIC.pdf')
|
260
|
331
|
|
261
|
|
-def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 6):
|
|
332
|
+def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 5):
|
|
333
|
+ """
|
|
334
|
+ Use theta values as is to do basic plots.
|
|
335
|
+ """
|
262
|
336
|
cpt = 0
|
263
|
337
|
epochs = {}
|
264
|
338
|
for file_name in os.listdir(folder_path):
|
265
|
339
|
cpt +=1
|
266
|
340
|
if os.path.isfile(os.path.join(folder_path, file_name)):
|
267
|
341
|
for k in range(breaks_max):
|
268
|
|
- thetas = return_x_y_from_stwp_theta_file_as_is(folder_path+file_name, breaks = k,
|
|
342
|
+ thetas,sfs = return_x_y_from_stwp_theta_file_as_is(folder_path+file_name, breaks = k,
|
269
|
343
|
tgen = tgen,
|
270
|
344
|
mu = mu, relative_theta_scale = theta_scale)
|
271
|
|
- if thetas[0] == 0:
|
|
345
|
+ if thetas == 0:
|
272
|
346
|
continue
|
273
|
347
|
epochs[k] = thetas
|
274
|
348
|
print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
|
275
|
349
|
print(cpt, "theta file(s) have been scanned.")
|
276
|
350
|
|
277
|
|
- # intialize figure
|
|
351
|
+ # intialize figure 1
|
278
|
352
|
my_dpi = 300
|
279
|
353
|
plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
280
|
354
|
for epoch, theta in epochs.items():
|
|
@@ -294,7 +368,7 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
|
294
|
368
|
plt.ylabel("theta")
|
295
|
369
|
plt.legend(loc='upper right')
|
296
|
370
|
plt.savefig(title+'_test'+str(k)+'.pdf')
|
297
|
|
- # fig 2
|
|
371
|
+ # fig 2 & 3
|
298
|
372
|
plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
299
|
373
|
for epoch, theta in epochs.items():
|
300
|
374
|
groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
|
|
@@ -308,29 +382,41 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
|
308
|
382
|
N0 = y[0]
|
309
|
383
|
for i in range(len(y)):
|
310
|
384
|
y[i] = y[i]/N0
|
311
|
|
- #
|
312
|
385
|
x_2 = []
|
313
|
386
|
T = 0
|
314
|
|
- # k allant de de 14 à 2
|
315
|
387
|
for i in range(len(x)):
|
316
|
388
|
x[i] = int(x[i])
|
317
|
|
- #print(x[2])
|
|
389
|
+ # compute the times as: theta_k / (k*(k-1))
|
318
|
390
|
for i in range(0, len(x)):
|
319
|
|
- k = x[i]
|
320
|
|
- #print(k, y[k-2])
|
321
|
|
-
|
322
|
|
- #theta_k = y[k] / (k*(k-1))
|
323
|
391
|
T += y[i] / (x[i]*(x[i]-1))
|
324
|
392
|
x_2.append(T)
|
|
393
|
+ # Plotting (fig 2)
|
|
394
|
+ plt.plot(x_2, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
|
|
395
|
+ plt.xlabel("# breaks")
|
|
396
|
+ plt.ylabel("theta")
|
|
397
|
+ plt.legend(loc='upper right')
|
|
398
|
+ plt.savefig(title+'_test'+str(k)+'.pdf')
|
325
|
399
|
|
326
|
|
- plt.plot(x_2, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
|
327
|
|
- plt.xscale('log')
|
328
|
|
- plt.xlabel("# breaks")
|
329
|
|
- plt.ylabel("theta")
|
330
|
|
- plt.legend(loc='upper right')
|
331
|
|
- plt.savefig(title+'_test'+str(k)+'.pdf')
|
332
|
|
- #
|
|
400
|
+ # Plotting (fig 3) which is the same but log scale for x
|
|
401
|
+ plt.plot(x_2, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
|
|
402
|
+ plt.xscale('log')
|
|
403
|
+ plt.xlabel("# breaks")
|
|
404
|
+ plt.ylabel("theta")
|
|
405
|
+ plt.legend(loc='upper right')
|
|
406
|
+ plt.savefig(title+'_test'+str(k)+'_log.pdf')
|
|
407
|
+
|
|
408
|
+def save_multi_image(filename):
|
|
409
|
+ pp = PdfPages(filename)
|
|
410
|
+ fig_nums = plt.get_fignums()
|
|
411
|
+ figs = [plt.figure(n) for n in fig_nums]
|
|
412
|
+ for fig in figs:
|
|
413
|
+ fig.savefig(pp, format='pdf')
|
|
414
|
+ pp.close()
|
333
|
415
|
|
|
416
|
+def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True):
|
|
417
|
+ plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale)
|
|
418
|
+ plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks)
|
|
419
|
+ save_multi_image(title+"_combined.pdf")
|
334
|
420
|
if __name__ == "__main__":
|
335
|
421
|
|
336
|
422
|
if len(sys.argv) != 4:
|