|
@@ -124,158 +124,6 @@ def plot_straight_x_y(x,y):
|
124
|
124
|
x_1.append(x[-1])
|
125
|
125
|
return x_1, y_1
|
126
|
126
|
|
127
|
|
-def plot_all_epochs_thetafolder_old(folder_path, mu, tgen, title = "Title",
|
128
|
|
- theta_scale = True, ax = None, input = None, output = None):
|
129
|
|
- #scenari = {}
|
130
|
|
- cpt = 0
|
131
|
|
- epochs = {}
|
132
|
|
- for file_name in os.listdir(folder_path):
|
133
|
|
- breaks = 0
|
134
|
|
- cpt +=1
|
135
|
|
- if os.path.isfile(os.path.join(folder_path, file_name)):
|
136
|
|
- x, y, likelihood, theta, sfs, L = parse_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
137
|
|
- tgen = tgen,
|
138
|
|
- mu = mu, relative_theta_scale = theta_scale)
|
139
|
|
- SFS_stored = sfs
|
140
|
|
- L_stored = L
|
141
|
|
- while not (x == 0 and y == 0):
|
142
|
|
- if breaks not in epochs.keys():
|
143
|
|
- epochs[breaks] = {}
|
144
|
|
- epochs[breaks][likelihood] = x,y
|
145
|
|
- breaks += 1
|
146
|
|
- x,y,likelihood,theta,sfs,L = parse_stwp_theta_file(folder_path+file_name, breaks = breaks,
|
147
|
|
- tgen = tgen,
|
148
|
|
- mu = mu, relative_theta_scale = theta_scale)
|
149
|
|
- if x == 0:
|
150
|
|
- # last break did not work, then breaks = breaks-1
|
151
|
|
- breaks -= 1
|
152
|
|
- print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
|
153
|
|
- print(cpt, "theta file(s) have been scanned.")
|
154
|
|
- my_dpi = 300
|
155
|
|
- if ax is None:
|
156
|
|
- # intialize figure
|
157
|
|
- my_dpi = 300
|
158
|
|
- fnt_size = 18
|
159
|
|
- # plt.rcParams['font.size'] = fnt_size
|
160
|
|
- fig, ax1 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
161
|
|
- else:
|
162
|
|
- fnt_size = 12
|
163
|
|
- # plt.rcParams['font.size'] = fnt_size
|
164
|
|
- ax1 = ax[1][0,0]
|
165
|
|
- ax1.set_yscale('log')
|
166
|
|
- ax1.set_xscale('log')
|
167
|
|
- ax1.grid(True,which="both", linestyle='--', alpha = 0.3)
|
168
|
|
- brkpt_lik = []
|
169
|
|
- top_plots = {}
|
170
|
|
- for epoch, scenari in epochs.items():
|
171
|
|
- # sort starting by the smallest -log(Likelihood)
|
172
|
|
- best10_scenari = (sorted(list(scenari.keys())))[:10]
|
173
|
|
- greatest_likelihood = best10_scenari[0]
|
174
|
|
- # store the tuple breakpoints and likelihood for later plot
|
175
|
|
- brkpt_lik.append((epoch, greatest_likelihood))
|
176
|
|
- x, y = scenari[greatest_likelihood]
|
177
|
|
- #without breakpoint
|
178
|
|
- if epoch == 0:
|
179
|
|
- # do something with the theta without bp and skip the plotting
|
180
|
|
- N0 = y[0]
|
181
|
|
- #continue
|
182
|
|
- for i in range(len(y)):
|
183
|
|
- # divide by N0
|
184
|
|
- y[i] = y[i]/N0
|
185
|
|
- x[i] = x[i]/N0
|
186
|
|
- top_plots[greatest_likelihood] = x,y,epoch
|
187
|
|
- plots_likelihoods = list(top_plots.keys())
|
188
|
|
- for i in range(len(plots_likelihoods)):
|
189
|
|
- plots_likelihoods[i] = float(plots_likelihoods[i])
|
190
|
|
- best10_plots = sorted(plots_likelihoods)[:10]
|
191
|
|
- top_plot_lik = str(best10_plots[0])
|
192
|
|
- plot_handles = []
|
193
|
|
- # plt.rcParams['font.size'] = fnt_size
|
194
|
|
- p0, = ax1.plot(top_plots[top_plot_lik][0], top_plots[top_plot_lik][1], 'o', linestyle = "-",
|
195
|
|
- alpha=1, lw=2, label = str(top_plots[top_plot_lik][2])+' brks | Lik='+top_plot_lik)
|
196
|
|
- plot_handles.append(p0)
|
197
|
|
- for k, plot_Lk in enumerate(best10_plots[1:]):
|
198
|
|
- plot_Lk = str(plot_Lk)
|
199
|
|
- # plt.rcParams['font.size'] = fnt_size
|
200
|
|
- p, = ax1.plot(top_plots[plot_Lk][0], top_plots[plot_Lk][1], 'o', linestyle = "--",
|
201
|
|
- alpha=1/(k+1), lw=1.5, label = str(top_plots[plot_Lk][2])+' brks | Lik='+plot_Lk)
|
202
|
|
- plot_handles.append(p)
|
203
|
|
- if theta_scale:
|
204
|
|
- ax1.set_xlabel("Coal. time", fontsize=fnt_size)
|
205
|
|
- ax1.set_ylabel("Pop. size scaled by N0", fontsize=fnt_size)
|
206
|
|
- # recent_scale_lower_bound = 0.01
|
207
|
|
- # recent_scale_upper_bound = 0.1
|
208
|
|
- # ax1.axvline(x=recent_scale_lower_bound)
|
209
|
|
- # ax1.axvline(x=recent_scale_upper_bound)
|
210
|
|
- else:
|
211
|
|
- # years
|
212
|
|
- plt.set_xlabel("Time (years)", fontsize=fnt_size)
|
213
|
|
- plt.set_ylabel("Individuals (N)", fontsize=fnt_size)
|
214
|
|
- # plt.rcParams['font.size'] = fnt_size
|
215
|
|
- # print(fnt_size, "rcParam font.size=", plt.rcParams['font.size'])
|
216
|
|
- ax1.legend(handles = plot_handles, loc='best', fontsize = fnt_size*0.5)
|
217
|
|
- ax1.set_title(title)
|
218
|
|
- if ax is None:
|
219
|
|
- plt.savefig(title+'_b'+str(breaks)+'.pdf')
|
220
|
|
- # plot likelihood against nb of breakpoints
|
221
|
|
- # best possible likelihood from SFS
|
222
|
|
- # Segregating sites
|
223
|
|
- S = sum(SFS_stored)
|
224
|
|
- # Number of kept sites from which the SFS is computed
|
225
|
|
- L = L_stored
|
226
|
|
- # number of monomorphic sites
|
227
|
|
- S0 = L-S
|
228
|
|
- # print("SFS", SFS_stored)
|
229
|
|
- # print("S", S, "L", L, "S0=", S0)
|
230
|
|
- # compute Ln
|
231
|
|
- Ln = log_facto(S+S0) - log_facto(S0) + np.log(float(S0)/(S+S0)) * S0
|
232
|
|
- for xi in range(0, len(SFS_stored)):
|
233
|
|
- p_i = SFS_stored[xi] / float(S+S0)
|
234
|
|
- Ln += np.log(p_i) * SFS_stored[xi] - log_facto(SFS_stored[xi])
|
235
|
|
- # basic plot likelihood
|
236
|
|
- if ax is None:
|
237
|
|
- fig, ax2 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
238
|
|
- # plt.rcParams['font.size'] = fnt_size
|
239
|
|
- else:
|
240
|
|
- #plt.rcParams['font.size'] = fnt_size
|
241
|
|
- ax2 = ax[0][0,1]
|
242
|
|
- ax2.plot(np.array(brkpt_lik)[:, 0], np.array(brkpt_lik)[:, 1].astype(float), 'o', linestyle = "dotted", lw=2)
|
243
|
|
- ax2.axhline(y=-Ln, linestyle = "-.", color = "red", label = "$-\log\mathcal{L}$ = "+str(round(-Ln, 2)))
|
244
|
|
- ax2.set_yscale('log')
|
245
|
|
- ax2.set_xlabel("# breakpoints", fontsize=fnt_size)
|
246
|
|
- ax2.set_ylabel("$-\log\mathcal{L}$", fontsize=fnt_size)
|
247
|
|
- ax2.legend(loc='best', fontsize = fnt_size*0.5)
|
248
|
|
- ax2.set_title(title+" Likelihood gain from # breakpoints")
|
249
|
|
- if ax is None:
|
250
|
|
- plt.savefig(title+'_Breakpts_Likelihood.pdf')
|
251
|
|
- # AIC
|
252
|
|
- if ax is None:
|
253
|
|
- fig, ax3 = plt.subplots(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
254
|
|
- # plt.rcParams['font.size'] = '18'
|
255
|
|
- else:
|
256
|
|
- #plt.rcParams['font.size'] = fnt_size
|
257
|
|
- ax3 = ax[1][0,1]
|
258
|
|
- AIC = []
|
259
|
|
- for brk in np.array(brkpt_lik)[:, 0]:
|
260
|
|
- brk = int(brk)
|
261
|
|
- AIC.append((2*brk+1)+2*np.array(brkpt_lik)[brk, 1].astype(float))
|
262
|
|
- ax3.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
|
263
|
|
- # AIC = 2*k - 2ln(L) ; where k is the number of parameters, here brks+1
|
264
|
|
- AIC_ln = 2*(len(brkpt_lik)+1) - 2*Ln
|
265
|
|
- ax3.axhline(y=AIC_ln, linestyle = "-.", color = "red",
|
266
|
|
- label = "Min. AIC = "+str(round(AIC_ln, 2)))
|
267
|
|
- selected_brks_nb = AIC.index(min(AIC))
|
268
|
|
- ax3.set_yscale('log')
|
269
|
|
- ax3.set_xlabel("# breakpoints", fontsize=fnt_size)
|
270
|
|
- ax3.set_ylabel("AIC")
|
271
|
|
- ax3.legend(loc='best', fontsize = fnt_size*0.5)
|
272
|
|
- ax3.set_title(title+" AIC")
|
273
|
|
- if ax is None:
|
274
|
|
- plt.savefig(title+'_Breakpts_Likelihood_AIC.pdf')
|
275
|
|
- print("S", S)
|
276
|
|
- # return plots
|
277
|
|
- return ax[0], ax[1]
|
278
|
|
-
|
279
|
127
|
def plot_all_epochs_thetafolder(full_dict, mu, tgen, title = "Title",
|
280
|
128
|
theta_scale = True, ax = None, input = None, output = None):
|
281
|
129
|
my_dpi = 300
|
|
@@ -442,21 +290,26 @@ def save_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
|
442
|
290
|
# AIC = 2*k - 2ln(L) ; where k is the number of parameters, here brks+1
|
443
|
291
|
AIC_ln = 2*(len(brkpt_lik)+1) - 2*Ln
|
444
|
292
|
best_AIC = AIC_ln
|
|
293
|
+ selected_brks_nb = AIC.index(min(AIC))
|
445
|
294
|
# to return : plots ; Ln_Brks ; AIC_Brks ; best_Ln ; best_AIC
|
446
|
295
|
# 'plots' dict keys: 'best', {epochs}('0', '1',...)
|
447
|
296
|
if input == None:
|
448
|
|
- saved_plots = {"all_epochs":plots, "Ln_Brks":Ln_Brks,
|
|
297
|
+ saved_plots = {"S":S, "S0":S0, "L":L, "all_epochs":plots, "Ln_Brks":Ln_Brks,
|
449
|
298
|
"AIC_Brks":AIC_Brks, "best_Ln":best_Ln,
|
450
|
|
- "best_AIC":best_AIC}
|
|
299
|
+ "best_AIC":best_AIC, "best_epoch_by_AIC":selected_brks_nb}
|
451
|
300
|
else:
|
452
|
301
|
# if the dict has to be loaded from input
|
453
|
302
|
with open(input, 'r') as json_file:
|
454
|
303
|
saved_plots = json.load(json_file)
|
|
304
|
+ saved_plots["S"] = S
|
|
305
|
+ saved_plots["S0"] = S0
|
|
306
|
+ saved_plots["L"] = L
|
455
|
307
|
saved_plots["all_epochs"] = plots
|
456
|
308
|
saved_plots["Ln_Brks"] = Ln_Brks
|
457
|
309
|
saved_plots["AIC_Brks"] = AIC_Brks
|
458
|
310
|
saved_plots["best_Ln"] = best_Ln
|
459
|
311
|
saved_plots["best_AIC"] = best_AIC
|
|
312
|
+ saved_plots["best_epoch_by_AIC"] = selected_brks_nb
|
460
|
313
|
if output == None:
|
461
|
314
|
output = title+"_plotdata.json"
|
462
|
315
|
with open(output, 'w') as json_file:
|
|
@@ -573,7 +426,7 @@ def save_k_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
|
573
|
426
|
json.dump(saved_plots, json_file)
|
574
|
427
|
return saved_plots
|
575
|
428
|
|
576
|
|
-def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10):
|
|
429
|
+def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10, subset = None):
|
577
|
430
|
# fig 2 & 3
|
578
|
431
|
if ax is None:
|
579
|
432
|
my_dpi = 300
|
|
@@ -589,13 +442,21 @@ def plot_scaled_theta(plot_lines, prop, title, ax = None, n_ticks = 10):
|
589
|
442
|
lines_fig2 = []
|
590
|
443
|
lines_fig3 = []
|
591
|
444
|
#plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
|
592
|
|
- for epoch, plot in enumerate(plot_lines):
|
|
445
|
+ nb_breaks = len(plot_lines)
|
|
446
|
+ for breaks, plot in enumerate(plot_lines):
|
|
447
|
+ if subset is not None:
|
|
448
|
+ if breaks not in subset :
|
|
449
|
+ # skip if not in subset
|
|
450
|
+ if max(subset) > nb_breaks and breaks == nb_breaks-1:
|
|
451
|
+ pass
|
|
452
|
+ else:
|
|
453
|
+ continue
|
593
|
454
|
x,y=plot
|
594
|
455
|
x2_plot, y2_plot = plot_straight_x_y(x,y)
|
595
|
|
- p2, = ax2.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
|
|
456
|
+ p2, = ax2.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(breaks)+' brks')
|
596
|
457
|
lines_fig2.append(p2)
|
597
|
458
|
# Plotting (fig 3) which is the same but log scale for x
|
598
|
|
- p3, = ax3.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(epoch)+' brks')
|
|
459
|
+ p3, = ax3.plot(x2_plot, y2_plot, 'o', linestyle="-", alpha=0.75, lw=2, label = str(breaks)+' brks')
|
599
|
460
|
lines_fig3.append(p3)
|
600
|
461
|
ax2.set_xlabel("Relative scale", fontsize=fnt_size)
|
601
|
462
|
ax2.set_ylabel("theta", fontsize=fnt_size)
|
|
@@ -666,24 +527,9 @@ def plot_raw_stairs(plot_lines, prop, title, ax = None, n_ticks = 10):
|
666
|
527
|
# return plots
|
667
|
528
|
return ax
|
668
|
529
|
|
669
|
|
-def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True):
|
|
530
|
+def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale = True, selected_breaks = []):
|
670
|
531
|
my_dpi = 300
|
671
|
|
- # # Add some extra space for the second axis at the bottom
|
672
|
|
- # #plt.rcParams['font.size'] = 18
|
673
|
|
- # fig, axs = plt.subplots(2, 2, figsize=(5000/my_dpi, 2970/my_dpi), dpi=my_dpi)
|
674
|
|
- # #plt.rcParams['font.size'] = 12
|
675
|
|
- # ax = plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = axs)
|
676
|
|
- # ax = plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = axs)
|
677
|
|
- # # Adjust layout to prevent clipping of titles
|
678
|
|
- #
|
679
|
532
|
|
680
|
|
- # # Save the entire grid as a single figure
|
681
|
|
- # plt.savefig(title+'_combined.pdf')
|
682
|
|
- # plt.clf()
|
683
|
|
- # # # second call for individual plots
|
684
|
|
- # # plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
|
685
|
|
- # # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
|
686
|
|
- # # plt.clf()
|
687
|
533
|
save_k_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, output = title+"_plotdata.json")
|
688
|
534
|
save_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, input = title+"_plotdata.json", output = title+"_plotdata.json")
|
689
|
535
|
|
|
@@ -699,12 +545,15 @@ def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale =
|
699
|
545
|
# fig2.tight_layout()
|
700
|
546
|
ax1 = plot_raw_stairs(plot_lines = loaded_data['raw_stairs'],
|
701
|
547
|
prop = loaded_data['prop'], title = title, ax = ax1)
|
702
|
|
-
|
703
|
548
|
ax1 = plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
|
704
|
|
- prop = loaded_data['prop'], title = title, ax = ax1)
|
|
549
|
+ prop = loaded_data['prop'], title = title, ax = ax1, subset=[loaded_data['best_epoch_by_AIC']]+selected_breaks)
|
|
550
|
+ ax2 = plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
|
|
551
|
+ prop = loaded_data['prop'], title = title, ax = ax2)
|
705
|
552
|
ax1, ax2 = plot_all_epochs_thetafolder(loaded_data, mu, tgen, title, theta_scale, ax = [ax1, ax2])
|
706
|
553
|
fig1.savefig(title+'_combined_p1.pdf')
|
|
554
|
+ print("Wrote", title+'_combined_p1.pdf')
|
707
|
555
|
fig2.savefig(title+'_combined_p2.pdf')
|
|
556
|
+ print("Wrote", title+'_combined_p2.pdf')
|
708
|
557
|
plot_raw_stairs(plot_lines = loaded_data['raw_stairs'],
|
709
|
558
|
prop = loaded_data['prop'], title = title, ax = None)
|
710
|
559
|
plot_scaled_theta(plot_lines = loaded_data['scaled_stairs'],
|
|
@@ -712,6 +561,7 @@ def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale =
|
712
|
561
|
|
713
|
562
|
plt.close(fig1)
|
714
|
563
|
plt.close(fig2)
|
|
564
|
+
|
715
|
565
|
if __name__ == "__main__":
|
716
|
566
|
|
717
|
567
|
if len(sys.argv) != 4:
|