Browse Source

Keeping only the thetas with the lowest logLik for each number of breakpoints

tforest 11 months ago
parent
commit
66d399d6f0
1 changed file with 34 additions and 13 deletions
  1. 34 13
      swp2.py

+ 34 - 13
swp2.py View File

56
     #### END of parsing
56
     #### END of parsing
57
     # quit this file if the number of dimensions is incorrect
57
     # quit this file if the number of dimensions is incorrect
58
     if dim < breaks+1:
58
     if dim < breaks+1:
59
-        return 0,0,0,0,0
59
+        return 0,0,0,0,0,0
60
     # get n, the last bin of the last group
60
     # get n, the last bin of the last group
61
     # revert the list of groups as the most recent times correspond
61
     # revert the list of groups as the most recent times correspond
62
     # to the closest and last leafs of the coal. tree.
62
     # to the closest and last leafs of the coal. tree.
63
     groups = groups[::-1]
63
     groups = groups[::-1]
64
     theta_site = theta_site[::-1]
64
     theta_site = theta_site[::-1]
65
+    # store thetas for later use
66
+    grps = groups.copy()
67
+    thetas = {}
68
+    for i in range(len(groups)):
69
+        grps[i] = grps[i].split(',')
70
+        thetas[i] = [float(theta_site[i]), grps[i], likelihood]
65
     # initiate the dict of times
71
     # initiate the dict of times
66
     t = {}
72
     t = {}
67
     # list of thetas
73
     # list of thetas
119
     #     #     # divide by N0
125
     #     #     # divide by N0
120
     #     #     y[i] = y[i]/N0
126
     #     #     y[i] = y[i]/N0
121
     #     #     x[i] = x[i]/N0
127
     #     #     x[i] = x[i]/N0
122
-    return x,y,likelihood,sfs,L
128
+    return x,y,likelihood,thetas,sfs,L
123
 
129
 
124
 def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
130
 def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
125
     with open(stwp_theta_file, "r") as swp_file:
131
     with open(stwp_theta_file, "r") as swp_file:
163
 
169
 
164
     for i in range(len(groups)):
170
     for i in range(len(groups)):
165
         groups[i] = groups[i].split(',')
171
         groups[i] = groups[i].split(',')
166
-        #print(groups[i], len(groups[i]))
172
+        # print(groups[i], len(groups[i]))
167
         thetas[i] = [float(theta_site[i]), groups[i], likelihood]
173
         thetas[i] = [float(theta_site[i]), groups[i], likelihood]
168
     return thetas, sfs
174
     return thetas, sfs
169
 
175
 
236
         breaks = 0
242
         breaks = 0
237
         cpt +=1
243
         cpt +=1
238
         if os.path.isfile(os.path.join(folder_path, file_name)):
244
         if os.path.isfile(os.path.join(folder_path, file_name)):
239
-            x, y, likelihood, sfs, L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
245
+            x, y, likelihood, theta, sfs, L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
240
                                                              tgen = tgen,
246
                                                              tgen = tgen,
241
                                                              mu = mu, relative_theta_scale = theta_scale)
247
                                                              mu = mu, relative_theta_scale = theta_scale)
242
             SFS_stored = sfs
248
             SFS_stored = sfs
246
                     epochs[breaks] = {}
252
                     epochs[breaks] = {}
247
                 epochs[breaks][likelihood] = x,y
253
                 epochs[breaks][likelihood] = x,y
248
                 breaks += 1
254
                 breaks += 1
249
-                x,y,likelihood,sfs,L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
255
+                x,y,likelihood,theta,sfs,L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
250
                                                                  tgen = tgen,
256
                                                                  tgen = tgen,
251
                                                                   mu = mu, relative_theta_scale = theta_scale)
257
                                                                   mu = mu, relative_theta_scale = theta_scale)
252
             if x == 0:
258
             if x == 0:
373
     if ax is None:
379
     if ax is None:
374
         plt.savefig(title+'_Breakpts_Likelihood_AIC.pdf')
380
         plt.savefig(title+'_Breakpts_Likelihood_AIC.pdf')
375
     print("S", S)
381
     print("S", S)
382
+    # return plots
376
     return ax
383
     return ax
377
 
384
 
378
 def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 10, ax = None, n_ticks = 10):
385
 def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 10, ax = None, n_ticks = 10):
381
     """
388
     """
382
     cpt = 0
389
     cpt = 0
383
     epochs = {}
390
     epochs = {}
391
+    len_sfs = 0
384
     for file_name in os.listdir(folder_path):
392
     for file_name in os.listdir(folder_path):
385
         cpt +=1
393
         cpt +=1
386
         if os.path.isfile(os.path.join(folder_path, file_name)):
394
         if os.path.isfile(os.path.join(folder_path, file_name)):
390
                                                                  mu = mu, relative_theta_scale = theta_scale)
398
                                                                  mu = mu, relative_theta_scale = theta_scale)
391
                 if thetas == 0:
399
                 if thetas == 0:
392
                     continue
400
                     continue
393
-                epochs[k] = thetas
401
+                if len(thetas)-1 != k:
402
+                    continue
403
+                if k not in epochs.keys():
404
+                    epochs[k] = {}
405
+                likelihood = thetas[k][2]
406
+                epochs[k][likelihood] = thetas
407
+                #epochs[k] = thetas
394
     print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
408
     print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
395
     print(cpt, "theta file(s) have been scanned.")
409
     print(cpt, "theta file(s) have been scanned.")
396
     # multiple fig
410
     # multiple fig
405
         # plt.rcParams['font.size'] = fnt_size
419
         # plt.rcParams['font.size'] = fnt_size
406
         ax1 = ax[0, 1]
420
         ax1 = ax[0, 1]
407
         plt.subplots_adjust(wspace=0.3, hspace=0.3)
421
         plt.subplots_adjust(wspace=0.3, hspace=0.3)
408
-
409
     plots = []
422
     plots = []
410
-    for epoch, theta in epochs.items():
423
+    best_epochs = {}
424
+    for epoch in epochs:
425
+        likelihoods = []
426
+        for key in epochs[epoch].keys():
427
+            likelihoods.append(float(key))
428
+        likelihoods.sort()
429
+        minLogLn = str(likelihoods[0])
430
+        best_epochs[epoch] = epochs[epoch][minLogLn]
431
+    for epoch, theta in best_epochs.items():
411
         groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
432
         groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
412
         x = []
433
         x = []
413
         y = []
434
         y = []
467
     lines_fig2 = []
488
     lines_fig2 = []
468
     lines_fig3 = []
489
     lines_fig3 = []
469
     #plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
490
     #plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
470
-    for epoch, theta in epochs.items():
491
+    for epoch, theta in best_epochs.items():
471
         groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
492
         groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
472
         x = []
493
         x = []
473
         y = []
494
         y = []
529
     # Save the entire grid as a single figure
550
     # Save the entire grid as a single figure
530
     plt.savefig(title+'_combined.pdf')
551
     plt.savefig(title+'_combined.pdf')
531
     plt.clf()
552
     plt.clf()
532
-    # second call for individual plots
533
-    plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
534
-    plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
535
-    plt.clf()
553
+    # # second call for individual plots
554
+    # plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
555
+    # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
556
+    # plt.clf()
536
 
557
 
537
 if __name__ == "__main__":
558
 if __name__ == "__main__":
538
 
559