Browse Source

Keeping only thetas with the lowest logLik for breakpoints thetas

tforest 4 months ago
parent
commit
66d399d6f0
1 changed files with 34 additions and 13 deletions
  1. 34 13
      swp2.py

+ 34 - 13
swp2.py View File

@@ -56,12 +56,18 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_
56 56
     #### END of parsing
57 57
     # quit this file if the number of dimensions is incorrect
58 58
     if dim < breaks+1:
59
-        return 0,0,0,0,0
59
+        return 0,0,0,0,0,0
60 60
     # get n, the last bin of the last group
61 61
     # revert the list of groups as the most recent times correspond
62 62
     # to the closest and last leafs of the coal. tree.
63 63
     groups = groups[::-1]
64 64
     theta_site = theta_site[::-1]
65
+    # store thetas for later use
66
+    grps = groups.copy()
67
+    thetas = {}
68
+    for i in range(len(groups)):
69
+        grps[i] = grps[i].split(',')
70
+        thetas[i] = [float(theta_site[i]), grps[i], likelihood]
65 71
     # initiate the dict of times
66 72
     t = {}
67 73
     # list of thetas
@@ -119,7 +125,7 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_
119 125
     #     #     # divide by N0
120 126
     #     #     y[i] = y[i]/N0
121 127
     #     #     x[i] = x[i]/N0
122
-    return x,y,likelihood,sfs,L
128
+    return x,y,likelihood,thetas,sfs,L
123 129
 
124 130
 def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
125 131
     with open(stwp_theta_file, "r") as swp_file:
@@ -163,7 +169,7 @@ def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, rel
163 169
 
164 170
     for i in range(len(groups)):
165 171
         groups[i] = groups[i].split(',')
166
-        #print(groups[i], len(groups[i]))
172
+        # print(groups[i], len(groups[i]))
167 173
         thetas[i] = [float(theta_site[i]), groups[i], likelihood]
168 174
     return thetas, sfs
169 175
 
@@ -236,7 +242,7 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
236 242
         breaks = 0
237 243
         cpt +=1
238 244
         if os.path.isfile(os.path.join(folder_path, file_name)):
239
-            x, y, likelihood, sfs, L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
245
+            x, y, likelihood, theta, sfs, L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
240 246
                                                              tgen = tgen,
241 247
                                                              mu = mu, relative_theta_scale = theta_scale)
242 248
             SFS_stored = sfs
@@ -246,7 +252,7 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
246 252
                     epochs[breaks] = {}
247 253
                 epochs[breaks][likelihood] = x,y
248 254
                 breaks += 1
249
-                x,y,likelihood,sfs,L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
255
+                x,y,likelihood,theta,sfs,L = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
250 256
                                                                  tgen = tgen,
251 257
                                                                   mu = mu, relative_theta_scale = theta_scale)
252 258
             if x == 0:
@@ -373,6 +379,7 @@ def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_sc
373 379
     if ax is None:
374 380
         plt.savefig(title+'_Breakpts_Likelihood_AIC.pdf')
375 381
     print("S", S)
382
+    # return plots
376 383
     return ax
377 384
 
378 385
 def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 10, ax = None, n_ticks = 10):
@@ -381,6 +388,7 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
381 388
     """
382 389
     cpt = 0
383 390
     epochs = {}
391
+    len_sfs = 0
384 392
     for file_name in os.listdir(folder_path):
385 393
         cpt +=1
386 394
         if os.path.isfile(os.path.join(folder_path, file_name)):
@@ -390,7 +398,13 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
390 398
                                                                  mu = mu, relative_theta_scale = theta_scale)
391 399
                 if thetas == 0:
392 400
                     continue
393
-                epochs[k] = thetas
401
+                if len(thetas)-1 != k:
402
+                    continue
403
+                if k not in epochs.keys():
404
+                    epochs[k] = {}
405
+                likelihood = thetas[k][2]
406
+                epochs[k][likelihood] = thetas
407
+                #epochs[k] = thetas
394 408
     print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
395 409
     print(cpt, "theta file(s) have been scanned.")
396 410
     # multiple fig
@@ -405,9 +419,16 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
405 419
         # plt.rcParams['font.size'] = fnt_size
406 420
         ax1 = ax[0, 1]
407 421
         plt.subplots_adjust(wspace=0.3, hspace=0.3)
408
-
409 422
     plots = []
410
-    for epoch, theta in epochs.items():
423
+    best_epochs = {}
424
+    for epoch in epochs:
425
+        likelihoods = []
426
+        for key in epochs[epoch].keys():
427
+            likelihoods.append(float(key))
428
+        likelihoods.sort()
429
+        minLogLn = str(likelihoods[0])
430
+        best_epochs[epoch] = epochs[epoch][minLogLn]
431
+    for epoch, theta in best_epochs.items():
411 432
         groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
412 433
         x = []
413 434
         y = []
@@ -467,7 +488,7 @@ def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True,
467 488
     lines_fig2 = []
468 489
     lines_fig3 = []
469 490
     #plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
470
-    for epoch, theta in epochs.items():
491
+    for epoch, theta in best_epochs.items():
471 492
         groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
472 493
         x = []
473 494
         y = []
@@ -529,10 +550,10 @@ def combined_plot(folder_path, mu, tgen, breaks, title = "Title", theta_scale =
529 550
     # Save the entire grid as a single figure
530 551
     plt.savefig(title+'_combined.pdf')
531 552
     plt.clf()
532
-    # second call for individual plots
533
-    plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
534
-    plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
535
-    plt.clf()
553
+    # # second call for individual plots
554
+    # plot_all_epochs_thetafolder(folder_path, mu, tgen, title, theta_scale, ax = None)
555
+    # plot_test_theta(folder_path, mu, tgen, title, theta_scale, breaks_max = breaks, ax = None)
556
+    # plt.clf()
536 557
 
537 558
 if __name__ == "__main__":
538 559