Browse Source

New plots for swp2 output files

tforest 1 year ago
parent
commit
463b9e63f9
1 changed files with 231 additions and 15 deletions
  1. 231 15
      swp2.py

+ 231 - 15
swp2.py View File

@@ -1,7 +1,8 @@
1 1
 import matplotlib.pyplot as plt
2 2
 import os
3
+import numpy as np
3 4
 
4
-def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
5
+def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
5 6
     with open(stwp_theta_file, "r") as swp_file:
6 7
         # Read the first line
7 8
         line = swp_file.readline()
@@ -53,18 +54,27 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
53 54
         #t =
54 55
         if len(group.split(',')) == 1:
55 56
             k = i
56
-            t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
57
+            if relative_theta_scale:
58
+                t[i] += ((theta_L[group_nb] ) / (k*(k-1)))
59
+            else:
60
+                t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
57 61
         else:
58 62
             for k in range(j, i-1, -1 ):
59
-                t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
63
+                if relative_theta_scale:
64
+                    t[i] += ((theta_L[group_nb] ) / (k*(k-1)))
65
+                else:
66
+                    t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
60 67
         # we add the cumulative times at the end
61 68
         t[i] += sum_t
62 69
         sum_t = t[i]
63 70
     # build the y axis (sizes)
64 71
     y = []
65 72
     for theta in theta_L:
66
-        # with size N = theta/4mu
67
-        size = theta / (4*mu)
73
+        if relative_theta_scale:
74
+            size = theta
75
+        else:
76
+            # with size N = theta/4mu
77
+            size = theta / (4*mu)
68 78
         y.append(size)
69 79
         y.append(size)
70 80
     # build the time x axis
@@ -73,19 +83,64 @@ def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
73 83
         x.append(list(t.values())[time])
74 84
         x.append(list(t.values())[time])
75 85
     x.append(list(t.values())[len(t.values())-1])
86
+    # if relative_theta_scale:
87
+    #     # rescale
88
+    #     #N0 = y[0]
89
+    #     # for i in range(len(y)):
90
+    #     #     # divide by N0
91
+    #     #     y[i] = y[i]/N0
92
+    #     #     x[i] = x[i]/N0
76 93
     return x,y,likelihood
77 94
 
78
-def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
95
+def return_x_y_from_stwp_theta_file_as_is(stwp_theta_file, breaks, mu, tgen, relative_theta_scale = False):
96
+    with open(stwp_theta_file, "r") as swp_file:
97
+        # Read the first line
98
+        line = swp_file.readline()
99
+        L = float(line.split()[2])
100
+        # Process lines until the end of the file
101
+        while line:
102
+            # check at each line
103
+            if line.startswith("dim") :
104
+                dim = int(line.split()[1])
105
+                if dim == breaks+1:
106
+                    likelihood = line.split()[5]
107
+                    groups = line.split()[6:6+dim]
108
+                    theta_site = line.split()[6+dim:6+dim+1+dim]
109
+                elif dim < breaks+1:
110
+                    line = swp_file.readline()
111
+                    continue
112
+                elif dim > breaks+1:
113
+                    break
114
+                    #return 0,0,0
115
+            # Read the next line
116
+            line = swp_file.readline()
117
+    #### END of parsing
118
+    # quit this file if the number of dimensions is incorrect
119
+    if dim < breaks+1:
120
+        return 0,0,0
121
+    # get n, the last bin of the last group
122
+    # revert the list of groups as the most recent times correspond
123
+    # to the closest and last leafs of the coal. tree.
124
+    groups = groups[::-1]
125
+    theta_site = theta_site[::-1]
126
+
127
+    thetas = {}
79 128
 
129
+    for i in range(len(groups)):
130
+        groups[i] = groups[i].split(',')
131
+        #print(groups[i], len(groups[i]))
132
+        thetas[i] = [float(theta_site[i]), groups[i], likelihood]
133
+    return thetas
134
+
135
+def plot_k_epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title", theta_scale = True):
80 136
     scenari = {}
81 137
     cpt = 0
82
-
83 138
     for file_name in os.listdir(folder_path):
84 139
         if os.path.isfile(os.path.join(folder_path, file_name)):
85 140
             # Perform actions on each file
86 141
             x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
87 142
                                                              tgen = tgen,
88
-                                                             mu = mu)
143
+                                                             mu = mu, relative_theta_scale = theta_scale)
89 144
             if x == 0 or y == 0:
90 145
                 continue
91 146
             cpt +=1
@@ -93,6 +148,7 @@ def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title")
93 148
     print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
94 149
     print(cpt, "theta file(s) have been scanned.")
95 150
     # sort starting by the smallest -log(Likelihood)
151
+    print(scenari)
96 152
     best10_scenari = (sorted(list(scenari.keys())))[:10]
97 153
     print("10 greatest Likelihoods", best10_scenari)
98 154
     greatest_likelihood = best10_scenari[0]
@@ -100,21 +156,181 @@ def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title")
100 156
     my_dpi = 300
101 157
     plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
102 158
     plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
103
-    plt.yscale('log')
104
-    #plt.xscale('log')
105
-    plt.grid(True,which="both", linestyle='--')
159
+    plt.xlim(1e-3, 1)
160
+    plt.ylim(0, 10)
161
+    #plt.yscale('log')
162
+    plt.xscale('log')
163
+    plt.grid(True,which="both", linestyle='--', alpha = 0.3)
106 164
 
107 165
     for scenario in best10_scenari[1:]:
108 166
         x,y = scenari[scenario]
109 167
         #print("\n----  Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
110 168
         plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
111
-    plt.ylabel("Individuals (N)")
112
-    plt.xlabel("Time (years)")
169
+    if theta_scale:
170
+        plt.xlabel("Coal. time")
171
+        plt.ylabel("Pop. size scaled by N0")
172
+        recent_scale_lower_bound = y[0] * 0.01
173
+        recent_scale_upper_bound = y[0] * 0.1
174
+        plt.axvline(x=recent_scale_lower_bound)
175
+        plt.axvline(x=recent_scale_upper_bound)
176
+    else:
177
+        # years
178
+        plt.xlabel("Time (years)")
179
+        plt.ylabel("Individuals (N)")
113 180
     plt.legend(loc='upper right')
114 181
     plt.title(title)
115
-    #plt.gcf().set_size(1000, 500)
116 182
     plt.savefig(title+'_b'+str(breaks)+'.pdf')
117 183
 
184
+def plot_all_epochs_thetafolder(folder_path, mu, tgen, title = "Title", theta_scale = True):
185
+    #scenari = {}
186
+    cpt = 0
187
+    epochs = {}
188
+    for file_name in os.listdir(folder_path):
189
+        breaks = 0
190
+        cpt +=1
191
+        if os.path.isfile(os.path.join(folder_path, file_name)):
192
+            x, y, likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
193
+                                                             tgen = tgen,
194
+                                                             mu = mu, relative_theta_scale = theta_scale)
195
+            while not (x == 0 and y == 0):
196
+                if breaks not in epochs.keys():
197
+                    epochs[breaks] = {}
198
+                epochs[breaks][likelihood] = x,y
199
+                breaks += 1
200
+                x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks,
201
+                                                                 tgen = tgen,
202
+                                                                  mu = mu, relative_theta_scale = theta_scale)
203
+    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
204
+    print(cpt, "theta file(s) have been scanned.")
205
+
206
+    # intialize figure
207
+    my_dpi = 300
208
+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
209
+    plt.xlim(1e-3, 1)
210
+    #plt.ylim(0, 10)
211
+    #plt.yscale('log')
212
+    plt.xscale('log')
213
+    plt.grid(True,which="both", linestyle='--', alpha = 0.3)
214
+    brkpt_lik = []
215
+    for epoch, scenari in epochs.items():
216
+        # sort starting by the smallest -log(Likelihood)
217
+        best10_scenari = (sorted(list(scenari.keys())))[:10]
218
+        greatest_likelihood = best10_scenari[0]
219
+        # store the tuple breakpoints and likelihood for later plot
220
+        brkpt_lik.append((epoch, greatest_likelihood))
221
+        x, y = scenari[greatest_likelihood]
222
+        #without breakpoint
223
+        if epoch == 0:
224
+            # do something with the theta without bp and skip the plotting
225
+            N0 = y[0]
226
+            #continue
227
+        for i in range(len(y)):
228
+            # divide by N0
229
+            y[i] = y[i]/N0
230
+            x[i] = x[i]/N0
231
+        plt.plot(x, y, '-', alpha=0.75, lw=2, label = str(epoch)+' BrkPt | Lik='+greatest_likelihood)
232
+        if theta_scale:
233
+            plt.xlabel("Coal. time")
234
+            plt.ylabel("Pop. size scaled by N0")
235
+            recent_scale_lower_bound = 0.01
236
+            recent_scale_upper_bound = 0.1
237
+            #print(recent_scale_lower_bound, recent_scale_upper_bound)
238
+            plt.axvline(x=recent_scale_lower_bound)
239
+            plt.axvline(x=recent_scale_upper_bound)
240
+        else:
241
+            # years
242
+            plt.xlabel("Time (years)")
243
+            plt.ylabel("Individuals (N)")
244
+        plt.xlim(1e-5, 1)
245
+        plt.legend(loc='upper right')
246
+        plt.title(title)
247
+        plt.savefig(title+'_b'+str(breaks)+'.pdf')
248
+    # plot likelihood against nb of breakpoints
249
+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
250
+    plt.rcParams['font.size'] = '18'
251
+    AIC = 2*(len(brkpt_lik)+1)+2*np.array(brkpt_lik)[:, 1].astype(float)
252
+    plt.plot(np.array(brkpt_lik)[:, 0], AIC, 'o', linestyle = "dotted", lw=2)
253
+    plt.axhline(y=106)
254
+    plt.yscale('log')
255
+    plt.xlabel("# breakpoints", fontsize=20)
256
+    plt.ylabel("$-\log\mathcal{L}$")
257
+    #plt.legend(loc='upper right')
258
+    plt.title(title)
259
+    plt.savefig(title+'_Breakpts_Likelihood.pdf')
260
+
261
+def plot_test_theta(folder_path, mu, tgen, title = "Title", theta_scale = True, breaks_max = 6):
262
+    cpt = 0
263
+    epochs = {}
264
+    for file_name in os.listdir(folder_path):
265
+        cpt +=1
266
+        if os.path.isfile(os.path.join(folder_path, file_name)):
267
+            for k in range(breaks_max):
268
+                thetas = return_x_y_from_stwp_theta_file_as_is(folder_path+file_name, breaks = k,
269
+                                                                 tgen = tgen,
270
+                                                                 mu = mu, relative_theta_scale = theta_scale)
271
+                if thetas[0] == 0:
272
+                    continue
273
+                epochs[k] = thetas
274
+    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(k)+"\n*******\n")
275
+    print(cpt, "theta file(s) have been scanned.")
276
+
277
+    # intialize figure
278
+    my_dpi = 300
279
+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
280
+    for epoch, theta in epochs.items():
281
+        groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
282
+        x = []
283
+        y = []
284
+        thetas = np.array(list(theta.values()), dtype=object)[:, 0]
285
+        for i,group in enumerate(groups):
286
+            x += group[::-1]
287
+            y += list(np.repeat(thetas[i], len(group)))
288
+            if epoch == 0:
289
+                N0 = y[0]
290
+        for i in range(len(y)):
291
+            y[i] = y[i]/N0
292
+        plt.plot(x, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
293
+        plt.xlabel("# breaks")
294
+        plt.ylabel("theta")
295
+        plt.legend(loc='upper right')
296
+        plt.savefig(title+'_test'+str(k)+'.pdf')
297
+    # fig 2
298
+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
299
+    for epoch, theta in epochs.items():
300
+        groups = np.array(list(theta.values()), dtype=object)[:, 1].tolist()
301
+        x = []
302
+        y = []
303
+        thetas = np.array(list(theta.values()), dtype=object)[:, 0]
304
+        for i,group in enumerate(groups):
305
+            x += group[::-1]
306
+            y += list(np.repeat(thetas[i], len(group)))
307
+            if epoch == 0:
308
+                N0 = y[0]
309
+        for i in range(len(y)):
310
+            y[i] = y[i]/N0
311
+        #
312
+        x_2 = []
313
+        T = 0
314
+        # k allant de de 14 à 2
315
+        for i in range(len(x)):
316
+            x[i] = int(x[i])
317
+        #print(x[2])
318
+        for i in range(0, len(x)):
319
+            k = x[i]
320
+            #print(k, y[k-2])
321
+
322
+            #theta_k = y[k] / (k*(k-1))
323
+            T += y[i] / (x[i]*(x[i]-1))
324
+            x_2.append(T)
325
+
326
+        plt.plot(x_2, y, 'o', linestyle="dotted", alpha=0.75, lw=2, label = str(epoch)+' brks')
327
+        plt.xscale('log')
328
+        plt.xlabel("# breaks")
329
+        plt.ylabel("theta")
330
+        plt.legend(loc='upper right')
331
+        plt.savefig(title+'_test'+str(k)+'.pdf')
332
+        #
333
+
118 334
 if __name__ == "__main__":
119 335
 
120 336
     if len(sys.argv) != 4:
@@ -125,4 +341,4 @@ if __name__ == "__main__":
125 341
     mu = sys.argv[2]
126 342
     tgen = sys.argv[3]
127 343
 
128
-    plot_3epochs_thetafolder(folder_path, mu, tgen)
344
+    plot_all_epochs_thetafolder(folder_path, mu, tgen)