Browse Source

swp2 plot add

tforest 9 months ago
parent
commit
2b5dc75f30
1 changed files with 122 additions and 0 deletions
  1. 122 0
      swp2.py

+ 122 - 0
swp2.py View File

@@ -0,0 +1,122 @@
1
+
2
+import matplotlib.pyplot as plt
3
+import os
4
+
5
+def return_x_y_from_stwp_theta_file(stwp_theta_file, breaks, mu, tgen):
6
+    with open(stwp_theta_file, "r") as swp_file:
7
+        # Read the first line
8
+        line = swp_file.readline()
9
+        L = float(line.split()[2])
10
+        # Process lines until the end of the file
11
+        while line:
12
+            # check at each line
13
+            if line.startswith("dim") :
14
+                dim = int(line.split()[1])
15
+                if dim == breaks+1:
16
+                    likelihood = line.split()[5]
17
+                    groups = line.split()[6:6+dim]
18
+                    theta_site = line.split()[6+dim:6+dim+1+dim]
19
+                elif dim < breaks+1:
20
+                    line = swp_file.readline()
21
+                    continue
22
+                elif dim > breaks+1:
23
+                    break
24
+                    #return 0,0,0
25
+            # Read the next line
26
+            line = swp_file.readline()
27
+    #### END of parsing
28
+    # quit this file if the number of dimensions is incorrect
29
+    if dim < breaks+1:
30
+        return 0,0,0 
31
+    # get n, the last bin of the last group
32
+    n = int(groups[-1].split(",")[-1])
33
+    # initiate the dict of times
34
+    t = {}
35
+    # list of thetas 
36
+    theta_L = []
37
+    sum_t = 0
38
+    for group_nb, group in enumerate(groups):
39
+        # store all the thetas one by one, with one theta per group
40
+        theta_L.append(float(theta_site[group_nb]))
41
+        # if the group is of size 1
42
+        if len(group.split(',')) == 1:
43
+            i = int(group)
44
+        # if the group size is >1, take the first elem of the group
45
+        # i is the first bin of each group, straight after a breakpoint
46
+        else:
47
+            i = int(group.split(",")[0])
48
+            j = int(group.split(",")[-1])
49
+        t[i] = 0
50
+        if len(group.split(',')) == 1:
51
+            k = i
52
+            t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
53
+        else:
54
+            for k in range(i, j+1):
55
+                t[i] += ((theta_L[group_nb] ) / (k*(k-1)) * tgen) / mu
56
+        # we add the cumulative times at the end
57
+        t[i] += sum_t
58
+        sum_t = t[i]
59
+    # build the y axis (sizes)
60
+    y = []
61
+    for theta in theta_L:
62
+        # with size N = theta/4mu
63
+        size = theta / (4*mu)
64
+        y.append(size)
65
+        y.append(size)
66
+    # build the time x axis
67
+    x = [0]
68
+    for time in range(0, len(t.values())-1):
69
+        x.append(list(t.values())[time])
70
+        x.append(list(t.values())[time])
71
+    x.append(list(t.values())[len(t.values())-1])
72
+
73
+    return x,y,likelihood
74
+
75
+def plot_3epochs_thetafolder(folder_path, mu, tgen, breaks = 2, title = "Title"):
76
+    
77
+    scenari = {}
78
+    cpt = 0
79
+    
80
+    for file_name in os.listdir(folder_path):
81
+        if os.path.isfile(os.path.join(folder_path, file_name)):
82
+            # Perform actions on each file
83
+            x,y,likelihood = return_x_y_from_stwp_theta_file(folder_path+file_name, breaks = breaks, 
84
+                                                             tgen = tgen,
85
+                                                             mu = mu)
86
+            if x == 0 or y == 0:
87
+                continue
88
+            cpt +=1
89
+            scenari[likelihood] = x,y
90
+    print("\n*******\n"+title+"\n--------\n"+"mu="+str(mu)+"\ntgen="+str(tgen)+"\nbreaks="+str(breaks)+"\n*******\n")
91
+    print(cpt, "theta files have been scanned.")
92
+    # sort starting by the smallest -log(Likelihood)
93
+    best10_scenari = (sorted(list(scenari.keys())))[:10]
94
+    print("10 greatest Likelihoods", best10_scenari)
95
+    greatest_likelihood = best10_scenari[0]
96
+    x, y = scenari[greatest_likelihood]
97
+    my_dpi = 300
98
+    plt.figure(figsize=(5000/my_dpi, 2800/my_dpi), dpi=my_dpi)
99
+    plt.plot(x, y, 'r-', lw=2, label = 'Lik='+greatest_likelihood)
100
+    for scenario in best10_scenari[1:]:
101
+        x,y = scenari[scenario]
102
+        print("\n----  Lik:",scenario,"\n\nt=", x,"\n\nN=",y, "\n\n")
103
+        plt.plot(x, y, '--', lw=1, label = 'Lik='+scenario)
104
+    plt.ylabel("Individuals (N)")
105
+    plt.xlabel("Time (years)")
106
+    plt.legend(loc='upper right')
107
+    plt.title(title)
108
+    #plt.gcf().set_size(1000, 500)
109
+    plt.savefig(title+'_b'+str(breaks)+'.pdf')
110
+    #plt.show()
111
+
112
+if __name__ == "__main__":
113
+            
114
+    if len(sys.argv) != 4:
115
+        print("Need 3 args: ThetaFolder MutationRate GenerationTime")
116
+        exit(0)
117
+        
118
+    folder_path = sys.argv[1]
119
+    mu = sys.argv[2]
120
+    tgen = sys.argv[3]
121
+
122
+    plot_3epochs_thetafolder(folder_path, mu, tgen)