tforest před 2 roky
rodič
revize
a7490aed15
1 změnil soubory, kde provedl 18 přidání a 6 odebrání
  1. 18 6
      vcf_to_sfs.py

+ 18 - 6
vcf_to_sfs.py Zobrazit soubor

@@ -38,13 +38,25 @@ with gzip.open(sys.argv[1], "rb") as inputgz:
38 38
                 smpl_genotype = [int(a) for a in sample.split(':')[0].split('/') if a != '.']                    
39 39
                 nb_alleles = set(smpl_genotype)
40 40
                 snp_genotypes += smpl_genotype
41
+            if len(set(snp_genotypes)) == 1:
42
+                line = inputgz.readline()
43
+                continue
44
+            #print(snp_genotypes)
41 45
             for k in set(snp_genotypes):
42 46
                 allele_counts[snp_genotypes.count(k)] = k
47
+            if 7 in allele_counts.keys():
48
+                print(allele_counts)
49
+                #print(allele_counts)
43 50
             if folded :
44
-                for count in allele_counts.keys():
45
-                    if count <= len(snp_genotypes)/2 :
46
-                        SFS_values[count-1] += 1                        
47
-                    else:
48
-                        SFS_values[len(snp_genotypes)-count] += 1
51
+                #for count in allele_counts.keys():
52
+                # for count in allele_counts.keys():
53
+                #     if count <= len(snp_genotypes)/2 :
54
+                #         SFS_values[count-1] += 1
55
+                #     else:
56
+                #         SFS_values[len(snp_genotypes)-count-1] += 1
57
+                SFS_values[min(allele_counts.keys())-1] += 1
49 58
         line = inputgz.readline()
50
-        print(SFS_values)
59
+        #print(SFS_values)
60
+
61
+
62
+# Note : tout est doublé là