tforest 2 years ago
parent
commit
6799dd9a53
1 changed file with 0 additions and 17 deletions
  1. 0 17
      vcf_to_sfs.py

+ 0 - 17
vcf_to_sfs.py View File

@@ -28,8 +28,6 @@ if diploid and not folded:
28 28
 # initiate SFS_values with a zeros dict
29 29
 SFS_values = dict.fromkeys(range(n),0)
30 30
 
31
-polycount = 0
32
-
33 31
 with gzip.open(sys.argv[1], "rb") as inputgz:
34 32
     line = inputgz.readline()
35 33
     genotypes = []
@@ -66,12 +64,6 @@ with gzip.open(sys.argv[1], "rb") as inputgz:
66 64
                     smpl_genotype = [int(a) for a in sample.split(':')[0].split('|') if a != '.']
67 65
                 nb_alleles = set(smpl_genotype)
68 66
                 snp_genotypes += smpl_genotype
69
-            # if set(snp_genotypes) > 2:
70
-            #     polyallelic = set(snp_genotypes)
71
-            # else:
72
-            #     polyallelic = False
73
-            polyallelic = len(ALT)
74
-            ##print(REF, ALT, snp_genotypes)
75 67
             # skip if all individuals have the same genotype
76 68
             if len(set(snp_genotypes)) == 1:
77 69
                 line = inputgz.readline()
@@ -80,18 +72,9 @@ with gzip.open(sys.argv[1], "rb") as inputgz:
80 72
                 allele_counts[snp_genotypes.count(k)] = k
81 73
                 allele_counts_list.append(snp_genotypes.count(k))
82 74
             if folded :
83
-                #allele_counts_list = list(allele_counts.keys())
84
-                ##print("ALC", allele_counts_list, "POLY", polyallelic, ALT)
85 75
                 for al in range(polyallelic-1):
86 76
                     SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
87 77
                     allele_counts_list.remove(min(allele_counts_list))
88
-                # if len(ALT) == 1:
89
-                #     SFS_values[min(allele_counts_list)-1] += 1
90
-                # else:
91
-                #     for al in range(polyallelic-1):
92
-                #         SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
93
-                #         allele_counts_list.remove(min(allele_counts_list))
94
-                #         polycount += 1
95 78
         line = inputgz.readline()
96 79
         print(SFS_values)
97 80
 print(polycount)