|
@@ -28,8 +28,6 @@ if diploid and not folded:
|
28
|
28
|
# initialize SFS_values as a dict mapping every index in range(n) to 0
|
29
|
29
|
SFS_values = dict.fromkeys(range(n),0)
|
30
|
30
|
|
31
|
|
-polycount = 0
|
32
|
|
-
|
33
|
31
|
with gzip.open(sys.argv[1], "rb") as inputgz:
|
34
|
32
|
line = inputgz.readline()
|
35
|
33
|
genotypes = []
|
|
@@ -66,12 +64,6 @@ with gzip.open(sys.argv[1], "rb") as inputgz:
|
66
|
64
|
smpl_genotype = [int(a) for a in sample.split(':')[0].split('|') if a != '.']
|
67
|
65
|
nb_alleles = set(smpl_genotype)
|
68
|
66
|
snp_genotypes += smpl_genotype
|
69
|
|
- # if set(snp_genotypes) > 2:
|
70
|
|
- # polyallelic = set(snp_genotypes)
|
71
|
|
- # else:
|
72
|
|
- # polyallelic = False
|
73
|
|
- polyallelic = len(ALT)
|
74
|
|
- ##print(REF, ALT, snp_genotypes)
|
75
|
67
|
# skip monomorphic sites: only one distinct allele is observed across all samples
|
76
|
68
|
if len(set(snp_genotypes)) == 1:
|
77
|
69
|
line = inputgz.readline()
|
|
@@ -80,18 +72,9 @@ with gzip.open(sys.argv[1], "rb") as inputgz:
|
80
|
72
|
allele_counts[snp_genotypes.count(k)] = k
|
81
|
73
|
allele_counts_list.append(snp_genotypes.count(k))
|
82
|
74
|
if folded :
|
83
|
|
- #allele_counts_list = list(allele_counts.keys())
|
84
|
|
- ##print("ALC", allele_counts_list, "POLY", polyallelic, ALT)
|
85
|
75
|
for al in range(polyallelic-1):
|
86
|
76
|
SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
|
87
|
77
|
allele_counts_list.remove(min(allele_counts_list))
|
88
|
|
- # if len(ALT) == 1:
|
89
|
|
- # SFS_values[min(allele_counts_list)-1] += 1
|
90
|
|
- # else:
|
91
|
|
- # for al in range(polyallelic-1):
|
92
|
|
- # SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
|
93
|
|
- # allele_counts_list.remove(min(allele_counts_list))
|
94
|
|
- # polycount += 1
|
95
|
78
|
line = inputgz.readline()
|
96
|
79
|
print(SFS_values)
|
97
|
80
|
print(polycount)
|