|
|
|
|
28
|
# initiate SFS_values with a zeros dict
|
28
|
# initiate SFS_values with a zeros dict
|
29
|
SFS_values = dict.fromkeys(range(n),0)
|
29
|
SFS_values = dict.fromkeys(range(n),0)
|
30
|
|
30
|
|
31
|
-polycount = 0
|
|
|
32
|
-
|
|
|
33
|
with gzip.open(sys.argv[1], "rb") as inputgz:
|
31
|
with gzip.open(sys.argv[1], "rb") as inputgz:
|
34
|
line = inputgz.readline()
|
32
|
line = inputgz.readline()
|
35
|
genotypes = []
|
33
|
genotypes = []
|
|
|
|
|
66
|
smpl_genotype = [int(a) for a in sample.split(':')[0].split('|') if a != '.']
|
64
|
smpl_genotype = [int(a) for a in sample.split(':')[0].split('|') if a != '.']
|
67
|
nb_alleles = set(smpl_genotype)
|
65
|
nb_alleles = set(smpl_genotype)
|
68
|
snp_genotypes += smpl_genotype
|
66
|
snp_genotypes += smpl_genotype
|
69
|
- # if set(snp_genotypes) > 2:
|
|
|
70
|
- # polyallelic = set(snp_genotypes)
|
|
|
71
|
- # else:
|
|
|
72
|
- # polyallelic = False
|
|
|
73
|
- polyallelic = len(ALT)
|
|
|
74
|
- ##print(REF, ALT, snp_genotypes)
|
|
|
75
|
# skip if all individuals have the same genotype
|
67
|
# skip if all individuals have the same genotype
|
76
|
if len(set(snp_genotypes)) == 1:
|
68
|
if len(set(snp_genotypes)) == 1:
|
77
|
line = inputgz.readline()
|
69
|
line = inputgz.readline()
|
|
|
|
|
80
|
allele_counts[snp_genotypes.count(k)] = k
|
72
|
allele_counts[snp_genotypes.count(k)] = k
|
81
|
allele_counts_list.append(snp_genotypes.count(k))
|
73
|
allele_counts_list.append(snp_genotypes.count(k))
|
82
|
if folded :
|
74
|
if folded :
|
83
|
- #allele_counts_list = list(allele_counts.keys())
|
|
|
84
|
- ##print("ALC", allele_counts_list, "POLY", polyallelic, ALT)
|
|
|
85
|
for al in range(polyallelic-1):
|
75
|
for al in range(polyallelic-1):
|
86
|
SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
|
76
|
SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
|
87
|
allele_counts_list.remove(min(allele_counts_list))
|
77
|
allele_counts_list.remove(min(allele_counts_list))
|
88
|
- # if len(ALT) == 1:
|
|
|
89
|
- # SFS_values[min(allele_counts_list)-1] += 1
|
|
|
90
|
- # else:
|
|
|
91
|
- # for al in range(polyallelic-1):
|
|
|
92
|
- # SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
|
|
|
93
|
- # allele_counts_list.remove(min(allele_counts_list))
|
|
|
94
|
- # polycount += 1
|
|
|
95
|
line = inputgz.readline()
|
78
|
line = inputgz.readline()
|
96
|
print(SFS_values)
|
79
|
print(SFS_values)
|
97
|
print(polycount)
|
80
|
print(polycount)
|