|
@@ -17,9 +17,7 @@ import sys
|
17
|
17
|
|
18
|
18
|
def sfs_from_vcf(n, vcf_file, folded = True, diploid = True, phased = False, verbose = False):
|
19
|
19
|
|
20
|
|
- """
|
21
|
|
- Multiplication de deux nombres entiers.
|
22
|
|
- Cette fonction ne sert pas à grand chose.
|
|
20
|
+ """ Returns an SFS from a VCF file.
|
23
|
21
|
|
24
|
22
|
Parameters
|
25
|
23
|
----------
|
|
@@ -42,7 +40,8 @@ def sfs_from_vcf(n, vcf_file, folded = True, diploid = True, phased = False, ver
|
42
|
40
|
n *= 2
|
43
|
41
|
# initialize SFS_values with a dict of zeros
|
44
|
42
|
SFS_values = dict.fromkeys(range(n),0)
|
45
|
|
-
|
|
43
|
+ # store nb polyallelic sites
|
|
44
|
+ polyall = 0
|
46
|
45
|
with gzip.open(vcf_file, "rb") as inputgz:
|
47
|
46
|
line = inputgz.readline()
|
48
|
47
|
genotypes = []
|
|
@@ -88,17 +87,13 @@ def sfs_from_vcf(n, vcf_file, folded = True, diploid = True, phased = False, ver
|
88
|
87
|
allele_counts[snp_genotypes.count(k)] = k
|
89
|
88
|
allele_counts_list.append(snp_genotypes.count(k))
|
90
|
89
|
if folded and len(ALT) >= 2:
|
91
|
|
- pass
|
92
|
|
- # TODO - work in progress
|
93
|
|
- # for al in range(len(ALT)-1):
|
94
|
|
- # SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
|
95
|
|
- # allele_counts_list.remove(min(allele_counts_list))
|
|
90
|
+ polyall += 1
|
96
|
91
|
else:
|
97
|
92
|
SFS_values[min(allele_counts_list)-1] += 1
|
98
|
93
|
line = inputgz.readline()
|
99
|
94
|
if verbose:
|
100
|
95
|
print(SFS_values)
|
101
|
|
- return SFS_values
|
|
96
|
+ return SFS_values, polyall
|
102
|
97
|
|
103
|
98
|
if __name__ == "__main__":
|
104
|
99
|
|
|
@@ -106,8 +101,10 @@ if __name__ == "__main__":
|
106
|
101
|
print("Need 2 args")
|
107
|
102
|
exit(0)
|
108
|
103
|
|
|
104
|
+ # PARAM : vcf_file
|
|
105
|
+ vcf_file = sys.argv[1]
|
109
|
106
|
# PARAM : Nb of indiv
|
110
|
107
|
n = int(sys.argv[2])
|
111
|
108
|
|
112
|
|
- sfs = sfs_from_vcf(n, sys.argv[1], folded = True, diploid = True, phased = False)
|
|
109
|
+ sfs, nb_polyall = sfs_from_vcf(n, vcf_file, folded = True, diploid = True, phased = False)
|
113
|
110
|
print(sfs)
|