123456789101112131415161718192021222324252627282930313233343536373839404142 |
- #!/usr/bin/env python3
-
- """
- Caution: at the moment this script works on gzipped input files only.
-
-
- """
-
- import gzip
- import sys
-
# Default: report the folded SFS (minor-allele counts).
folded = True


def _allele_calls(sample_field):
    """Return the called allele indices of one VCF sample column.

    The genotype is taken from the first ':'-separated sub-field.
    Both unphased ('/') and phased ('|') separators are accepted, and
    missing calls ('.') are dropped.
    """
    genotype = sample_field.split(":")[0]
    return [
        int(allele)
        for allele in genotype.replace("|", "/").split("/")
        if allele != "."
    ]


def _site_allele_count(sample_fields, fold=True):
    """Return the SFS bin of one record, or None when nothing was called.

    Every allele index other than 0 is counted as derived.  When *fold*
    is true the rarer of the two classes (index 0 vs. any other index)
    is reported instead.
    """
    calls = [
        allele
        for field in sample_fields
        for allele in _allele_calls(field)
    ]
    if not calls:
        return None
    derived = sum(1 for allele in calls if allele != 0)
    if fold:
        # Folding: the minor class is whichever of the two is rarer.
        derived = min(derived, len(calls) - derived)
    return derived


def compute_sfs(lines, fold=True):
    """Build a site frequency spectrum from VCF text lines.

    Args:
        lines: iterable of decoded VCF lines (header lines included).
        fold: when true, bin records by minor-allele count; otherwise
            by the count of non-zero allele indices.

    Returns:
        A dict mapping an allele count to the number of records with
        that count.  Header lines, blank lines, and records without any
        called genotype are skipped.
    """
    sfs = {}
    for raw_line in lines:
        line = raw_line.strip()
        # "#" also covers the "##" meta-information lines.
        if not line or line.startswith("#"):
            continue
        # Sample columns start at the tenth tab-separated field.
        count = _site_allele_count(line.split("\t")[9:], fold)
        if count is None:
            continue
        sfs[count] = sfs.get(count, 0) + 1
    return sfs


if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.exit("usage: {} <input.vcf.gz>".format(sys.argv[0]))
    # Text mode decodes once at the I/O boundary instead of per line.
    with gzip.open(sys.argv[1], "rt", encoding="utf-8") as inputgz:
        print(compute_sfs(inputgz, fold=folded))
|