tforest 2 jaren geleden
bovenliggende
commit
6799dd9a53
1 gewijzigde bestanden met toevoegingen van 0 en 17 verwijderingen
  1. 0 17
      vcf_to_sfs.py

+ 0 - 17
vcf_to_sfs.py Bestand weergeven

28
 # initiate SFS_values with a zeros dict
28
 # initiate SFS_values with a zeros dict
29
 SFS_values = dict.fromkeys(range(n),0)
29
 SFS_values = dict.fromkeys(range(n),0)
30
 
30
 
31
-polycount = 0
32
-
33
 with gzip.open(sys.argv[1], "rb") as inputgz:
31
 with gzip.open(sys.argv[1], "rb") as inputgz:
34
     line = inputgz.readline()
32
     line = inputgz.readline()
35
     genotypes = []
33
     genotypes = []
66
                     smpl_genotype = [int(a) for a in sample.split(':')[0].split('|') if a != '.']
64
                     smpl_genotype = [int(a) for a in sample.split(':')[0].split('|') if a != '.']
67
                 nb_alleles = set(smpl_genotype)
65
                 nb_alleles = set(smpl_genotype)
68
                 snp_genotypes += smpl_genotype
66
                 snp_genotypes += smpl_genotype
69
-            # if set(snp_genotypes) > 2:
70
-            #     polyallelic = set(snp_genotypes)
71
-            # else:
72
-            #     polyallelic = False
73
-            polyallelic = len(ALT)
74
-            ##print(REF, ALT, snp_genotypes)
75
             # skip if all individuals have the same genotype
67
             # skip if all individuals have the same genotype
76
             if len(set(snp_genotypes)) == 1:
68
             if len(set(snp_genotypes)) == 1:
77
                 line = inputgz.readline()
69
                 line = inputgz.readline()
80
                 allele_counts[snp_genotypes.count(k)] = k
72
                 allele_counts[snp_genotypes.count(k)] = k
81
                 allele_counts_list.append(snp_genotypes.count(k))
73
                 allele_counts_list.append(snp_genotypes.count(k))
82
             if folded :
74
             if folded :
83
-                #allele_counts_list = list(allele_counts.keys())
84
-                ##print("ALC", allele_counts_list, "POLY", polyallelic, ALT)
85
                 for al in range(polyallelic-1):
75
                 for al in range(polyallelic-1):
86
                     SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
76
                     SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
87
                     allele_counts_list.remove(min(allele_counts_list))
77
                     allele_counts_list.remove(min(allele_counts_list))
88
-                # if len(ALT) == 1:
89
-                #     SFS_values[min(allele_counts_list)-1] += 1
90
-                # else:
91
-                #     for al in range(polyallelic-1):
92
-                #         SFS_values[min(allele_counts_list)-1] += 1/len(ALT)
93
-                #         allele_counts_list.remove(min(allele_counts_list))
94
-                #         polycount += 1
95
         line = inputgz.readline()
78
         line = inputgz.readline()
96
         print(SFS_values)
79
         print(SFS_values)
97
 print(polycount)
80
 print(polycount)