|
@@ -19,8 +19,42 @@ void slice_str(const char * str, char * buffer, size_t start, size_t end)
|
19
|
19
|
buffer[j] = 0;
|
20
|
20
|
}
|
21
|
21
|
|
|
22
|
+int min(int * array, int size){
|
|
23
|
+ //Consider first element as smallest
|
|
24
|
+ int smallest = array[0];
|
|
25
|
+ int i;
|
|
26
|
+ for (i = 0; i < num; i++) {
|
|
27
|
+ if (a[i] < smallest) {
|
|
28
|
+ smallest = a[i];
|
|
29
|
+ }
|
|
30
|
+ }
|
|
31
|
+}
|
|
32
|
+
|
|
33
|
+int countDistinct(int a[], int n) //Function Definition
|
|
34
|
+{
|
|
35
|
+ int i, j, count = 0;
|
|
36
|
+ //Traverse the array
|
|
37
|
+ for (i = 1; i < n; i++) //hold an array element
|
|
38
|
+ {
|
|
39
|
+ for (j = 0; j < i; j++)
|
|
40
|
+ {
|
|
41
|
+ if (a[i] == a[j]) //Check for duplicate elements
|
|
42
|
+ {
|
|
43
|
+ break; //If duplicate elements found then break
|
|
44
|
+ }
|
|
45
|
+ }
|
|
46
|
+ if (i == j)
|
|
47
|
+ {
|
|
48
|
+ count++; //increment the number of distinct elements
|
|
49
|
+ }
|
|
50
|
+ }
|
|
51
|
+ return count; //Return the number of distinct elements
|
|
52
|
+}
|
22
|
53
|
|
23
|
54
|
# define LL 8192 /* line length maximum */
|
|
55
|
+# define DIPLOID true
|
|
56
|
+# define FOLDED true
|
|
57
|
+# define IGNORED_FIELDS 9
|
24
|
58
|
|
25
|
59
|
int main ( int argc, char *argv[] ){
|
26
|
60
|
if ( argc < 3) {
|
|
@@ -29,9 +63,20 @@ int main ( int argc, char *argv[] ){
|
29
|
63
|
}
|
30
|
64
|
gzFile fp;
|
31
|
65
|
char line[LL];
|
|
66
|
+ int N;
|
32
|
67
|
char delim[] = "\t";
|
33
|
68
|
fp = gzopen( argv[1], "r" );
|
34
|
69
|
|
|
70
|
+ // pop of size 2N when diploid
|
|
71
|
+ if (DIPLOID == true && FOLDED == false) {
|
|
72
|
+ N = 2 * atoi(argv[2]);
|
|
73
|
+ } else {
|
|
74
|
+ N = atoi(argv[2]);
|
|
75
|
+ }
|
|
76
|
+
|
|
77
|
+ int snp_genotypes[N];
|
|
78
|
+ int SFS_values[N];
|
|
79
|
+
|
35
|
80
|
gzgets( fp, line, LL );
|
36
|
81
|
while ( ! gzeof( fp ) ){
|
37
|
82
|
int k = 0;
|
|
@@ -43,14 +88,30 @@ int main ( int argc, char *argv[] ){
|
43
|
88
|
char *vcf_field = strtok(line, delim);
|
44
|
89
|
while(vcf_field != NULL){
|
45
|
90
|
k++;
|
46
|
|
- if (k > 9) {
|
|
91
|
+ if (k > IGNORED_FIELDS) {
|
47
|
92
|
const size_t len = strlen(vcf_field);
|
48
|
93
|
char buffer[len + 1];
|
49
|
94
|
//printf("'%s'\n", ptr);
|
50
|
95
|
slice_str(vcf_field, buffer, 0, 0);
|
51
|
|
- printf("%s ", buffer);
|
|
96
|
+ //printf("%d %s ", N, buffer);
|
|
97
|
+ snp_genotypes[k-IGNORED_FIELDS] = atoi(buffer);
|
|
98
|
+ //printf("%d ", smpl_genotype[k-9]);
|
52
|
99
|
}
|
53
|
100
|
vcf_field = strtok(NULL, delim);
|
|
101
|
+ int c= countDistinct(snp_genotypes, N);
|
|
102
|
+ // skip if all individuals have the same genotype
|
|
103
|
+ if (c == 1) {
|
|
104
|
+ continue;
|
|
105
|
+ gzgets( fp, line, LL );
|
|
106
|
+ }
|
|
107
|
+ /* int i; */
|
|
108
|
+ /* for (i = 1; i < N; ++i) */
|
|
109
|
+ /* { */
|
|
110
|
+ /* printf("%d ", snp_genotypes[i]); */
|
|
111
|
+ /* } */
|
|
112
|
+ int allele_counts[c];
|
|
113
|
+
|
|
114
|
+ min(allele_counts, N);
|
54
|
115
|
}
|
55
|
116
|
// printf("%s", line );
|
56
|
117
|
// loads the next line
|