tforest преди 2 години
родител
ревизия
91915e5fa8
променени са 1 файла, в които са добавени 63 реда и са изтрити 2 реда
  1. 63 2
      vcf_to_sfs.c

+ 63 - 2
vcf_to_sfs.c Целия файл

19
     buffer[j] = 0;
19
     buffer[j] = 0;
20
 }
20
 }
21
 
21
 
22
/*
 * Return the smallest value among the first `size` elements of `array`.
 *
 * array: pointer to at least `size` readable ints.
 * size:  number of elements to examine; must be >= 1, because the first
 *        element is read unconditionally as the initial candidate.
 *
 * Returns the minimum of array[0] .. array[size - 1].
 */
int min(int * array, int size){
    /* Start with the first element as the current smallest. */
    int smallest = array[0];

    /* Index 0 is already the candidate, so scanning starts at 1. */
    for (int i = 1; i < size; i++) {
        if (array[i] < smallest) {
            smallest = array[i];
        }
    }
    return smallest;
}
32
+
33
/*
 * Count how many distinct values occur in the first `n` elements of `a`.
 *
 * a: array of at least `n` readable ints (not modified).
 * n: number of elements to examine; n <= 0 yields 0.
 *
 * Returns the number of distinct values, so an array whose elements are
 * all equal yields 1.
 *
 * An element is counted only at its first occurrence: for each index i,
 * the earlier elements a[0..i-1] are scanned for an equal value. This is
 * quadratic in n but needs no scratch memory.
 */
int countDistinct(int a[], int n)
{
    int count = 0;

    /* Start at index 0 so that the first element is counted as well. */
    for (int i = 0; i < n; i++) {
        int j;

        for (j = 0; j < i; j++) {
            if (a[i] == a[j]) {
                break;          /* a[i] already appeared earlier */
            }
        }
        /* j reached i only if no earlier element equals a[i]. */
        if (j == i) {
            count++;
        }
    }
    return count;
}
22
 
53
 
23
 # define LL 8192   /* line length maximum */
54
 # define LL 8192   /* line length maximum */
55
+# define DIPLOID true   /* main doubles the count parsed from argv[2] only when DIPLOID is true and FOLDED is false */
56
+# define FOLDED true   /* while this is true, main uses the count parsed from argv[2] unchanged */
57
+# define IGNORED_FIELDS 9   /* main starts storing genotypes only once the tab-separated field index exceeds this value */ /* NOTE(review): true/false need <stdbool.h>; the include list is not visible in this view - confirm */
24
 
58
 
25
 int main ( int argc, char *argv[] ){
59
 int main ( int argc, char *argv[] ){
26
     if ( argc < 3) {
60
     if ( argc < 3) {
29
     }
63
     }
30
     gzFile fp;
64
     gzFile fp;
31
     char line[LL];
65
     char line[LL];
66
+    int N;
32
     char delim[] = "\t";
67
     char delim[] = "\t";
33
     fp = gzopen( argv[1], "r" );
68
     fp = gzopen( argv[1], "r" );
34
 
69
 
70
+    // pop of size 2N when diploid
71
+    if (DIPLOID == true && FOLDED == false) {
72
+	N = 2 * atoi(argv[2]);
73
+	    } else {
74
+	N = atoi(argv[2]);
75
+    }
76
+
77
+    int snp_genotypes[N];
78
+    int SFS_values[N];
79
+
35
     gzgets( fp, line, LL );
80
     gzgets( fp, line, LL );
36
     while ( ! gzeof( fp ) ){
81
     while ( ! gzeof( fp ) ){
37
 	int k = 0;
82
 	int k = 0;
43
 	char *vcf_field = strtok(line, delim);
88
 	char *vcf_field = strtok(line, delim);
44
 	while(vcf_field != NULL){
89
 	while(vcf_field != NULL){
45
 	    k++;
90
 	    k++;
46
-	    if (k > 9) {
91
+	    if (k > IGNORED_FIELDS) {
47
 		const size_t len = strlen(vcf_field);
92
 		const size_t len = strlen(vcf_field);
48
 		char buffer[len + 1];
93
 		char buffer[len + 1];
49
 		//printf("'%s'\n", ptr);
94
 		//printf("'%s'\n", ptr);
50
 		slice_str(vcf_field, buffer, 0, 0);
95
 		slice_str(vcf_field, buffer, 0, 0);
51
-		printf("%s      ", buffer);
96
+		//printf("%d %s      ", N, buffer);
97
+		snp_genotypes[k-IGNORED_FIELDS] = atoi(buffer);
98
+		//printf("%d ", smpl_genotype[k-9]);
52
 	    }
99
 	    }
53
 	    vcf_field = strtok(NULL, delim);
100
 	    vcf_field = strtok(NULL, delim);
101
+	    int c= countDistinct(snp_genotypes, N);
102
+            // skip if all individuals have the same genotype
103
+	    if (c == 1) {
104
+		continue;
105
+		gzgets( fp, line, LL );
106
+	    }
107
+	    /* int i; */
108
+	    /* for (i = 1; i < N; ++i) */
109
+	    /* 	{ */
110
+	    /* 	    printf("%d ", snp_genotypes[i]); */
111
+	    /* 	} */
112
+	    int allele_counts[c];
113
+	    
114
+	    min(allele_counts, N);
54
 	}
115
 	}
55
 	// printf("%s", line );
116
 	// printf("%s", line );
56
 	// loads the next line
117
 	// loads the next line