/* Scans a gzip-compressed, tab-delimited (VCF-style) file and inspects the per-sample genotype columns of each record. */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <zlib.h>
-
/*
 * Reports whether string `a` begins with the prefix `b`.
 *
 * Both arguments must be valid NUL-terminated strings. An empty `b` is a
 * prefix of every string, so the result is true in that case.
 */
bool StartsWith(const char *a, const char *b)
{
    /* Compare only the first strlen(b) characters; strncmp stops early if
       `a` is shorter than the prefix, which then yields a mismatch. */
    return strncmp(a, b, strlen(b)) == 0;
}
-
/*
 * Copies the characters str[start..end] (both indices inclusive) into
 * `buffer` and NUL-terminates the result.
 *
 * The requested range is clamped to the length of `str`, so the source is
 * never read past its terminator: a range that starts beyond the end of the
 * string, or with start > end, produces an empty string.
 *
 * `buffer` must have room for the copied characters plus the terminator
 * (at most end - start + 2 bytes).
 */
void slice_str(const char * str, char * buffer, size_t start, size_t end)
{
    const size_t length = strlen(str);
    size_t copied = 0;

    if (start < length && start <= end) {
        /* Clamp the inclusive upper index to the last real character. */
        const size_t last = (end < length - 1) ? end : length - 1;

        copied = last - start + 1;
        memcpy(buffer, str + start, copied);
    }
    buffer[copied] = '\0';
}
-
/*
 * Returns the smallest value among the first `size` elements of `array`.
 *
 * Precondition: `array` is non-NULL and `size` is at least 1; the first
 * element is read unconditionally as the initial candidate.
 */
int min(int * array, int size){
    /* Consider the first element as the smallest so far. */
    int smallest = array[0];

    for (int i = 1; i < size; i++) {
        if (array[i] < smallest) {
            smallest = array[i];
        }
    }
    return smallest;
}
-
/*
 * Returns the number of distinct values among the first `n` elements of `a`.
 *
 * An element is counted only at its first occurrence: it is compared with
 * every earlier element and skipped if an equal one is found. The scan
 * starts at index 0 so the first element is counted as well; an empty
 * range (n <= 0) yields 0. Runs in O(n^2) comparisons.
 */
int countDistinct(int a[], int n)
{
    int count = 0;

    for (int i = 0; i < n; i++) {
        int j;

        /* Look for an earlier duplicate of a[i]. */
        for (j = 0; j < i; j++) {
            if (a[i] == a[j]) {
                break;
            }
        }
        /* The inner loop ran to completion: a[i] is a new value. */
        if (j == i) {
            count++;
        }
    }
    return count;
}
-
/* Size in bytes of the buffer that holds one input line (terminator included). */
#define LL 8192
/* Together with FOLDED, selects whether main doubles the sample count given
   on the command line: it is doubled only when DIPLOID is true and FOLDED
   is false. */
#define DIPLOID true
#define FOLDED true
/* Number of leading tab-separated columns skipped on each record before the
   per-sample genotype columns start. */
#define IGNORED_FIELDS 9
-
- int main ( int argc, char *argv[] ){
- if ( argc < 3) {
- printf("Need 2 args!\n");
- return 1;
- }
- gzFile fp;
- char line[LL];
- int N;
- char delim[] = "\t";
- fp = gzopen( argv[1], "r" );
-
- // pop of size 2N when diploid
- if (DIPLOID == true && FOLDED == false) {
- N = 2 * atoi(argv[2]);
- } else {
- N = atoi(argv[2]);
- }
-
- int snp_genotypes[N];
- int SFS_values[N];
-
- gzgets( fp, line, LL );
- while ( ! gzeof( fp ) ){
- int k = 0;
- if ( StartsWith(line, "##") || ( StartsWith(line, "#") ) || (strstr(line, "./.:.") != NULL)){
- gzgets( fp, line, LL );
- continue;
- }
-
- char *vcf_field = strtok(line, delim);
- while(vcf_field != NULL){
- k++;
- if (k > IGNORED_FIELDS) {
- const size_t len = strlen(vcf_field);
- char buffer[len + 1];
- //printf("'%s'\n", ptr);
- slice_str(vcf_field, buffer, 0, 0);
- //printf("%d %s ", N, buffer);
- snp_genotypes[k-IGNORED_FIELDS] = atoi(buffer);
- //printf("%d ", smpl_genotype[k-9]);
- }
- vcf_field = strtok(NULL, delim);
- int c= countDistinct(snp_genotypes, N);
- // skip if all individuals have the same genotype
- if (c == 1) {
- continue;
- gzgets( fp, line, LL );
- }
- /* int i; */
- /* for (i = 1; i < N; ++i) */
- /* { */
- /* printf("%d ", snp_genotypes[i]); */
- /* } */
- int allele_counts[c];
-
- min(allele_counts, N);
- }
- // printf("%s", line );
- // loads the next line
- gzgets( fp, line, LL );
- }
-
- gzclose( fp );
- return 0;
- }
|