/* vcf_to_sfs.c */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <zlib.h>
  6. bool StartsWith(const char *a, const char *b)
  7. {
  8. if(strncmp(a, b, strlen(b)) == 0) return 1;
  9. return 0;
  10. }
  11. void slice_str(const char * str, char * buffer, size_t start, size_t end)
  12. {
  13. size_t j = 0;
  14. for ( size_t i = start; i <= end; ++i ) {
  15. buffer[j++] = str[i];
  16. }
  17. buffer[j] = 0;
  18. }
  19. # define LL 8192 /* line length maximum */
  20. int main ( int argc, char *argv[] ){
  21. if ( argc < 3) {
  22. printf("Need 2 args!\n");
  23. return 1;
  24. }
  25. gzFile fp;
  26. char line[LL];
  27. char delim[] = "\t";
  28. fp = gzopen( argv[1], "r" );
  29. gzgets( fp, line, LL );
  30. while ( ! gzeof( fp ) ){
  31. int k = 0;
  32. if ( StartsWith(line, "##") || ( StartsWith(line, "#") ) || (strstr(line, "./.:.") != NULL)){
  33. gzgets( fp, line, LL );
  34. continue;
  35. }
  36. char *vcf_field = strtok(line, delim);
  37. while(vcf_field != NULL){
  38. k++;
  39. if (k > 9) {
  40. const size_t len = strlen(vcf_field);
  41. char buffer[len + 1];
  42. //printf("'%s'\n", ptr);
  43. slice_str(vcf_field, buffer, 0, 0);
  44. printf("%s ", buffer);
  45. }
  46. vcf_field = strtok(NULL, delim);
  47. }
  48. // printf("%s", line );
  49. // loads the next line
  50. gzgets( fp, line, LL );
  51. }
  52. gzclose( fp );
  53. return 0;
  54. }