modified: myjupyterlab.sh
[GalaxyCodeBases.git] / c_cpp / libgFileIO / example.c
blob10ebd38e69e771384ac77334d94caa12b009c9fb
1 #define _GNU_SOURCE
2 #include <stdlib.h> //EXIT_FAILURE
3 #include <stdio.h>
4 #include <stdint.h>
5 #include <string.h>
6 #include <err.h>
7 #include <argp.h>
8 #include "libgFileIO.h"
10 #define MAXREADLEN (8ul*1024*1024)
11 uint64_t ReadsLenArr[MAXREADLEN];
13 const char *argp_program_version =
14 "fcounter 0.1 @"__TIME__ "," __DATE__;
15 const char *argp_program_bug_address =
16 "<huxuesong@genomics.org.cn>";
18 /* Program documentation. */
19 static char doc[] =
20 "Fasta/q Counter"
21 #ifdef DEBUG
22 " (Debug Version)"
23 #endif
24 #ifdef TEST
25 " (Test Version)"
26 #endif
29 /* A description of the arguments we accept. */
30 static char args_doc[] = "input_files (FASTA or FASTQ)";
32 /* The options we understand. */
33 static struct argp_option options[] = {
34 {"outfile", 'o', "./out.stat",0, "Output file" },
35 { 0 }
38 /* Used by main to communicate with parse_opt. */
39 struct arguments {
40 char **args;
41 char *outfile;
44 /* Parse a single option. */
45 static error_t
46 parse_opt (int key, char *arg, struct argp_state *state) {
47 /* Get the input argument from argp_parse, which we
48 know is a pointer to our arguments structure. */
49 struct arguments *arguments = state->input;
51 switch (key) {
52 case 'o':
53 arguments->outfile = arg;
54 break;
56 case ARGP_KEY_ARG:
57 arguments->args[state->arg_num] = arg;
58 break;
60 case ARGP_KEY_END:
61 if (state->arg_num < 1) /* Not enough arguments. */
62 //argp_usage (state);
63 argp_state_help (state,stderr,ARGP_HELP_STD_HELP);
64 break;
66 default:
67 return ARGP_ERR_UNKNOWN;
69 return 0;
72 /* Our argp parser. */
73 static struct argp argp = { options, parse_opt, args_doc, doc };
75 int main (int argc, char **argv) {
76 struct arguments arguments;
77 arguments.args=calloc(sizeof(size_t),argc);
79 // Default values.
80 arguments.outfile = "./out.stat";
82 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
83 argp_parse (&argp, argc, argv, 0, 0, &arguments);
85 uint64_t allbases=0;
86 uint64_t allreads=0;
87 uint64_t maxReadLen=0;
88 uint64_t minReadLen=MAXREADLEN+1;
90 char *const*line = arguments.args-1; // so that we can use *(++line)
92 fputs("\nParsing Sequence Files:\n", stderr);
93 while ( *(++line) ) {
94 ssize_t readlength;
95 fprintf(stderr, " <%s> ...", *line);
96 SeqFileObj *seqobj = inSeqFinit(*line,GFIOCHRBASE);
97 if (seqobj) {
98 while ( (readlength = (*seqobj->getNextSeq)(seqobj)) >= 0 ) {
99 #ifdef DEBUG
100 printf("-ID:[%s,%s] %zu %zu %zd\nSeq:[%s]\nQ:[%s] *%zx,%u\n",
101 seqobj->name,seqobj->comment,seqobj->readlength,seqobj->binMallocedQQWord,readlength,
102 seqobj->seq,seqobj->qual,(size_t)seqobj->seq,seqobj->type);
103 #endif
104 allbases += readlength;
105 ++allreads;
106 if (maxReadLen < readlength) maxReadLen = readlength;
107 if (minReadLen > readlength) minReadLen = readlength;
108 if (readlength < MAXREADLEN) {
109 ++ReadsLenArr[readlength];
110 } else {
111 ++ReadsLenArr[0];
114 } else continue;
115 fputs("\b\b\b\b, done !\n", stderr);
116 inSeqFdestroy(seqobj);
118 free(arguments.args);
119 fputs("\nCount Done!\n", stderr);
121 FILE *fp = fopen(arguments.outfile, "w");
122 fprintf(fp,"#Total_Bases: %lu\n#Total_Reads: %lu\n#Avg_Read_Len: %.1f\n"
123 "#Read_Len_Range: [%lu,%lu]\n"
124 "#Overflow: %lu\n"
125 "\n#Read_Len\tCount\tRatio\n",
126 allbases,allreads,
127 0.05+((double)allbases/(double)allreads),
128 minReadLen,maxReadLen,ReadsLenArr[0]);
129 for (uint64_t i=minReadLen; i<=maxReadLen; i++) {
130 if (ReadsLenArr[i])
131 fprintf(fp,"%lu\t%lu\t%g\n",i,ReadsLenArr[i],(double)ReadsLenArr[i]/(double)allreads);
133 if (ReadsLenArr[0]) {
134 fprintf(fp,"#>=%lu\t%lu\t%g\n",MAXREADLEN,ReadsLenArr[0],(double)ReadsLenArr[0]/(double)allreads);
136 fclose(fp);
138 exit(EXIT_SUCCESS);