modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / faststater / fcount.c
blobc35bcdfbbd94e51195b0aed8fcdb8d738517ff04
1 #define _GNU_SOURCE
2 #include <stdlib.h> //EXIT_FAILURE
3 #include <stdio.h>
4 #include <stdint.h>
5 #include <string.h>
6 #include <err.h>
7 #include <argp.h>
8 #include <math.h>
9 #include "gFileIO.h"
10 #include "chrseq.h"
11 #include "timer.h"
13 #define MAXREADLEN (64ul*1024*1024 - 1)
14 uint64_t ReadsLenArr[MAXREADLEN + 1];
16 const char *argp_program_version =
17 "fcounter 0.2 @"__TIME__ "," __DATE__;
18 const char *argp_program_bug_address =
19 "<galaxy001@gmail.com>";
21 /* Program documentation. */
22 static char doc[] =
23 "Fasta/q Counter"
24 #ifdef DEBUG
25 " (Debug Version)"
26 #endif
27 #ifdef TEST
28 " (Test Version)"
29 #endif
32 /* A description of the arguments we accept. */
33 static char args_doc[] = "input_files (FASTA or FASTQ)";
35 /* The options we understand. */
36 static struct argp_option options[] = {
37 {"outfile", 'o', "./out.stat",0, "Output file" },
38 { 0 }
41 /* Used by main to communicate with parse_opt. */
42 struct arguments {
43 char **args;
44 char *outfile;
47 /* Parse a single option. */
48 static error_t
49 parse_opt (int key, char *arg, struct argp_state *state) {
50 /* Get the input argument from argp_parse, which we
51 know is a pointer to our arguments structure. */
52 struct arguments *arguments = state->input;
54 switch (key) {
55 case 'o':
56 arguments->outfile = arg;
57 break;
59 case ARGP_KEY_ARG:
60 arguments->args[state->arg_num] = arg;
61 break;
63 case ARGP_KEY_END:
64 if (state->arg_num < 1) /* Not enough arguments. */
65 //argp_usage (state);
66 argp_state_help (state,stderr,ARGP_HELP_STD_HELP);
67 break;
69 default:
70 return ARGP_ERR_UNKNOWN;
72 return 0;
75 /* Our argp parser. */
76 static struct argp argp = { options, parse_opt, args_doc, doc };
78 int main (int argc, char **argv) {
79 struct arguments arguments;
80 arguments.args=calloc(sizeof(size_t),argc);
82 // Default values.
83 arguments.outfile = "./out.stat";
85 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
86 argp_parse (&argp, argc, argv, 0, 0, &arguments);
88 uint64_t allbases=0;
89 uint64_t allreads=0;
90 uint64_t maxReadLen=0;
91 uint64_t minReadLen=MAXREADLEN+1;
92 float128 SS=0.0;
93 double SStd;
95 char *const*line = arguments.args-1; // so that we can use *(++line)
97 G_TIMER_START;
99 fputs("\nParsing Sequence Files:\n", stderr);
100 while ( *(++line) ) {
101 ssize_t readlength;
102 fprintf(stderr, " <%s> ...", *line);
103 SeqFileObj *seqobj = inSeqFinit(*line,GFIOCHRBASE);
104 if (seqobj) {
105 while ( (readlength = (*seqobj->getNextSeq)(seqobj)) >= 0 ) {
106 #ifdef DEBUG
107 printf("-ID:[%s,%s] %zu %zu %zd\nSeq:[%s]\nQ:[%s] *%zx,%u\n",
108 seqobj->name,seqobj->comment,seqobj->readlength,seqobj->binMallocedQQWord,readlength,
109 seqobj->seq,seqobj->qual,(size_t)seqobj->seq,seqobj->type);
110 #endif
111 allbases += readlength;
112 SS += readlength*readlength;
113 ++allreads;
114 if (maxReadLen < readlength) maxReadLen = readlength;
115 if (minReadLen > readlength) minReadLen = readlength;
116 if (readlength < MAXREADLEN) {
117 ++ReadsLenArr[readlength];
118 } else {
119 ++ReadsLenArr[MAXREADLEN];
122 } else continue;
123 fputs("\b\b\b\b, done !\n", stderr);
124 inSeqFdestroy(seqobj);
126 free(arguments.args);
127 fputs("\nCount Done!\n", stderr);
129 SStd = sqrtl(( SS - (float128)allbases*(float128)allbases/(float128)allreads ) / (long double)(allreads -1));
131 FILE *fp = fopen(arguments.outfile, "w");
132 fprintf(fp,"#Total_Bases: %llu\n#Total_Reads: %llu\n#Avg_Read_Len: %.1f\tStd: %.3f\n"
133 "#Read_Len_Range: [%llu,%llu]\n"
134 "#Overflow: %llu\n"
135 "\n#Read_Len\tCount\tRatio\n",
136 allbases,allreads,
137 0.05+((double)allbases/(double)allreads), SStd,
138 minReadLen,maxReadLen,ReadsLenArr[0]);
139 for (uint64_t i=minReadLen; i<=maxReadLen; i++) {
140 if (ReadsLenArr[i])
141 fprintf(fp,"%llu\t%llu\t%g\n",i,ReadsLenArr[i],(double)ReadsLenArr[i]/(double)allreads);
143 if (ReadsLenArr[MAXREADLEN]) {
144 fprintf(fp,"#>=%lu\t%llu\t%g\n",MAXREADLEN,ReadsLenArr[MAXREADLEN],(double)ReadsLenArr[MAXREADLEN]/(double)allreads);
146 fclose(fp);
148 G_TIMER_END;
149 G_TIMER_PRINT;
151 exit(EXIT_SUCCESS);