modified: Makefile
[GalaxyCodeBases.git] / c_cpp / readscorr / fcount.c
blob04435da7e667e266cc9a5f14208e03f83fa17d3d
1 #define _GNU_SOURCE
2 #include <stdlib.h> //EXIT_FAILURE
3 #include <stdio.h>
4 #include <stdint.h>
5 #include <string.h>
6 //#include <errno.h>
7 #include <err.h>
8 #include <argp.h>
9 #include <math.h>
10 #include "getch.h"
11 #include "gFileIO.h"
12 #include "chrseq.h"
13 #include "timer.h"
15 #define MAXREADLEN (8ul*1024*1024)
16 uint64_t ReadsLenArr[MAXREADLEN];
18 const char *argp_program_version =
19 "fcounter 0.1 @"__TIME__ "," __DATE__;
20 const char *argp_program_bug_address =
21 "<huxuesong@genomics.org.cn>";
23 /* Program documentation. */
24 static char doc[] =
25 "Fasta/q Counter"
26 #ifdef DEBUG
27 " (Debug Version)"
28 #endif
29 #ifdef TEST
30 " (Test Version)"
31 #endif
34 /* A description of the arguments we accept. */
35 static char args_doc[] = "input_files (FASTA or FASTQ)";
37 /* The options we understand. */
38 static struct argp_option options[] = {
39 {"outfile", 'o', "./out.stat",0, "Output file" },
40 // {"countbase", 'c', 0, 0, "Also count bases" },
41 { 0 }
44 /* Used by main to communicate with parse_opt. */
45 struct arguments {
46 char **args;
47 char *outfile;
50 /* Parse a single option. */
51 static error_t
52 parse_opt (int key, char *arg, struct argp_state *state) {
53 /* Get the input argument from argp_parse, which we
54 know is a pointer to our arguments structure. */
55 struct arguments *arguments = state->input;
57 switch (key) {
58 case 'o':
59 arguments->outfile = arg;
60 break;
62 case ARGP_KEY_ARG:
63 arguments->args[state->arg_num] = arg;
64 break;
66 case ARGP_KEY_END:
67 if (state->arg_num < 1) /* Not enough arguments. */
68 //argp_usage (state);
69 argp_state_help (state,stderr,ARGP_HELP_STD_HELP);
70 break;
72 default:
73 return ARGP_ERR_UNKNOWN;
75 return 0;
78 /* Our argp parser. */
79 static struct argp argp = { options, parse_opt, args_doc, doc };
81 int main (int argc, char **argv) {
82 struct arguments arguments;
83 arguments.args=calloc(sizeof(size_t),argc); // the first is $0, so tailing NULL is of sure.
85 // Default values.
86 arguments.outfile = "./out.stat";
88 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
89 argp_parse (&argp, argc, argv, 0, 0, &arguments);
91 uint64_t allbases=0;
92 uint64_t allreads=0;
93 uint64_t maxReadLen=0;
94 float128 SS=0.0;
95 double SStd;
97 char *const*line = arguments.args-1; // so that we can use *(++line)
99 G_TIMER_START;
101 fputs("\nParsing Sequence Files:\n", stderr);
102 while ( *(++line) ) {
103 ssize_t readlength;
104 fprintf(stderr, " <%s> ...", *line);
105 SeqFileObj *seqobj = inSeqFinit(*line,GFIOCHRBASE);
106 if (seqobj) {
107 while ( (readlength = (*seqobj->getNextSeq)(seqobj)) >= 0 ) {
108 #ifdef DEBUG
109 puts(*line);
110 printf("-ID:[%s,%s] %zu %zu %zd\nSeq:[%s]\nQ:[%s] *%zx,%u\n",
111 seqobj->name,seqobj->comment,seqobj->readlength,seqobj->binMallocedQQWord,readlength,
112 seqobj->seq,seqobj->qual,(size_t)seqobj->seq,seqobj->type);
113 /*char *tmpseq;
114 //NormalizeChrSeq((char*)seqobj->seq);
115 printf("rc[%s]\n",tmpseq=ChrSeqRevComp(seqobj->seq,seqobj->readlength));
116 free(tmpseq);*/
117 #endif
118 allbases += readlength;
119 SS += readlength*readlength;
120 ++allreads;
121 if (maxReadLen < readlength) maxReadLen = readlength;
122 if (readlength < MAXREADLEN) {
123 ++ReadsLenArr[readlength];
124 } else {
125 ++ReadsLenArr[0];
128 } else continue;
129 fputs("\b\b\b\b, done !\n", stderr);
130 inSeqFdestroy(seqobj);
132 free(arguments.args);
133 fputs("\nCount Done!\n", stderr);
135 SStd = sqrtl(( SS - (float128)allbases*(float128)allbases/(float128)allreads ) / (long double)(allreads -1));
137 FILE *fp = fopen(arguments.outfile, "w");
138 fprintf(fp,"#Total_Bases: %lu\n#Total_Reads: %lu\n#Avg_Read_Len: %.1f\tStd: %.3f\n"
139 "#Max_Read_Len: %lu\n"
140 "#Overflow: %lu\n"
141 "\n#Read_Len\tCount\tRatio\n",
142 allbases,allreads,
143 0.05+((double)allbases/(double)allreads), SStd,
144 maxReadLen,ReadsLenArr[0]);
145 for (uint64_t i=1; i<=maxReadLen; i++) {
146 fprintf(fp,"%lu\t%lu\t%g\n",i,ReadsLenArr[i],(double)ReadsLenArr[i]/(double)allreads);
147 //printf("%lu\t%lu\n",i,ReadsLenArr[i]);
149 if (ReadsLenArr[0]) {
150 fprintf(fp,"#>=%lu\t%lu\t%g\n",MAXREADLEN,ReadsLenArr[0],(double)ReadsLenArr[0]/(double)allreads);
152 fclose(fp);
154 G_TIMER_END;
155 G_TIMER_PRINT;
157 exit(EXIT_SUCCESS);