2 #include <stdlib.h> //EXIT_FAILURE
8 #include "libgFileIO.h"
10 #define MAXREADLEN (8ul*1024*1024)
11 uint64_t ReadsLenArr
[MAXREADLEN
];
13 const char *argp_program_version
=
14 "fcounter 0.1 @"__TIME__
"," __DATE__
;
15 const char *argp_program_bug_address
=
16 "<huxuesong@genomics.org.cn>";
18 /* Program documentation. */
29 /* A description of the arguments we accept. */
30 static char args_doc
[] = "input_files (FASTA or FASTQ)";
32 /* The options we understand. */
33 static struct argp_option options
[] = {
34 {"outfile", 'o', "./out.stat",0, "Output file" },
38 /* Used by main to communicate with parse_opt. */
44 /* Parse a single option. */
46 parse_opt (int key
, char *arg
, struct argp_state
*state
) {
47 /* Get the input argument from argp_parse, which we
48 know is a pointer to our arguments structure. */
49 struct arguments
*arguments
= state
->input
;
53 arguments
->outfile
= arg
;
57 arguments
->args
[state
->arg_num
] = arg
;
61 if (state
->arg_num
< 1) /* Not enough arguments. */
63 argp_state_help (state
,stderr
,ARGP_HELP_STD_HELP
);
67 return ARGP_ERR_UNKNOWN
;
72 /* Our argp parser. */
73 static struct argp argp
= { options
, parse_opt
, args_doc
, doc
};
75 int main (int argc
, char **argv
) {
76 struct arguments arguments
;
77 arguments
.args
=calloc(sizeof(size_t),argc
);
80 arguments
.outfile
= "./out.stat";
82 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
83 argp_parse (&argp
, argc
, argv
, 0, 0, &arguments
);
87 uint64_t maxReadLen
=0;
88 uint64_t minReadLen
=MAXREADLEN
+1;
90 char *const*line
= arguments
.args
-1; // so that we can use *(++line)
92 fputs("\nParsing Sequence Files:\n", stderr
);
95 fprintf(stderr
, " <%s> ...", *line
);
96 SeqFileObj
*seqobj
= inSeqFinit(*line
,GFIOCHRBASE
);
98 while ( (readlength
= (*seqobj
->getNextSeq
)(seqobj
)) >= 0 ) {
100 printf("-ID:[%s,%s] %zu %zu %zd\nSeq:[%s]\nQ:[%s] *%zx,%u\n",
101 seqobj
->name
,seqobj
->comment
,seqobj
->readlength
,seqobj
->binMallocedQQWord
,readlength
,
102 seqobj
->seq
,seqobj
->qual
,(size_t)seqobj
->seq
,seqobj
->type
);
104 allbases
+= readlength
;
106 if (maxReadLen
< readlength
) maxReadLen
= readlength
;
107 if (minReadLen
> readlength
) minReadLen
= readlength
;
108 if (readlength
< MAXREADLEN
) {
109 ++ReadsLenArr
[readlength
];
115 fputs("\b\b\b\b, done !\n", stderr
);
116 inSeqFdestroy(seqobj
);
118 free(arguments
.args
);
119 fputs("\nCount Done!\n", stderr
);
121 FILE *fp
= fopen(arguments
.outfile
, "w");
122 fprintf(fp
,"#Total_Bases: %lu\n#Total_Reads: %lu\n#Avg_Read_Len: %.1f\n"
123 "#Read_Len_Range: [%lu,%lu]\n"
125 "\n#Read_Len\tCount\tRatio\n",
127 0.05+((double)allbases
/(double)allreads
),
128 minReadLen
,maxReadLen
,ReadsLenArr
[0]);
129 for (uint64_t i
=minReadLen
; i
<=maxReadLen
; i
++) {
131 fprintf(fp
,"%lu\t%lu\t%g\n",i
,ReadsLenArr
[i
],(double)ReadsLenArr
[i
]/(double)allreads
);
133 if (ReadsLenArr
[0]) {
134 fprintf(fp
,"#>=%lu\t%lu\t%g\n",MAXREADLEN
,ReadsLenArr
[0],(double)ReadsLenArr
[0]/(double)allreads
);