2 #include <stdlib.h> //EXIT_FAILURE
13 #define MAXREADLEN (64ul*1024*1024 - 1)
14 uint64_t ReadsLenArr
[MAXREADLEN
+ 1];
16 const char *argp_program_version
=
17 "fcounter 0.2 @"__TIME__
"," __DATE__
;
18 const char *argp_program_bug_address
=
19 "<galaxy001@gmail.com>";
21 /* Program documentation. */
32 /* A description of the arguments we accept. */
33 static char args_doc
[] = "input_files (FASTA or FASTQ)";
35 /* The options we understand. */
36 static struct argp_option options
[] = {
37 {"outfile", 'o', "./out.stat",0, "Output file" },
41 /* Used by main to communicate with parse_opt. */
47 /* Parse a single option. */
49 parse_opt (int key
, char *arg
, struct argp_state
*state
) {
50 /* Get the input argument from argp_parse, which we
51 know is a pointer to our arguments structure. */
52 struct arguments
*arguments
= state
->input
;
56 arguments
->outfile
= arg
;
60 arguments
->args
[state
->arg_num
] = arg
;
64 if (state
->arg_num
< 1) /* Not enough arguments. */
66 argp_state_help (state
,stderr
,ARGP_HELP_STD_HELP
);
70 return ARGP_ERR_UNKNOWN
;
75 /* Our argp parser. */
76 static struct argp argp
= { options
, parse_opt
, args_doc
, doc
};
78 int main (int argc
, char **argv
) {
79 struct arguments arguments
;
80 arguments
.args
=calloc(sizeof(size_t),argc
);
83 arguments
.outfile
= "./out.stat";
85 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
86 argp_parse (&argp
, argc
, argv
, 0, 0, &arguments
);
90 uint64_t maxReadLen
=0;
91 uint64_t minReadLen
=MAXREADLEN
+1;
95 char *const*line
= arguments
.args
-1; // so that we can use *(++line)
99 fputs("\nParsing Sequence Files:\n", stderr
);
100 while ( *(++line
) ) {
102 fprintf(stderr
, " <%s> ...", *line
);
103 SeqFileObj
*seqobj
= inSeqFinit(*line
,GFIOCHRBASE
);
105 while ( (readlength
= (*seqobj
->getNextSeq
)(seqobj
)) >= 0 ) {
107 printf("-ID:[%s,%s] %zu %zu %zd\nSeq:[%s]\nQ:[%s] *%zx,%u\n",
108 seqobj
->name
,seqobj
->comment
,seqobj
->readlength
,seqobj
->binMallocedQQWord
,readlength
,
109 seqobj
->seq
,seqobj
->qual
,(size_t)seqobj
->seq
,seqobj
->type
);
111 allbases
+= readlength
;
112 SS
+= readlength
*readlength
;
114 if (maxReadLen
< readlength
) maxReadLen
= readlength
;
115 if (minReadLen
> readlength
) minReadLen
= readlength
;
116 if (readlength
< MAXREADLEN
) {
117 ++ReadsLenArr
[readlength
];
119 ++ReadsLenArr
[MAXREADLEN
];
123 fputs("\b\b\b\b, done !\n", stderr
);
124 inSeqFdestroy(seqobj
);
126 free(arguments
.args
);
127 fputs("\nCount Done!\n", stderr
);
129 SStd
= sqrtl(( SS
- (float128
)allbases
*(float128
)allbases
/(float128
)allreads
) / (long double)(allreads
-1));
131 FILE *fp
= fopen(arguments
.outfile
, "w");
132 fprintf(fp
,"#Total_Bases: %llu\n#Total_Reads: %llu\n#Avg_Read_Len: %.1f\tStd: %.3f\n"
133 "#Read_Len_Range: [%llu,%llu]\n"
135 "\n#Read_Len\tCount\tRatio\n",
137 0.05+((double)allbases
/(double)allreads
), SStd
,
138 minReadLen
,maxReadLen
,ReadsLenArr
[0]);
139 for (uint64_t i
=minReadLen
; i
<=maxReadLen
; i
++) {
141 fprintf(fp
,"%llu\t%llu\t%g\n",i
,ReadsLenArr
[i
],(double)ReadsLenArr
[i
]/(double)allreads
);
143 if (ReadsLenArr
[MAXREADLEN
]) {
144 fprintf(fp
,"#>=%lu\t%llu\t%g\n",MAXREADLEN
,ReadsLenArr
[MAXREADLEN
],(double)ReadsLenArr
[MAXREADLEN
]/(double)allreads
);