2 #include <stdlib.h> //EXIT_FAILURE
15 #define MAXREADLEN (8ul*1024*1024)
16 uint64_t ReadsLenArr
[MAXREADLEN
];
18 const char *argp_program_version
=
19 "fcounter 0.1 @"__TIME__
"," __DATE__
;
20 const char *argp_program_bug_address
=
21 "<huxuesong@genomics.org.cn>";
23 /* Program documentation. */
34 /* A description of the arguments we accept. */
35 static char args_doc
[] = "input_files (FASTA or FASTQ)";
37 /* The options we understand. */
38 static struct argp_option options
[] = {
39 {"outfile", 'o', "./out.stat",0, "Output file" },
40 // {"countbase", 'c', 0, 0, "Also count bases" },
44 /* Used by main to communicate with parse_opt. */
50 /* Parse a single option. */
52 parse_opt (int key
, char *arg
, struct argp_state
*state
) {
53 /* Get the input argument from argp_parse, which we
54 know is a pointer to our arguments structure. */
55 struct arguments
*arguments
= state
->input
;
59 arguments
->outfile
= arg
;
63 arguments
->args
[state
->arg_num
] = arg
;
67 if (state
->arg_num
< 1) /* Not enough arguments. */
69 argp_state_help (state
,stderr
,ARGP_HELP_STD_HELP
);
73 return ARGP_ERR_UNKNOWN
;
78 /* Our argp parser. */
79 static struct argp argp
= { options
, parse_opt
, args_doc
, doc
};
81 int main (int argc
, char **argv
) {
82 struct arguments arguments
;
83 arguments
.args
=calloc(sizeof(size_t),argc
); // the first is $0, so tailing NULL is of sure.
86 arguments
.outfile
= "./out.stat";
88 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
89 argp_parse (&argp
, argc
, argv
, 0, 0, &arguments
);
93 uint64_t maxReadLen
=0;
97 char *const*line
= arguments
.args
-1; // so that we can use *(++line)
101 fputs("\nParsing Sequence Files:\n", stderr
);
102 while ( *(++line
) ) {
104 fprintf(stderr
, " <%s> ...", *line
);
105 SeqFileObj
*seqobj
= inSeqFinit(*line
,GFIOCHRBASE
);
107 while ( (readlength
= (*seqobj
->getNextSeq
)(seqobj
)) >= 0 ) {
110 printf("-ID:[%s,%s] %zu %zu %zd\nSeq:[%s]\nQ:[%s] *%zx,%u\n",
111 seqobj
->name
,seqobj
->comment
,seqobj
->readlength
,seqobj
->binMallocedQQWord
,readlength
,
112 seqobj
->seq
,seqobj
->qual
,(size_t)seqobj
->seq
,seqobj
->type
);
114 //NormalizeChrSeq((char*)seqobj->seq);
115 printf("rc[%s]\n",tmpseq=ChrSeqRevComp(seqobj->seq,seqobj->readlength));
118 allbases
+= readlength
;
119 SS
+= readlength
*readlength
;
121 if (maxReadLen
< readlength
) maxReadLen
= readlength
;
122 if (readlength
< MAXREADLEN
) {
123 ++ReadsLenArr
[readlength
];
129 fputs("\b\b\b\b, done !\n", stderr
);
130 inSeqFdestroy(seqobj
);
132 free(arguments
.args
);
133 fputs("\nCount Done!\n", stderr
);
135 SStd
= sqrtl(( SS
- (float128
)allbases
*(float128
)allbases
/(float128
)allreads
) / (long double)(allreads
-1));
137 FILE *fp
= fopen(arguments
.outfile
, "w");
138 fprintf(fp
,"#Total_Bases: %lu\n#Total_Reads: %lu\n#Avg_Read_Len: %.1f\tStd: %.3f\n"
139 "#Max_Read_Len: %lu\n"
141 "\n#Read_Len\tCount\tRatio\n",
143 0.05+((double)allbases
/(double)allreads
), SStd
,
144 maxReadLen
,ReadsLenArr
[0]);
145 for (uint64_t i
=1; i
<=maxReadLen
; i
++) {
146 fprintf(fp
,"%lu\t%lu\t%g\n",i
,ReadsLenArr
[i
],(double)ReadsLenArr
[i
]/(double)allreads
);
147 //printf("%lu\t%lu\n",i,ReadsLenArr[i]);
149 if (ReadsLenArr
[0]) {
150 fprintf(fp
,"#>=%lu\t%lu\t%g\n",MAXREADLEN
,ReadsLenArr
[0],(double)ReadsLenArr
[0]/(double)allreads
);